From 559da498c98e2916a6c29e0333c7d1cd72dc44f9 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Wed, 30 Oct 2024 08:31:12 +0800 Subject: [PATCH] sm4: gcm purego optimize NewGCM --- sm4/gcm_amd64.s | 18 +++++++++++------- sm4/gcm_cipher_asm.go | 13 +++++++++---- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/sm4/gcm_amd64.s b/sm4/gcm_amd64.s index 3fdf1bc..a39970f 100644 --- a/sm4/gcm_amd64.s +++ b/sm4/gcm_amd64.s @@ -207,25 +207,29 @@ sm4InitEncLoop: MOVQ $7, AX initLoop: + // B0 * B2, Karatsuba Approach MOVOU B2, T0 MOVOU B2, T1 MOVOU B3, T2 - PCLMULQDQ $0x00, B0, T0 - PCLMULQDQ $0x11, B0, T1 - PCLMULQDQ $0x00, B1, T2 + PCLMULQDQ $0x00, B0, T0 // B0[0] * B2[0] + PCLMULQDQ $0x11, B0, T1 // B0[1] * B2[1] + PCLMULQDQ $0x00, B1, T2 // (B0[0] + B0[1]) * (B2[0] + B2[1]) - PXOR T0, T2 - PXOR T1, T2 + PXOR T0, T2 // (B0[0] + B0[1]) * (B2[0] + B2[1]) - B0[0] * B2[0] + PXOR T1, T2 // B0[0] * B2[1] + B0[1] * B2[0] MOVOU T2, B4 PSLLDQ $8, B4 PSRLDQ $8, T2 PXOR B4, T0 - PXOR T2, T1 + PXOR T2, T1 // [T1, T0] = B0 * B2 + // Fast reduction + // 1st reduction MOVOU POLY, B2 - PCLMULQDQ $0x01, T0, B2 + PCLMULQDQ $0x01, T0, B2 // B2 = T0[0] * POLY[1] PSHUFD $78, T0, T0 PXOR B2, T0 + // 2nd reduction MOVOU POLY, B2 PCLMULQDQ $0x01, T0, B2 PSHUFD $78, T0, T0 diff --git a/sm4/gcm_cipher_asm.go b/sm4/gcm_cipher_asm.go index c5db35c..a3be083 100644 --- a/sm4/gcm_cipher_asm.go +++ b/sm4/gcm_cipher_asm.go @@ -30,11 +30,16 @@ func (c *sm4CipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) { binary.BigEndian.Uint64(key[:8]), binary.BigEndian.Uint64(key[8:]), } - g.productTable[reverseBits(1)] = x + g.productTable[8] = x // reverseBits(1) = 8 - for i := 2; i < 16; i += 2 { - g.productTable[reverseBits(i)] = gcmDouble(&g.productTable[reverseBits(i/2)]) - g.productTable[reverseBits(i+1)] = gcmAdd(&g.productTable[reverseBits(i)], &x) + for j := 4; j > 0; j /= 2 { + g.productTable[j] = gcmDouble(&g.productTable[j*2]) + } + + for j := 2; j < 16; j *= 2 { + for k := 1; k < j; k++ { + g.productTable[j+k] = gcmAdd(&g.productTable[j], &g.productTable[k]) + } } return g, nil