sm4: amd64, minor change to single-block processing

This commit is contained in:
Sun Yimin 2024-09-19 17:52:12 +08:00 committed by GitHub
parent 6eb5efd898
commit 761746de78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 27 deletions

View File

@ -158,20 +158,15 @@ GLOBL fk_mask<>(SB), 8, $16
// SM4 single round function, handle 16 bytes data
// t0 ^= tao_l1(t1^t2^t3^xk)
// used R19 as temp 32/64 bits register
// parameters:
// - index: round key index immediate number
// - RK: round key register
// - IND: round key index base register
// - x: 128 bits temp register
// - x: 128 bits temp register (also as input RK)
// - y: 128 bits temp register
// - z: 128 bits temp register
// - t0: 128 bits register for data as result
// - t1: 128 bits register for data
// - t2: 128 bits register for data
// - t3: 128 bits register for data
#define SM4_SINGLE_ROUND(index, RK, IND, x, y, z, t0, t1, t2, t3) \
MOVL (index * 4)(RK)(IND*1), x; \
#define SM4_SINGLE_ROUND(x, y, z, t0, t1, t2, t3) \
PXOR t1, x; \
PXOR t2, x; \
PXOR t3, x; \

View File

@ -327,7 +327,7 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
MOVQ dst+8(FP), BX
MOVQ src+16(FP), DX
MOVOU (DX), t0
MOVUPS (DX), t0
PSHUFB flip_mask<>(SB), t0
PSHUFD $1, t0, t1
PSHUFD $2, t0, t2
@ -336,21 +336,25 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
XORL CX, CX
loop:
SM4_SINGLE_ROUND(0, AX, CX, x, y, XTMP6, t0, t1, t2, t3)
SM4_SINGLE_ROUND(1, AX, CX, x, y, XTMP6, t1, t2, t3, t0)
SM4_SINGLE_ROUND(2, AX, CX, x, y, XTMP6, t2, t3, t0, t1)
SM4_SINGLE_ROUND(3, AX, CX, x, y, XTMP6, t3, t0, t1, t2)
MOVUPS (AX)(CX*1), XTMP7
MOVOU XTMP7, x
SM4_SINGLE_ROUND(x, y, XTMP6, t0, t1, t2, t3)
PSHUFD $1, XTMP7, x
SM4_SINGLE_ROUND(x, y, XTMP6, t1, t2, t3, t0)
PSHUFD $2, XTMP7, x
SM4_SINGLE_ROUND(x, y, XTMP6, t2, t3, t0, t1)
PSHUFD $3, XTMP7, x
SM4_SINGLE_ROUND(x, y, XTMP6, t3, t0, t1, t2)
ADDL $16, CX
CMPL CX, $4*32
JB loop
PALIGNR $4, t3, t3
PALIGNR $4, t3, t2
PALIGNR $4, t2, t1
PALIGNR $4, t1, t0
PSHUFB flip_mask<>(SB), t0
MOVOU t0, (BX)
PUNPCKLLQ t2, t3
PUNPCKLLQ t0, t1
PUNPCKLQDQ t1, t3
PSHUFB flip_mask<>(SB), t3
MOVUPS t3, (BX)
done_sm4:
RET

View File

@ -131,10 +131,23 @@ func TestEncryptBlocksDoubleWithAESNI(t *testing.T) {
}
}
func BenchmarkExpand(b *testing.B) {
func BenchmarkExpandAESNI(b *testing.B) {
c := &sm4Cipher{}
b.ResetTimer()
for i := 0; i < b.N; i++ {
expandKey(encryptTests[0].key, c.enc[:], c.dec[:])
expandKeyAsm(&encryptTests[0].key[0], &ck[0], &c.enc[0], &c.dec[0], INST_AES)
}
}
// BenchmarkEncryptAsm times repeated calls to encryptBlockAsm on one 16-byte
// block. The same 16-byte slice is passed to expandKeyAsm as the key and to
// encryptBlockAsm as the source block; only the encryption calls inside the
// loop are timed, because the timer is reset after setup.
func BenchmarkEncryptAsm(b *testing.B) {
	block := []byte{
		0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
	}

	// Buffers for the 32-word encryption and decryption key schedules and
	// for the 16-byte output block.
	var (
		encKeys = make([]uint32, 32)
		decKeys = make([]uint32, 32)
		out     = make([]byte, 16)
	)
	expandKeyAsm(&block[0], &ck[0], &encKeys[0], &decKeys[0], 0)

	// Report throughput in terms of the block length processed per iteration.
	b.SetBytes(int64(len(block)))
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		encryptBlockAsm(&encKeys[0], &out[0], &block[0], 0)
	}
}

View File

@ -166,14 +166,19 @@ TEXT ·gcmSm4Init(SB),NOSPLIT,$0
XORL CX, CX
sm4InitEncLoop:
SM4_SINGLE_ROUND(0, RK, CX, T0, T1, T2, B3, B2, B1, B0)
SM4_SINGLE_ROUND(1, RK, CX, T0, T1, T2, B2, B1, B0, B3)
SM4_SINGLE_ROUND(2, RK, CX, T0, T1, T2, B1, B0, B3, B2)
SM4_SINGLE_ROUND(3, RK, CX, T0, T1, T2, B0, B3, B2, B1)
MOVUPS (RK)(CX*1), B4
MOVOU B4, T0
SM4_SINGLE_ROUND(T0, T1, T2, B3, B2, B1, B0)
PSHUFD $1, B4, T0
SM4_SINGLE_ROUND(T0, T1, T2, B2, B1, B0, B3)
PSHUFD $2, B4, T0
SM4_SINGLE_ROUND(T0, T1, T2, B1, B0, B3, B2)
PSHUFD $3, B4, T0
SM4_SINGLE_ROUND(T0, T1, T2, B0, B3, B2, B1)
ADDL $16, CX
CMPL CX, $4*32
JB sm4InitEncLoop
ADDL $16, CX
CMPL CX, $4*32
JB sm4InitEncLoop
PALIGNR $4, B3, B3
PALIGNR $4, B3, B2