diff --git a/sm3/sm3block_amd64.s b/sm3/sm3block_amd64.s
index ea07cf3..6d082ee 100644
--- a/sm3/sm3block_amd64.s
+++ b/sm3/sm3block_amd64.s
@@ -1052,10 +1052,10 @@ avx2_loop0: // at each iteration works with one block (512 bit)
 	VPSHUFB BYTE_FLIP_MASK, XTMP3, XTMP3
 
 	// Transpose data into high/low parts
-	VPERM2I128 $0x20, XTMP2, XTMP0, XDWORD0 // w3, w2, w1, w0
-	VPERM2I128 $0x31, XTMP2, XTMP0, XDWORD1 // w7, w6, w5, w4
-	VPERM2I128 $0x20, XTMP3, XTMP1, XDWORD2 // w11, w10, w9, w8
-	VPERM2I128 $0x31, XTMP3, XTMP1, XDWORD3 // w15, w14, w13, w12
+	VPERM2I128 $0x20, XTMP2, XTMP0, XDWORD0 // w19, w18, w17, w16; w3, w2, w1, w0
+	VPERM2I128 $0x31, XTMP2, XTMP0, XDWORD1 // w23, w22, w21, w20; w7, w6, w5, w4
+	VPERM2I128 $0x20, XTMP3, XTMP1, XDWORD2 // w27, w26, w25, w24; w11, w10, w9, w8
+	VPERM2I128 $0x31, XTMP3, XTMP1, XDWORD3 // w31, w30, w29, w28; w15, w14, w13, w12
 
 avx2_last_block_enter:
 	ADDQ $64, INP
diff --git a/sm4/asm_amd64.s b/sm4/asm_amd64.s
index fb58739..005f4c4 100644
--- a/sm4/asm_amd64.s
+++ b/sm4/asm_amd64.s
@@ -113,11 +113,11 @@ GLOBL fk_mask<>(SB), RODATA, $16
 	MOVOU x, XTMP6; \
 	PSLLL $13, XTMP6; \
 	PSRLL $19, y; \
-	PXOR XTMP6, y; \ //y = X roll 13
+	POR XTMP6, y; \ //y = X roll 13
 	PSLLL $10, XTMP6; \
 	MOVOU x, XTMP7; \
 	PSRLL $9, XTMP7; \
-	PXOR XTMP6, XTMP7; \ //XTMP7 = x roll 23
+	POR XTMP6, XTMP7; \ //XTMP7 = x roll 23
 	PXOR XTMP7, y; \
 	PXOR y, x
 
diff --git a/sm4/cipher_asm_fuzzy_test.go b/sm4/cipher_asm_fuzzy_test.go
new file mode 100644
index 0000000..661a686
--- /dev/null
+++ b/sm4/cipher_asm_fuzzy_test.go
@@ -0,0 +1,50 @@
+//go:build amd64
+// +build amd64
+
+package sm4
+
+import (
+	"crypto/rand"
+	"io"
+	"reflect"
+	"testing"
+	"time"
+)
+
+// TestExpandKey feeds random 16-byte keys to expandKeyGo and expandKeyAsm until
+// the timer fires, failing on the first key whose enc or dec outputs differ.
+func TestExpandKey(t *testing.T) {
+	key := make([]byte, 16)
+
+	encRes1 := make([]uint32, 32)
+	decRes1 := make([]uint32, 32)
+	encRes2 := make([]uint32, 32)
+	decRes2 := make([]uint32, 32)
+	var timeout *time.Timer
+
+	if testing.Short() {
+		timeout = time.NewTimer(10 * time.Millisecond)
+	} else {
+		timeout = time.NewTimer(2 * time.Second)
+	}
+
+	for {
+		select {
+		case <-timeout.C:
+			return
+		default:
+		}
+		if _, err := io.ReadFull(rand.Reader, key); err != nil {
+			t.Fatal(err)
+		}
+		expandKeyGo(key, encRes1, decRes1)
+		expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0])
+		if !reflect.DeepEqual(encRes1, encRes2) {
+			t.Fatalf("expected=%v, result=%v\n", encRes1, encRes2)
+		}
+		if !reflect.DeepEqual(decRes1, decRes2) {
+			// Print the dec slices: they are what this comparison checked.
+			t.Fatalf("expected=%v, result=%v\n", decRes1, decRes2)
+		}
+	}
+}