Patch notes (text before the first "diff --git" header is ignored by patch tools):
* sm4/asm_arm64.s, sm4/gen_arm64_ni.go: after the SM4E rounds, the four 32-bit
  words of V8 are put in reverse order (VREV64 on .S4 swaps the words inside
  each 64-bit half, VEXT $8 then swaps the two halves) before the existing
  VREV32 byte swap and the store through R9.
* sm4/gen_arm64_ni.go: sm4e/sm4ekey now return the instruction word as built
  instead of byte-reversing it, so the math/bits import is dropped.
* tests: failure messages print with %x; the decRes mismatch message reports
  decRes1/decRes2 (it previously printed the enc slices).

diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s
index 33b219a..aca2c26 100644
--- a/sm4/asm_arm64.s
+++ b/sm4/asm_arm64.s
@@ -108,7 +108,9 @@
 	WORD $0xcec084a8 \ //SM4E V8.4S, V5.4S
 	WORD $0xcec084c8 \ //SM4E V8.4S, V6.4S
 	WORD $0xcec084e8 \ //SM4E V8.4S, V7.4S
-	VREV32 V8.B16, V8.B16 \
+	VREV64 V8.S4, V8.S4 \
+	VEXT $8, V8.B16, V8.B16, V8.B16 \
+	VREV32 V8.B16, V8.B16 \
 	VST1.P [V8.B16], 16(R9)
 
 // func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
@@ -277,6 +279,8 @@ sm4niblock:
 	WORD $0xcec08428 //SM4E V8.4S, V1.4S
 	WORD $0xcec08448 //SM4E V8.4S, V2.4S
 	WORD $0xcec08468 //SM4E V8.4S, V3.4S
+	VREV64 V8.S4, V8.S4
+	VEXT $8, V8.B16, V8.B16, V8.B16
 	VREV32 V8.B16, V8.B16
 	VST1 [V8.B16], (R9)
 	RET
diff --git a/sm4/cipher_asm_fuzzy_test.go b/sm4/cipher_asm_fuzzy_test.go
index 23e1622..4bf9987 100644
--- a/sm4/cipher_asm_fuzzy_test.go
+++ b/sm4/cipher_asm_fuzzy_test.go
@@ -36,10 +36,10 @@ func TestExpandKey(t *testing.T) {
 		expandKeyGo(key, encRes1, decRes1)
 		expandKey(key, encRes2, decRes2)
 		if !reflect.DeepEqual(encRes1, encRes2) {
-			t.Errorf("expected=%v, result=%v\n", encRes1, encRes2)
+			t.Errorf("expected=%x, result=%x\n", encRes1, encRes2)
 		}
 		if !reflect.DeepEqual(decRes1, decRes2) {
-			t.Errorf("expected=%v, result=%v\n", encRes1, encRes2)
+			t.Errorf("expected=%x, result=%x\n", decRes1, decRes2)
 		}
 	}
 }
diff --git a/sm4/cipher_test.go b/sm4/cipher_test.go
index 2a3f4d0..c693c55 100644
--- a/sm4/cipher_test.go
+++ b/sm4/cipher_test.go
@@ -30,11 +30,11 @@ func Test_sample1(t *testing.T) {
 	dst := make([]byte, 16)
 	c.Encrypt(dst, src)
 	if !reflect.DeepEqual(dst, expected) {
-		t.Errorf("expected=%v, result=%v\n", expected, dst)
+		t.Errorf("expected=%x, result=%x\n", expected, dst)
 	}
 	c.Decrypt(dst, expected)
 	if !reflect.DeepEqual(dst, src) {
-		t.Errorf("expected=%v, result=%v\n", src, dst)
+		t.Errorf("expected=%x, result=%x\n", src, dst)
 	}
 }
 
@@ -56,7 +56,7 @@ func Test_sample2(t *testing.T) {
 		c.Encrypt(dst, dst)
 	}
 	if !reflect.DeepEqual(dst, expected) {
-		t.Errorf("expected=%v, result=%v\n", expected, dst)
+		t.Errorf("expected=%x, result=%x\n", expected, dst)
 	}
 }
 
diff --git a/sm4/gen_arm64_ni.go b/sm4/gen_arm64_ni.go
index 0db7724..efe2cd6 100644
--- a/sm4/gen_arm64_ni.go
+++ b/sm4/gen_arm64_ni.go
@@ -10,20 +10,21 @@ import (
 	"bytes"
 	"fmt"
 	"log"
-	"math/bits"
 	"os"
 )
 
 //SM4E .4S, .4S
 func sm4e(Vd, Vn byte) uint32 {
 	inst := uint32(0xcec08400) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5
-	return bits.ReverseBytes32(inst)
+	// return bits.ReverseBytes32(inst)
+	return inst
 }
 
 //SM4EKEY .4S, .4S, .4S
 func sm4ekey(Vd, Vn, Vm byte) uint32 {
 	inst := uint32(0xce60c800) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | (uint32(Vm&0x1f) << 16)
-	return bits.ReverseBytes32(inst)
+	// return bits.ReverseBytes32(inst)
+	return inst
 }
 
 func sm4ekeyRound(buf *bytes.Buffer, d, n, m byte) {
@@ -93,6 +94,8 @@ TEXT ·encryptBlockSM4E(SB),NOSPLIT,$0
 	sm4eRound(buf, 8, 2)
 	sm4eRound(buf, 8, 3)
 	fmt.Fprintf(buf, `
+	VREV64 V8.S4, V8.S4
+	VEXT $8, V8.B16, V8.B16, V8.B16
 	VREV32 V8.B16, V8.B16
 	VST1 [V8.B16], (R9)
 	RET
@@ -121,6 +124,8 @@ TEXT ·encryptBlocksSM4E(SB),NOSPLIT,$0
 	sm4eRound(buf, 8, 5)
 	sm4eRound(buf, 8, 6)
 	sm4eRound(buf, 8, 7)
+	fmt.Fprintf(buf, "\tVREV64 V8.S4, V8.S4\n")
+	fmt.Fprintf(buf, "\tVEXT $8, V8.B16, V8.B16, V8.B16\n")
 	fmt.Fprintf(buf, "\tVREV32 V8.B16, V8.B16\n")
 	fmt.Fprintf(buf, "\tVST1.P [V8.B16], 16(R9)\n\n")
 }