From 6b0a557be16da5dc333dbf735649c91bdccacc96 Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Thu, 12 Sep 2024 10:12:37 +0800
Subject: [PATCH] sm4: ppc64x, merge the implementation to cipher_asm

---
 internal/cpuid/cpuid_ppc64x.go |   6 ++
 sm4/asm_ppc64x.s               | 139 +++++++++++++++++++++++++++++++++
 sm4/cipher_asm.go              |   9 ++-
 sm4/cipher_asm_test.go         |   2 +-
 sm4/cipher_generic.go          |   2 +-
 sm4/cipher_ni.go               |   9 ++-
 sm4/sm4_gcm_asm.go             |   7 --
 sm4/sm4_ppc64x.go              |  12 ---
 sm4/sm4_ppc64x_test.go         |  61 ---------------
 sm4/sm4ni_gcm_asm.go           |   7 --
 10 files changed, 163 insertions(+), 91 deletions(-)
 create mode 100644 internal/cpuid/cpuid_ppc64x.go
 delete mode 100644 sm4/sm4_ppc64x.go
 delete mode 100644 sm4/sm4_ppc64x_test.go

diff --git a/internal/cpuid/cpuid_ppc64x.go b/internal/cpuid/cpuid_ppc64x.go
new file mode 100644
index 0000000..a15273f
--- /dev/null
+++ b/internal/cpuid/cpuid_ppc64x.go
@@ -0,0 +1,6 @@
+//go:build (ppc64 || ppc64le)
+
+package cpuid
+
+var HasAES = true
+var HasGFMUL = false
diff --git a/sm4/asm_ppc64x.s b/sm4/asm_ppc64x.s
index 6f898ad..9c89c39 100644
--- a/sm4/asm_ppc64x.s
+++ b/sm4/asm_ppc64x.s
@@ -310,6 +310,145 @@ encryptBlockLoop:
 
 	RET
 
+#define TMP0 V10
+#define TMP1 V11
+#define TMP2 V12
+#define TMP3 V13
 // func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
 TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
+	// prepare/load constants
+	VSPLTISW $4, V_FOUR;
+#ifdef NEEDS_PERMW
+	MOVD $·rcon(SB), R4
+	LVX (R4), ESPERMW
+#endif
+	MOVD $·rcon+0x10(SB), R4
+	LXVD2X (R4)(R0), M0
+	MOVD $0x10, R3
+	LXVD2X (R4)(R3), M1
+	MOVD $0x20, R3
+	LXVD2X (R4)(R3), M2
+	MOVD $0x30, R3
+	LXVD2X (R4)(R3), M3
+	MOVD $0x40, R3
+	LXVD2X (R4)(R3), REVERSE_WORDS
+	MOVD $0x50, R3
+	LXVD2X (R4)(R3), NIBBLE_MASK
+	MOVD $0x60, R3
+	LXVD2X (R4)(R3), INVERSE_SHIFT_ROWS
+	MOVD $0x70, R3
+	LXVD2X (R4)(R3), M1L
+	MOVD $0x80, R3
+	LXVD2X (R4)(R3), M1H
+	MOVD $0x90, R3
+	LXVD2X (R4)(R3), M2L
+	MOVD $0xa0, R3
+	LXVD2X (R4)(R3), M2H
+
+	MOVD xk+0(FP), R3
+	MOVD dst+8(FP), R4
+	MOVD src+32(FP), R5
+	MOVD src_len+40(FP), R6
+
+	CMP R6, $128
+	BEQ enc8blocks
+
+enc4blocks:
+	PPC64X_LXVW4X(R5, R0, V0)
+	MOVD $16, R7
+	PPC64X_LXVW4X(R5, R7, V1)
+	MOVD $32, R7
+	PPC64X_LXVW4X(R5, R7, V2)
+	MOVD $48, R7
+	PPC64X_LXVW4X(R5, R7, V3)
+	TRANSPOSE_MATRIX(V0, V1, V2, V3)
+	// prepare counter
+	MOVD $8, R7
+	MOVD R7, CTR
+
+enc4blocksLoop:
+	// load xk
+	LXVW4X (R3), V4
+	VSPLTW $0, V4, V8
+	SM4_ROUND(V8, TMP0, TMP1, TMP2, TMP3, V0, V1, V2, V3)
+	VSPLTW $1, V4, V8
+	SM4_ROUND(V8, TMP0, TMP1, TMP2, TMP3, V1, V2, V3, V0)
+	VSPLTW $2, V4, V8
+	SM4_ROUND(V8, TMP0, TMP1, TMP2, TMP3, V2, V3, V0, V1)
+	VSPLTW $3, V4, V8
+	SM4_ROUND(V8, TMP0, TMP1, TMP2, TMP3, V3, V0, V1, V2)
+	ADD $16, R3
+	BDNZ enc4blocksLoop
+
+	TRANSPOSE_MATRIX(V0, V1, V2, V3)
+	PPC64X_STXVW4X(V0, R4, R0)
+	MOVD $16, R7
+	PPC64X_STXVW4X(V1, R4, R7)
+	MOVD $32, R7
+	PPC64X_STXVW4X(V2, R4, R7)
+	MOVD $48, R7
+	PPC64X_STXVW4X(V3, R4, R7)
 	RET
+
+enc8blocks:
+	PPC64X_LXVW4X(R5, R0, V0)
+	MOVD $16, R7
+	PPC64X_LXVW4X(R5, R7, V1)
+	MOVD $32, R7
+	PPC64X_LXVW4X(R5, R7, V2)
+	MOVD $48, R7
+	PPC64X_LXVW4X(R5, R7, V3)
+	MOVD $64, R7
+	PPC64X_LXVW4X(R5, R7, V4)
+	MOVD $80, R7
+	PPC64X_LXVW4X(R5, R7, V5)
+	MOVD $96, R7
+	PPC64X_LXVW4X(R5, R7, V6)
+	MOVD $112, R7
+	PPC64X_LXVW4X(R5, R7, V7)
+	TRANSPOSE_MATRIX(V0, V1, V2, V3)
+	TRANSPOSE_MATRIX(V4, V5, V6, V7)
+	// prepare counter
+	MOVD $8, R7
+	MOVD R7, CTR
+
+enc8blocksLoop:
+	LXVW4X (R3), V8
+	VSPLTW $0, V8, V9
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V0, V1, V2, V3)
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V4, V5, V6, V7)
+	VSPLTW $1, V8, V9
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V1, V2, V3, V0)
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V5, V6, V7, V4)
+	VSPLTW $2, V8, V9
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V2, V3, V0, V1)
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V6, V7, V4, V5)
+	VSPLTW $3, V8, V9
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V3, V0, V1, V2)
+	SM4_ROUND(V9, TMP0, TMP1, TMP2, TMP3, V7, V4, V5, V6)
+	ADD $16, R3
+	BDNZ enc8blocksLoop
+
+	TRANSPOSE_MATRIX(V0, V1, V2, V3)
+	TRANSPOSE_MATRIX(V4, V5, V6, V7)
+	PPC64X_STXVW4X(V0, R4, R0)
+	MOVD $16, R7
+	PPC64X_STXVW4X(V1, R4, R7)
+	MOVD $32, R7
+	PPC64X_STXVW4X(V2, R4, R7)
+	MOVD $48, R7
+	PPC64X_STXVW4X(V3, R4, R7)
+	MOVD $64, R7
+	PPC64X_STXVW4X(V4, R4, R7)
+	MOVD $80, R7
+	PPC64X_STXVW4X(V5, R4, R7)
+	MOVD $96, R7
+	PPC64X_STXVW4X(V6, R4, R7)
+	MOVD $112, R7
+	PPC64X_STXVW4X(V7, R4, R7)
+
+	RET
+#undef TMP0
+#undef TMP1
+#undef TMP2
+#undef TMP3
diff --git a/sm4/cipher_asm.go b/sm4/cipher_asm.go
index 0b65809..291a8f5 100644
--- a/sm4/cipher_asm.go
+++ b/sm4/cipher_asm.go
@@ -1,4 +1,4 @@
-//go:build (amd64 || arm64) && !purego
+//go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
 
 package sm4
 
@@ -38,6 +38,13 @@ type sm4CipherAsm struct {
 	blocksSize int
 }
 
+// sm4CipherGCM implements crypto/cipher.gcmAble so that crypto/cipher.NewGCM
+// will use the optimised implementation in this file when possible. Instances
+// of this type only exist when hasGCMAsm and hasAES returns true.
+type sm4CipherGCM struct {
+	sm4CipherAsm
+}
+
 func newCipher(key []byte) (cipher.Block, error) {
 	if supportSM4 {
 		return newCipherNI(key)
diff --git a/sm4/cipher_asm_test.go b/sm4/cipher_asm_test.go
index af420fe..f015939 100644
--- a/sm4/cipher_asm_test.go
+++ b/sm4/cipher_asm_test.go
@@ -1,4 +1,4 @@
-//go:build (amd64 || arm64) && !purego
+//go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
 
 package sm4
 
diff --git a/sm4/cipher_generic.go b/sm4/cipher_generic.go
index d51a9ea..ec437b0 100644
--- a/sm4/cipher_generic.go
+++ b/sm4/cipher_generic.go
@@ -1,4 +1,4 @@
-//go:build purego || !(amd64 || arm64)
+//go:build purego || !(amd64 || arm64 || ppc64 || ppc64le)
 
 package sm4
 
diff --git a/sm4/cipher_ni.go b/sm4/cipher_ni.go
index 9f27f24..5a75959 100644
--- a/sm4/cipher_ni.go
+++ b/sm4/cipher_ni.go
@@ -1,4 +1,4 @@
-//go:build (amd64 || arm64) && !purego
+//go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
 
 package sm4
 
@@ -12,6 +12,13 @@ type sm4CipherNI struct {
 	sm4Cipher
 }
 
+// sm4CipherNIGCM implements crypto/cipher.gcmAble so that crypto/cipher.NewGCM
+// will use the optimised implementation in this file when possible. Instances
+// of this type only exist when hasGCMAsm and hasSM4 returns true.
+type sm4CipherNIGCM struct {
+	sm4CipherNI
+}
+
 func newCipherNI(key []byte) (cipher.Block, error) {
 	c := &sm4CipherNIGCM{sm4CipherNI{sm4Cipher{}}}
 	expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0], INST_SM4)
diff --git a/sm4/sm4_gcm_asm.go b/sm4/sm4_gcm_asm.go
index c864580..bba902a 100644
--- a/sm4/sm4_gcm_asm.go
+++ b/sm4/sm4_gcm_asm.go
@@ -9,13 +9,6 @@ import (
 	"github.com/emmansun/gmsm/internal/alias"
 )
 
-// sm4CipherGCM implements crypto/cipher.gcmAble so that crypto/cipher.NewGCM
-// will use the optimised implementation in this file when possible. Instances
-// of this type only exist when hasGCMAsm and hasAES returns true.
-type sm4CipherGCM struct {
-	sm4CipherAsm
-}
-
 // Assert that sm4CipherGCM implements the gcmAble interface.
 var _ gcmAble = (*sm4CipherGCM)(nil)
 
diff --git a/sm4/sm4_ppc64x.go b/sm4/sm4_ppc64x.go
deleted file mode 100644
index 261af48..0000000
--- a/sm4/sm4_ppc64x.go
+++ /dev/null
@@ -1,12 +0,0 @@
-//go:build (ppc64 || ppc64le) && !purego
-
-package sm4
-
-//go:noescape
-func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
-
-//go:noescape
-func encryptBlockAsm(xk *uint32, dst, src *byte, inst int)
-
-//go:noescape
-func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
diff --git a/sm4/sm4_ppc64x_test.go b/sm4/sm4_ppc64x_test.go
deleted file mode 100644
index 67d23a1..0000000
--- a/sm4/sm4_ppc64x_test.go
+++ /dev/null
@@ -1,61 +0,0 @@
-//go:build (ppc64 || ppc64le) && !purego
-
-package sm4
-
-import (
-	"crypto/rand"
-	"io"
-	"reflect"
-	"testing"
-	"time"
-)
-
-func TestExpandKey(t *testing.T) {
-	key := make([]byte, 16)
-
-	var encRes1 [rounds]uint32
-	var decRes1 [rounds]uint32
-	encRes2 := make([]uint32, 32)
-	decRes2 := make([]uint32, 32)
-	var timeout *time.Timer
-
-	if testing.Short() {
-		timeout = time.NewTimer(10 * time.Millisecond)
-	} else {
-		timeout = time.NewTimer(2 * time.Second)
-	}
-
-	for {
-		select {
-		case <-timeout.C:
-			return
-		default:
-		}
-		io.ReadFull(rand.Reader, key)
-		expandKeyGo(key, &encRes1, &decRes1)
-		expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0], 0)
-		if !reflect.DeepEqual(encRes1[:], encRes2) {
-			t.Errorf("expected=%x, result=%x\n", encRes1[:], encRes2)
-		}
-		if !reflect.DeepEqual(decRes1[:], decRes2) {
-			t.Errorf("expected=%x, result=%x\n", decRes1[:], decRes2)
-		}
-	}
-}
-
-func TestEncryptBlockAsm(t *testing.T) {
-	src := []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}
-	expected := []byte{0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e, 0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46}
-	encRes2 := make([]uint32, 32)
-	decRes2 := make([]uint32, 32)
-	expandKeyAsm(&src[0], &ck[0], &encRes2[0], &decRes2[0], 0)
-	dst := make([]byte, 16)
-	encryptBlockAsm(&encRes2[0], &dst[0], &src[0], 0)
-	if !reflect.DeepEqual(dst, expected) {
-		t.Errorf("expected=%x, result=%x\n", expected, dst)
-	}
-	encryptBlockAsm(&decRes2[0], &dst[0], &expected[0], 0)
-	if !reflect.DeepEqual(dst, src) {
-		t.Errorf("expected=%x, result=%x\n", src, dst)
-	}
-}
diff --git a/sm4/sm4ni_gcm_asm.go b/sm4/sm4ni_gcm_asm.go
index 937e2e3..12d7855 100644
--- a/sm4/sm4ni_gcm_asm.go
+++ b/sm4/sm4ni_gcm_asm.go
@@ -15,13 +15,6 @@ func gcmSm4niEnc(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk
 
 //go:noescape
 func gcmSm4niDec(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
 
-// sm4CipherNIGCM implements crypto/cipher.gcmAble so that crypto/cipher.NewGCM
-// will use the optimised implementation in this file when possible. Instances
-// of this type only exist when hasGCMAsm and hasSM4 returns true.
-type sm4CipherNIGCM struct {
-	sm4CipherNI
-}
-
 // Assert that sm4CipherNIGCM implements the gcmAble interface.
 var _ gcmAble = (*sm4CipherNIGCM)(nil)
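
Usage note (not part of the patch): the following is a minimal sketch of how the merged code path is reached from ordinary Go code. sm4.NewCipher and crypto/cipher.NewGCM are the real public entry points; the key, nonce, and plaintext values are illustrative only, and whether the GCM fast path is taken on a given CPU depends on the platform checks added above.

package main

import (
	"crypto/cipher"
	"crypto/rand"
	"fmt"

	"github.com/emmansun/gmsm/sm4"
)

func main() {
	key := make([]byte, 16) // SM4 uses a 128-bit key
	if _, err := rand.Read(key); err != nil {
		panic(err)
	}

	// On amd64, arm64 and (with this patch) ppc64/ppc64le the returned
	// cipher.Block is backed by the assembly implementation; elsewhere
	// the generic Go code in cipher_generic.go is used.
	block, err := sm4.NewCipher(key)
	if err != nil {
		panic(err)
	}

	// crypto/cipher.NewGCM type-asserts the gcmAble interface, so the
	// optimised GCM implementation is picked up when the block exposes it.
	aead, err := cipher.NewGCM(block)
	if err != nil {
		panic(err)
	}

	nonce := make([]byte, aead.NonceSize())
	if _, err := rand.Read(nonce); err != nil {
		panic(err)
	}
	ciphertext := aead.Seal(nil, nonce, []byte("example plaintext"), nil)
	fmt.Printf("%x\n", ciphertext)
}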