diff --git a/sm3/sm3blocks_ppc64x.go b/sm3/sm3blocks_ppc64x.go
index 1058a2d..f3ee04d 100644
--- a/sm3/sm3blocks_ppc64x.go
+++ b/sm3/sm3blocks_ppc64x.go
@@ -8,3 +8,10 @@ package sm3
 
 //go:noescape
 func transposeMatrix(dig **[8]uint32)
+
+// copyResultsBy4 copies 128 bytes starting at dig to dst using eight
+// 16-byte vector load/store pairs. Both pointers must reference at least
+// 128 accessible bytes.
+//
+//go:noescape
+func copyResultsBy4(dig *uint32, dst *byte)
diff --git a/sm3/sm3blocks_ppc64x.s b/sm3/sm3blocks_ppc64x.s
index ecf9846..47ef3cb 100644
--- a/sm3/sm3blocks_ppc64x.s
+++ b/sm3/sm3blocks_ppc64x.s
@@ -8,7 +8,7 @@
 
 // For P9 instruction emulation
 #define ESPERMW V21 // Endian swapping permute into BE
-#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXVB16X
+#define TMP2 V22 // Temporary for STOREWORDS
 
 DATA ·mask+0x00(SB)/8, $0x0c0d0e0f08090a0b // Permute for vector doubleword endian swap
 DATA ·mask+0x08(SB)/8, $0x0405060700010203
@@ -100,3 +100,47 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0
 	STOREWORDS(V7, R4, R6)
 
 	RET
+
+// func copyResultsBy4(dig *uint32, dst *byte)
+//
+// Copies 128 bytes from dig to dst, 16 bytes at a time, reusing V0 as the
+// staging register and R5 as the running byte offset.
+// NOTE(review): the LXVD2X/STXVD2X pair applies no byte swap, so on
+// little-endian targets dst does not receive big-endian words; confirm
+// against the expectation in TestCopyResultsBy4.
+TEXT ·copyResultsBy4(SB),NOSPLIT,$0
+	MOVD	dig+0(FP), R3
+	MOVD	dst+8(FP), R4
+
+	LXVD2X	(R0)(R3), V0
+	STXVD2X	V0, (R0)(R4)
+
+	MOVD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	ADD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	ADD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	ADD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	ADD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	ADD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	ADD	$16, R5
+	LXVD2X	(R5)(R3), V0
+	STXVD2X	V0, (R5)(R4)
+
+	RET
diff --git a/sm3/sm3blocks_ppc64x_test.go b/sm3/sm3blocks_ppc64x_test.go
index e3246fb..5d7d89d 100644
--- a/sm3/sm3blocks_ppc64x_test.go
+++ b/sm3/sm3blocks_ppc64x_test.go
@@ -7,6 +7,7 @@
 package sm3
 
 import (
+	"encoding/binary"
 	"fmt"
 	"testing"
 )
@@ -33,3 +34,24 @@ func TestTransposeMatrix(t *testing.T) {
 		fmt.Println()
 	}
 }
+
+// TestCopyResultsBy4 checks that copyResultsBy4 emits the four 8-word
+// digests as 128 bytes of big-endian words.
+func TestCopyResultsBy4(t *testing.T) {
+	var m [4][8]uint32
+	var got, want [128]byte
+	var k uint32
+	for i := range m {
+		for j := range m[i] {
+			m[i][j] = k
+			binary.BigEndian.PutUint32(want[i*32+j*4:], k)
+			k++
+		}
+	}
+
+	copyResultsBy4(&m[0][0], &got[0])
+
+	if got != want {
+		t.Errorf("copyResultsBy4 mismatch:\n got: %x\nwant: %x", got[:], want[:])
+	}
+}