// Patch overview (review notes placed ahead of the first `diff --git` header).
//
// .github/workflows/test_ppc64.yaml
//   - Adds a `ppc64: [power8,power9]` matrix entry and passes it to the
//     "Test internal" step as GOPPC64, next to the existing GOARCH.
//   - Adds a "Test SM3" step that runs `go test ./sm3/...` with the same
//     GOARCH / GOPPC64 environment.
//   - The `arch:` and `GOARCH:` lines are removed and re-added with the same
//     visible text; presumably a whitespace-only change -- confirm.
//
// sm3/sm3blocks_ppc64x.go
//   - Declares the assembly routine transposeMatrix(dig **[8]uint32), marked
//     //go:noescape, for (ppc64 || ppc64le) && !purego builds.
//
// sm3/sm3blocks_ppc64x.s
//   - transposeMatrix reads four pointers from dig (offsets 0, 8, 16, 24).
//     For each pointer it loads 16 bytes with index register R0 (assumed to
//     hold zero -- confirm) into V0..V3 and the 16 bytes at offset 16 into
//     V4..V7.
//   - Four 16-byte VPERM control vectors are loaded from mask<> (offsets 0,
//     16, 32, 48) into V8..V11 with LVX.
//   - TRANSPOSE_MATRIX (eight VPERMs, V12..V15 as temporaries) is applied to
//     V0..V3 and then to V4..V7; the registers are stored back to the same
//     addresses they were loaded from, so the caller's arrays are modified
//     in place.
//   - On ppc64le the load/store macros pair LXVD2X / STXVD2X with
//     XXPERMDI $2; on ppc64 they are plain LXVD2X / STXVD2X.
//   - NOTE(review): the mask bytes are emitted as 8-byte DATA words and read
//     back with LVX, so their in-register byte order may differ between
//     ppc64 and ppc64le -- verify that the ppc64le result is the intended
//     transpose.
//
// sm3/sm3blocks_ppc64x_test.go
//   - TestTransposeMatrix fills a 4x8 matrix with 0..31, prints it, calls
//     transposeMatrix, and prints it again. It contains no assertions, so it
//     cannot fail on a wrong result; the output has to be inspected by hand.
//
diff --git a/.github/workflows/test_ppc64.yaml b/.github/workflows/test_ppc64.yaml index 14c05bd..5c1cfae 100644 --- a/.github/workflows/test_ppc64.yaml +++ b/.github/workflows/test_ppc64.yaml @@ -15,7 +15,8 @@ jobs: strategy: matrix: go-version: [1.19.x] - arch: [ppc64le] + arch: [ppc64le] + ppc64: [power8,power9] runs-on: ubuntu-latest steps: - name: Set up Go @@ -32,5 +33,11 @@ jobs: - name: Test internal run: go test ./internal/... env: - GOARCH: ${{ matrix.arch }} + GOARCH: ${{ matrix.arch }} + GOPPC64: ${{ matrix.ppc64 }} + - name: Test SM3 + run: go test ./sm3/... + env: + GOARCH: ${{ matrix.arch }} + GOPPC64: ${{ matrix.ppc64 }} diff --git a/sm3/sm3blocks_ppc64x.go b/sm3/sm3blocks_ppc64x.go new file mode 100644 index 0000000..1058a2d --- /dev/null +++ b/sm3/sm3blocks_ppc64x.go @@ -0,0 +1,10 @@ +// Copyright 2024 Sun Yimin. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +//go:build (ppc64 || ppc64le) && !purego + +package sm3 + +//go:noescape +func transposeMatrix(dig **[8]uint32) diff --git a/sm3/sm3blocks_ppc64x.s b/sm3/sm3blocks_ppc64x.s new file mode 100644 index 0000000..178bc38 --- /dev/null +++ b/sm3/sm3blocks_ppc64x.s @@ -0,0 +1,88 @@ +// Copyright 2024 Sun Yimin. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +//go:build (ppc64 || ppc64le) && !purego + +#include "textflag.h" + +DATA mask<>+0x00(SB)/8, $0x0001020310111213 +DATA mask<>+0x08(SB)/8, $0x0405060714151617 +DATA mask<>+0x10(SB)/8, $0x08090a0b18191a1b +DATA mask<>+0x18(SB)/8, $0x0c0d0e0f1c1d1e1f +DATA mask<>+0x20(SB)/8, $0x0001020304050607 +DATA mask<>+0x28(SB)/8, $0x1011121314151617 +DATA mask<>+0x30(SB)/8, $0x08090a0b0c0d0e0f +DATA mask<>+0x38(SB)/8, $0x18191a1b1c1d1e1f +GLOBL mask<>(SB), 8, $64 + +#ifdef GOARCH_ppc64le +#define P8_LXVB16X(RA,RB,VT) \ + LXVD2X (RA+RB), VT \ + XXPERMDI VT, VT, $2, VT + +#define P8_STXVB16X(VS,RA,RB) \ + XXPERMDI VS, VS, $2, VS \ + STXVD2X VS, (RA+RB) + +#else +#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT +#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB) +#endif + +#define TRANSPOSE_MATRIX(T0, T1, T2, T3, M0, M1, M2, M3, TMP0, TMP1, TMP2, TMP3) \ + VPERM T0, T1, M0, TMP0; \ + VPERM T2, T3, M0, TMP1; \ + VPERM T0, T1, M1, TMP2; \ + VPERM T2, T3, M1, TMP3; \ + VPERM TMP0, TMP1, M2, T0; \ + VPERM TMP0, TMP1, M3, T1; \ + VPERM TMP2, TMP3, M2, T2; \ + VPERM TMP2, TMP3, M3, T3 + +// transposeMatrix(dig **[8]uint32) +TEXT ·transposeMatrix(SB),NOSPLIT,$0 + MOVD dig+0(FP), R3 + MOVD $8, R5 + MOVD $16, R6 + MOVD $24, R7 + MOVD $32, R8 + MOVD $48, R9 + + MOVD (R0)(R3), R4 + P8_LXVB16X(R4, R0, V0) + P8_LXVB16X(R4, R6, V4) + MOVD (R5)(R3), R4 + P8_LXVB16X(R4, R0, V1) + P8_LXVB16X(R4, R6, V5) + MOVD (R6)(R3), R4 + P8_LXVB16X(R4, R0, V2) + P8_LXVB16X(R4, R6, V6) + MOVD (R7)(R3), R4 + P8_LXVB16X(R4, R0, V3) + P8_LXVB16X(R4, R6, V7) + + MOVD $mask<>+0x00(SB), R4 + + LVX (R0)(R4), V8 + LVX (R6)(R4), V9 + LVX (R8)(R4), V10 + LVX (R9)(R4), V11 + + TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15) + TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15) + + MOVD (R0)(R3), R4 + P8_STXVB16X(V0, R4, R0) + P8_STXVB16X(V4, R4, R6) + MOVD (R5)(R3), R4 + P8_STXVB16X(V1, R4, R0) + P8_STXVB16X(V5, R4, R6) + MOVD (R6)(R3), R4 + P8_STXVB16X(V2, R4, R0) + 
P8_STXVB16X(V6, R4, R6) + MOVD (R7)(R3), R4 + P8_STXVB16X(V3, R4, R0) + P8_STXVB16X(V7, R4, R6) + + RET diff --git a/sm3/sm3blocks_ppc64x_test.go b/sm3/sm3blocks_ppc64x_test.go new file mode 100644 index 0000000..7f708cd --- /dev/null +++ b/sm3/sm3blocks_ppc64x_test.go @@ -0,0 +1,35 @@ +// Copyright 2024 Sun Yimin. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +//go:build (ppc64 || ppc64le) && !purego + +package sm3 + +import ( + "fmt" + "testing" +) + +func TestTransposeMatrix(t *testing.T) { + var m [4][8]uint32 + var k uint32 = 0 + for i := 0; i < 4; i++ { + for j := 0; j < 8; j++ { + m[i][j] = k + k++ + fmt.Printf("%04x ", m[i][j]) + } + fmt.Println() + } + input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]} + transposeMatrix(&input[0]) + fmt.Println() + fmt.Println() + for i := 0; i < 4; i++ { + for j := 0; j < 8; j++ { + fmt.Printf("%04x ", m[i][j]) + } + fmt.Println() + } +}