sm4: ppc64x, fix encryptBlocksAsm #249

This commit is contained in:
Sun Yimin 2024-09-12 11:50:39 +08:00 committed by GitHub
parent 66a9dcdd81
commit 7aae08ff76
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -74,7 +74,17 @@ GLOBL ·rcon(SB), RODATA, $192
VSPLTISW $n, tmp \ VSPLTISW $n, tmp \
VRLW s, tmp, r VRLW s, tmp, r
#define TRANSPOSE_MATRIX(T0, T1, T2, T3) \ // input: from high to low
// t0 = t0.S3, t0.S2, t0.S1, t0.S0
// t1 = t1.S3, t1.S2, t1.S1, t1.S0
// t2 = t2.S3, t2.S2, t2.S1, t2.S0
// t3 = t3.S3, t3.S2, t3.S1, t3.S0
// output: from high to low
// t0 = t3.S0, t2.S0, t1.S0, t0.S0
// t1 = t3.S1, t2.S1, t1.S1, t0.S1
// t2 = t3.S2, t2.S2, t1.S2, t0.S2
// t3 = t3.S3, t2.S3, t1.S3, t0.S3
#define PRE_TRANSPOSE_MATRIX(T0, T1, T2, T3) \
VPERM T0, T1, M0, TMP0; \ VPERM T0, T1, M0, TMP0; \
VPERM T2, T3, M0, TMP1; \ VPERM T2, T3, M0, TMP1; \
VPERM T0, T1, M1, TMP2; \ VPERM T0, T1, M1, TMP2; \
@ -84,6 +94,26 @@ GLOBL ·rcon(SB), RODATA, $192
VPERM TMP2, TMP3, M2, T2; \ VPERM TMP2, TMP3, M2, T2; \
VPERM TMP2, TMP3, M3, T3 VPERM TMP2, TMP3, M3, T3
// TRANSPOSE_MATRIX rearranges, in place, the four elements of each of the
// vector registers T0..T3 according to the layout below.
//
// input: from high to low
// t0 = t0.S3, t0.S2, t0.S1, t0.S0
// t1 = t1.S3, t1.S2, t1.S1, t1.S0
// t2 = t2.S3, t2.S2, t2.S1, t2.S0
// t3 = t3.S3, t3.S2, t3.S1, t3.S0
// output: from high to low
// t0 = t0.S0, t1.S0, t2.S0, t3.S0
// t1 = t0.S1, t1.S1, t2.S1, t3.S1
// t2 = t0.S2, t1.S2, t2.S2, t3.S2
// t3 = t0.S3, t1.S3, t2.S3, t3.S3
//
// Uses:     M0, M1, M2, M3 as VPERM control vectors (defined outside this
//           macro; NOTE(review): their contents are not visible here --
//           confirm they are loaded before the first expansion).
// Clobbers: TMP0, TMP1, TMP2, TMP3 (scratch), plus T0..T3 (results).
//
// The first four VPERMs combine the pairs (T1,T0) and (T3,T2) through M0/M1;
// the last four combine those intermediates through M2/M3 into T0..T3.
//
// The final line deliberately carries no trailing "; \": a dangling line
// continuation would splice the following source line into this macro.
#define TRANSPOSE_MATRIX(T0, T1, T2, T3) \
	VPERM T1, T0, M0, TMP0; \
	VPERM T1, T0, M1, TMP1; \
	VPERM T3, T2, M0, TMP2; \
	VPERM T3, T2, M1, TMP3; \
	VPERM TMP2, TMP0, M2, T0; \
	VPERM TMP2, TMP0, M3, T1; \
	VPERM TMP3, TMP1, M2, T2; \
	VPERM TMP3, TMP1, M3, T3
// Affine Transform // Affine Transform
// parameters: // parameters:
// - L: table low nibbles // - L: table low nibbles
@ -361,7 +391,7 @@ enc4blocks:
PPC64X_LXVW4X(R5, R7, V2) PPC64X_LXVW4X(R5, R7, V2)
MOVD $48, R7 MOVD $48, R7
PPC64X_LXVW4X(R5, R7, V3) PPC64X_LXVW4X(R5, R7, V3)
TRANSPOSE_MATRIX(V0, V1, V2, V3) PRE_TRANSPOSE_MATRIX(V0, V1, V2, V3)
// prepare counter // prepare counter
MOVD $8, R7 MOVD $8, R7
MOVD R7, CTR MOVD R7, CTR
@ -381,13 +411,13 @@ enc4blocksLoop:
BDNZ enc4blocksLoop BDNZ enc4blocksLoop
TRANSPOSE_MATRIX(V0, V1, V2, V3) TRANSPOSE_MATRIX(V0, V1, V2, V3)
PPC64X_STXVW4X(V3, R4, R0) PPC64X_STXVW4X(V0, R4, R0)
MOVD $16, R7 MOVD $16, R7
PPC64X_STXVW4X(V2, R4, R7)
MOVD $32, R7
PPC64X_STXVW4X(V1, R4, R7) PPC64X_STXVW4X(V1, R4, R7)
MOVD $32, R7
PPC64X_STXVW4X(V2, R4, R7)
MOVD $48, R7 MOVD $48, R7
PPC64X_STXVW4X(V0, R4, R7) PPC64X_STXVW4X(V3, R4, R7)
RET RET
enc8blocks: enc8blocks:
@ -406,8 +436,8 @@ enc8blocks:
PPC64X_LXVW4X(R5, R7, V6) PPC64X_LXVW4X(R5, R7, V6)
MOVD $112, R7 MOVD $112, R7
PPC64X_LXVW4X(R5, R7, V7) PPC64X_LXVW4X(R5, R7, V7)
TRANSPOSE_MATRIX(V0, V1, V2, V3) PRE_TRANSPOSE_MATRIX(V0, V1, V2, V3)
TRANSPOSE_MATRIX(V4, V5, V6, V7) PRE_TRANSPOSE_MATRIX(V4, V5, V6, V7)
// prepare counter // prepare counter
MOVD $8, R7 MOVD $8, R7
MOVD R7, CTR MOVD R7, CTR
@ -431,21 +461,21 @@ enc8blocksLoop:
TRANSPOSE_MATRIX(V0, V1, V2, V3) TRANSPOSE_MATRIX(V0, V1, V2, V3)
TRANSPOSE_MATRIX(V4, V5, V6, V7) TRANSPOSE_MATRIX(V4, V5, V6, V7)
PPC64X_STXVW4X(V3, R4, R0) PPC64X_STXVW4X(V0, R4, R0)
MOVD $16, R7 MOVD $16, R7
PPC64X_STXVW4X(V2, R4, R7)
MOVD $32, R7
PPC64X_STXVW4X(V1, R4, R7) PPC64X_STXVW4X(V1, R4, R7)
MOVD $32, R7
PPC64X_STXVW4X(V2, R4, R7)
MOVD $48, R7 MOVD $48, R7
PPC64X_STXVW4X(V0, R4, R7) PPC64X_STXVW4X(V3, R4, R7)
MOVD $64, R7 MOVD $64, R7
PPC64X_STXVW4X(V7, R4, R7)
MOVD $80, R7
PPC64X_STXVW4X(V6, R4, R7)
MOVD $96, R7
PPC64X_STXVW4X(V5, R4, R7)
MOVD $112, R7
PPC64X_STXVW4X(V4, R4, R7) PPC64X_STXVW4X(V4, R4, R7)
MOVD $80, R7
PPC64X_STXVW4X(V5, R4, R7)
MOVD $96, R7
PPC64X_STXVW4X(V6, R4, R7)
MOVD $112, R7
PPC64X_STXVW4X(V7, R4, R7)
RET RET
#undef TMP0 #undef TMP0