sm4: arm64 cbc, fix register usage issue

This commit is contained in:
Sun Yimin 2024-09-13 11:23:31 +08:00 committed by GitHub
parent e5af209d83
commit 37493fe3df
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -123,6 +123,9 @@ cbcSm4Nibbles:
ADD dstPtr, srcPtrLen, R12 ADD dstPtr, srcPtrLen, R12
VLD1 (R10), [t0.S4, t1.S4, t2.S4, t3.S4] VLD1 (R10), [t0.S4, t1.S4, t2.S4, t3.S4]
VMOV t0.B16, t5.B16
VMOV t1.B16, t6.B16
VMOV t2.B16, t7.B16
VREV32 t0.B16, t0.B16 VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16 VREV32 t1.B16, t1.B16
VREV32 t2.B16, t2.B16 VREV32 t2.B16, t2.B16
@ -147,11 +150,11 @@ cbc4BlocksLoop:
VREV32 t2.B16, t2.B16 VREV32 t2.B16, t2.B16
VREV32 t3.B16, t3.B16 VREV32 t3.B16, t3.B16
VLD1 (R11), [V6.S4, V7.S4, V8.S4, V9.S4] VLD1 (R11), [t4.S4]
VEOR V6.B16, t0.B16, t0.B16 VEOR t4.B16, t0.B16, t0.B16
VEOR V7.B16, t1.B16, t1.B16 VEOR t5.B16, t1.B16, t1.B16
VEOR V8.B16, t2.B16, t2.B16 VEOR t6.B16, t2.B16, t2.B16
VEOR V9.B16, t3.B16, t3.B16 VEOR t7.B16, t3.B16, t3.B16
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (R12) VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (R12)
@ -170,9 +173,9 @@ cbcSm4Single:
// 4 blocks // 4 blocks
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4] VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
VMOV t0.B16, V6.B16 VMOV t0.B16, t4.B16
VMOV t1.B16, V7.B16 VMOV t1.B16, t5.B16
VMOV t2.B16, V8.B16 VMOV t2.B16, t6.B16
VREV32 t0.B16, t0.B16 VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16 VREV32 t1.B16, t1.B16
VREV32 t2.B16, t2.B16 VREV32 t2.B16, t2.B16
@ -196,9 +199,9 @@ cbc4BlocksLoop64:
VREV32 t3.B16, t3.B16 VREV32 t3.B16, t3.B16
VEOR IV.B16, t0.B16, t0.B16 VEOR IV.B16, t0.B16, t0.B16
VEOR V6.B16, t1.B16, t1.B16 VEOR t4.B16, t1.B16, t1.B16
VEOR V7.B16, t2.B16, t2.B16 VEOR t5.B16, t2.B16, t2.B16
VEOR V8.B16, t3.B16, t3.B16 VEOR t6.B16, t3.B16, t3.B16
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr) VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
@ -234,7 +237,7 @@ cbc4BlocksLoop16:
cbcSm4Single32: cbcSm4Single32:
VLD1 (srcPtr), [t0.S4, t1.S4] VLD1 (srcPtr), [t0.S4, t1.S4]
VMOV t0.B16, V6.B16 VMOV t0.B16, t4.B16
VREV32 t0.B16, t0.B16 VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16 VREV32 t1.B16, t1.B16
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7) PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
@ -254,15 +257,15 @@ cbc4BlocksLoop32:
VREV32 t1.B16, t1.B16 VREV32 t1.B16, t1.B16
VEOR IV.B16, t0.B16, t0.B16 VEOR IV.B16, t0.B16, t0.B16
VEOR V6.B16, t1.B16, t1.B16 VEOR t4.B16, t1.B16, t1.B16
VST1 [t0.S4, t1.S4], (dstPtr) VST1 [t0.S4, t1.S4], (dstPtr)
B cbcSm4Done B cbcSm4Done
cbcSm4Single48: cbcSm4Single48:
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4] VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
VMOV t0.B16, V6.B16 VMOV t0.B16, t4.B16
VMOV t1.B16, V7.B16 VMOV t1.B16, t5.B16
VREV32 t0.B16, t0.B16 VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16 VREV32 t1.B16, t1.B16
VREV32 t2.B16, t2.B16 VREV32 t2.B16, t2.B16
@ -284,8 +287,8 @@ cbc4BlocksLoop48:
VREV32 t2.B16, t2.B16 VREV32 t2.B16, t2.B16
VEOR IV.B16, t0.B16, t0.B16 VEOR IV.B16, t0.B16, t0.B16
VEOR V6.B16, t1.B16, t1.B16 VEOR t4.B16, t1.B16, t1.B16
VEOR V7.B16, t2.B16, t2.B16 VEOR t5.B16, t2.B16, t2.B16
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr) VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)