mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-28 05:06:18 +08:00
sm4: arm64 cbc, fix register usage issue
This commit is contained in:
parent
e5af209d83
commit
37493fe3df
@ -123,6 +123,9 @@ cbcSm4Nibbles:
|
|||||||
ADD dstPtr, srcPtrLen, R12
|
ADD dstPtr, srcPtrLen, R12
|
||||||
|
|
||||||
VLD1 (R10), [t0.S4, t1.S4, t2.S4, t3.S4]
|
VLD1 (R10), [t0.S4, t1.S4, t2.S4, t3.S4]
|
||||||
|
VMOV t0.B16, t5.B16
|
||||||
|
VMOV t1.B16, t6.B16
|
||||||
|
VMOV t2.B16, t7.B16
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
@ -147,11 +150,11 @@ cbc4BlocksLoop:
|
|||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
VREV32 t3.B16, t3.B16
|
VREV32 t3.B16, t3.B16
|
||||||
|
|
||||||
VLD1 (R11), [V6.S4, V7.S4, V8.S4, V9.S4]
|
VLD1 (R11), [t4.S4]
|
||||||
VEOR V6.B16, t0.B16, t0.B16
|
VEOR t4.B16, t0.B16, t0.B16
|
||||||
VEOR V7.B16, t1.B16, t1.B16
|
VEOR t5.B16, t1.B16, t1.B16
|
||||||
VEOR V8.B16, t2.B16, t2.B16
|
VEOR t6.B16, t2.B16, t2.B16
|
||||||
VEOR V9.B16, t3.B16, t3.B16
|
VEOR t7.B16, t3.B16, t3.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (R12)
|
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (R12)
|
||||||
|
|
||||||
@ -170,9 +173,9 @@ cbcSm4Single:
|
|||||||
|
|
||||||
// 4 blocks
|
// 4 blocks
|
||||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
|
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
|
||||||
VMOV t0.B16, V6.B16
|
VMOV t0.B16, t4.B16
|
||||||
VMOV t1.B16, V7.B16
|
VMOV t1.B16, t5.B16
|
||||||
VMOV t2.B16, V8.B16
|
VMOV t2.B16, t6.B16
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
@ -196,9 +199,9 @@ cbc4BlocksLoop64:
|
|||||||
VREV32 t3.B16, t3.B16
|
VREV32 t3.B16, t3.B16
|
||||||
|
|
||||||
VEOR IV.B16, t0.B16, t0.B16
|
VEOR IV.B16, t0.B16, t0.B16
|
||||||
VEOR V6.B16, t1.B16, t1.B16
|
VEOR t4.B16, t1.B16, t1.B16
|
||||||
VEOR V7.B16, t2.B16, t2.B16
|
VEOR t5.B16, t2.B16, t2.B16
|
||||||
VEOR V8.B16, t3.B16, t3.B16
|
VEOR t6.B16, t3.B16, t3.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
|
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
|
||||||
|
|
||||||
@ -234,7 +237,7 @@ cbc4BlocksLoop16:
|
|||||||
|
|
||||||
cbcSm4Single32:
|
cbcSm4Single32:
|
||||||
VLD1 (srcPtr), [t0.S4, t1.S4]
|
VLD1 (srcPtr), [t0.S4, t1.S4]
|
||||||
VMOV t0.B16, V6.B16
|
VMOV t0.B16, t4.B16
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
|
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
|
||||||
@ -254,15 +257,15 @@ cbc4BlocksLoop32:
|
|||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
|
|
||||||
VEOR IV.B16, t0.B16, t0.B16
|
VEOR IV.B16, t0.B16, t0.B16
|
||||||
VEOR V6.B16, t1.B16, t1.B16
|
VEOR t4.B16, t1.B16, t1.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4], (dstPtr)
|
VST1 [t0.S4, t1.S4], (dstPtr)
|
||||||
B cbcSm4Done
|
B cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single48:
|
cbcSm4Single48:
|
||||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
|
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
|
||||||
VMOV t0.B16, V6.B16
|
VMOV t0.B16, t4.B16
|
||||||
VMOV t1.B16, V7.B16
|
VMOV t1.B16, t5.B16
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
@ -284,8 +287,8 @@ cbc4BlocksLoop48:
|
|||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
|
|
||||||
VEOR IV.B16, t0.B16, t0.B16
|
VEOR IV.B16, t0.B16, t0.B16
|
||||||
VEOR V6.B16, t1.B16, t1.B16
|
VEOR t4.B16, t1.B16, t1.B16
|
||||||
VEOR V7.B16, t2.B16, t2.B16
|
VEOR t5.B16, t2.B16, t2.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
|
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user