xts/gcm: arm64 use VSRI/VSLI

This commit is contained in:
Sun Yimin 2024-09-24 08:32:42 +08:00 committed by GitHub
parent f47051ed86
commit 25e0700f81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 15 deletions

View File

@ -21,9 +21,8 @@
\ \
VUSHR $63, B0.D2, T2.D2 \ VUSHR $63, B0.D2, T2.D2 \
VEXT $8, T2.B16, ZERO.B16, T2.B16 \ VEXT $8, T2.B16, ZERO.B16, T2.B16 \
VSHL $1, B0.D2, B0.D2 \ VSLI $1, B0.D2, T2.D2 \
VEOR T1.B16, B0.B16, B0.B16 \ VEOR T1.B16, T2.B16, B0.B16
VEOR T2.B16, B0.B16, B0.B16
#define gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) \ #define gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) \
VREV64 B0.B16, B0.B16 \ VREV64 B0.B16, B0.B16 \
@ -37,9 +36,8 @@
\ \
VSHL $63, B0.D2, T2.D2 \ VSHL $63, B0.D2, T2.D2 \
VEXT $8, ZERO.B16, T2.B16, T2.B16 \ VEXT $8, ZERO.B16, T2.B16, T2.B16 \
VUSHR $1, B0.D2, B0.D2 \ VSRI $1, B0.D2, T2.D2 \
VEOR T1.B16, B0.B16, B0.B16 \ VEOR T1.B16, T2.B16, B0.B16 \
VEOR T2.B16, B0.B16, B0.B16 \
\ \
VEXT $8, B0.B16, B0.B16, B0.B16 \ VEXT $8, B0.B16, B0.B16, B0.B16 \
VREV64 B0.B16, B0.B16 VREV64 B0.B16, B0.B16

View File

@ -174,9 +174,8 @@ sm4InitEncDone:
VAND POLY.B16, T1.B16, T1.B16 VAND POLY.B16, T1.B16, T1.B16
VUSHR $63, B0.D2, T2.D2 VUSHR $63, B0.D2, T2.D2
VEXT $8, ZERO.B16, T2.B16, T2.B16 VEXT $8, ZERO.B16, T2.B16, T2.B16
VSHL $1, B0.D2, B0.D2 VSLI $1, B0.D2, T2.D2
VEOR T1.B16, B0.B16, B0.B16 VEOR T1.B16, T2.B16, B0.B16
VEOR T2.B16, B0.B16, B0.B16 // Can avoid this when VSLI is available
// Karatsuba pre-computation // Karatsuba pre-computation
VEXT $8, B0.B16, B0.B16, B1.B16 VEXT $8, B0.B16, B0.B16, B1.B16

View File

@ -6,9 +6,8 @@
\ \
VUSHR $63, TW.D2, K1.D2; \ VUSHR $63, TW.D2, K1.D2; \
VEXT $8, K1.B16, ZERO.B16, K1.B16; \ VEXT $8, K1.B16, ZERO.B16, K1.B16; \
VSHL $1, TW.D2, TW.D2; \ VSLI $1, TW.D2, K1.D2; \
VEOR K0.B16, TW.B16, TW.B16; \ VEOR K0.B16, K1.B16, TW.B16
VEOR K1.B16, TW.B16, TW.B16
#define mul2GBInline \ #define mul2GBInline \
VREV64 TW.B16, TW.B16; \ VREV64 TW.B16, TW.B16; \
@ -22,9 +21,8 @@
\ \
VSHL $63, TW.D2, K1.D2; \ VSHL $63, TW.D2, K1.D2; \
VEXT $8, ZERO.B16, K1.B16, K1.B16; \ VEXT $8, ZERO.B16, K1.B16, K1.B16; \
VUSHR $1, TW.D2, TW.D2; \ VSRI $1, TW.D2, K1.D2; \
VEOR K0.B16, TW.B16, TW.B16; \ VEOR K0.B16, K1.B16, TW.B16; \
VEOR K1.B16, TW.B16, TW.B16; \
\ \
VEXT $8, TW.B16, TW.B16, TW.B16; \ VEXT $8, TW.B16, TW.B16, TW.B16; \
VREV64 TW.B16, TW.B16 VREV64 TW.B16, TW.B16