From 25e0700f81339a550cdee7ab04c0c1f79c6fafc1 Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Tue, 24 Sep 2024 08:32:42 +0800
Subject: [PATCH] xts/gcm: arm64 use VSRI/VSLI

---
 cipher/xts_arm64.s     | 10 ++++------
 sm4/gcm_arm64.s        |  5 ++---
 sm4/xts_macros_arm64.s | 10 ++++------
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/cipher/xts_arm64.s b/cipher/xts_arm64.s
index cb398e7..d6fc30e 100644
--- a/cipher/xts_arm64.s
+++ b/cipher/xts_arm64.s
@@ -21,9 +21,8 @@
 	\
 	VUSHR	$63, B0.D2, T2.D2            \
 	VEXT	$8, T2.B16, ZERO.B16, T2.B16 \
-	VSHL	$1, B0.D2, B0.D2             \
-	VEOR	T1.B16, B0.B16, B0.B16       \
-	VEOR	T2.B16, B0.B16, B0.B16
+	VSLI	$1, B0.D2, T2.D2             \
+	VEOR	T1.B16, T2.B16, B0.B16
 
 #define gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) \
 	VREV64	B0.B16, B0.B16               \
@@ -37,9 +36,8 @@
 	\
 	VSHL	$63, B0.D2, T2.D2            \
 	VEXT	$8, ZERO.B16, T2.B16, T2.B16 \
-	VUSHR	$1, B0.D2, B0.D2             \
-	VEOR	T1.B16, B0.B16, B0.B16       \
-	VEOR	T2.B16, B0.B16, B0.B16       \
+	VSRI	$1, B0.D2, T2.D2             \
+	VEOR	T1.B16, T2.B16, B0.B16       \
 	\
 	VEXT	$8, B0.B16, B0.B16, B0.B16   \
 	VREV64	B0.B16, B0.B16
diff --git a/sm4/gcm_arm64.s b/sm4/gcm_arm64.s
index 2858ef0..d52387a 100644
--- a/sm4/gcm_arm64.s
+++ b/sm4/gcm_arm64.s
@@ -174,9 +174,8 @@ sm4InitEncDone:
 	VAND	POLY.B16, T1.B16, T1.B16
 	VUSHR	$63, B0.D2, T2.D2
 	VEXT	$8, ZERO.B16, T2.B16, T2.B16
-	VSHL	$1, B0.D2, B0.D2
-	VEOR	T1.B16, B0.B16, B0.B16
-	VEOR	T2.B16, B0.B16, B0.B16 // Can avoid this when VSLI is available
+	VSLI	$1, B0.D2, T2.D2
+	VEOR	T1.B16, T2.B16, B0.B16
 
 	// Karatsuba pre-computation
 	VEXT	$8, B0.B16, B0.B16, B1.B16
diff --git a/sm4/xts_macros_arm64.s b/sm4/xts_macros_arm64.s
index 773f7b3..776c7fc 100644
--- a/sm4/xts_macros_arm64.s
+++ b/sm4/xts_macros_arm64.s
@@ -6,9 +6,8 @@
 	\
 	VUSHR	$63, TW.D2, K1.D2;            \
 	VEXT	$8, K1.B16, ZERO.B16, K1.B16; \
-	VSHL	$1, TW.D2, TW.D2;             \
-	VEOR	K0.B16, TW.B16, TW.B16;       \
-	VEOR	K1.B16, TW.B16, TW.B16
+	VSLI	$1, TW.D2, K1.D2;             \
+	VEOR	K0.B16, K1.B16, TW.B16
 
 #define mul2GBInline \
 	VREV64	TW.B16, TW.B16;               \
@@ -22,9 +21,8 @@
 	\
 	VSHL	$63, TW.D2, K1.D2;            \
 	VEXT	$8, ZERO.B16, K1.B16, K1.B16; \
-	VUSHR	$1, TW.D2, TW.D2;             \
-	VEOR	K0.B16, TW.B16, TW.B16;       \
-	VEOR	K1.B16, TW.B16, TW.B16;       \
+	VSRI	$1, TW.D2, K1.D2;             \
+	VEOR	K0.B16, K1.B16, TW.B16;       \
 	\
 	VEXT	$8, TW.B16, TW.B16, TW.B16;   \
 	VREV64	TW.B16, TW.B16
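
Why the substitution is safe: SLI (shift left and insert) preserves the low bit of each
destination lane, and SRI (shift right and insert) preserves the high bit. In these macros
the destination register (T2/K1) holds only the cross-lane carry, i.e. at most that one
preserved bit per 64-bit lane, and the corresponding bit of the shifted source is always
zero after the shift, so "shift and insert" produces the same value as "shift, then XOR the
carry" and one VEOR per tweak doubling can be dropped. The sketch below is a minimal scalar
Go equivalent of the forward (little-endian XTS) doubling under the standard
x^128 + x^7 + x^2 + x + 1 reduction; the mul2 name and the two-uint64 layout are
illustrative only and not taken from these files.

package main

import (
	"encoding/binary"
	"fmt"
)

// mul2 doubles a 128-bit XTS tweak in GF(2^128) with the reduction
// polynomial x^128 + x^7 + x^2 + x + 1 (0x87), using the standard
// little-endian tweak layout. The "hi<<1 | carryLo" line is the scalar
// counterpart of the new VSLI: OR and XOR coincide because bit 0 of
// hi<<1 is always zero, which is why the patch can drop one VEOR.
func mul2(tweak *[16]byte) {
	lo := binary.LittleEndian.Uint64(tweak[0:8])
	hi := binary.LittleEndian.Uint64(tweak[8:16])

	carryLo := lo >> 63 // bit moving from the low lane into the high lane
	carryHi := hi >> 63 // bit leaving the 128-bit value; folded back via 0x87

	lo = lo<<1 ^ carryHi*0x87 // shift plus polynomial reduction (VEOR with T1/K0)
	hi = hi<<1 | carryLo      // VSLI: insert the carry into the freed bit 0

	binary.LittleEndian.PutUint64(tweak[0:8], lo)
	binary.LittleEndian.PutUint64(tweak[8:16], hi)
}

func main() {
	tw := [16]byte{1} // example tweak: the value 1
	mul2(&tw)
	fmt.Printf("%x\n", tw[:]) // prints 02000000000000000000000000000000
}

The bit-reversed path (mul2GBInline / gbDoubleTweak, now using VSRI) mirrors this with the
carry sitting at bit 63 of each lane, which VSRI preserves while bit 63 of the right-shifted
source is guaranteed zero.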