mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-28 05:06:18 +08:00
cipher: xts reduce duplicated code
This commit is contained in:
parent
5c6c1890ae
commit
f47051ed86
@ -28,7 +28,7 @@ Go语言商用密码软件,简称**GMSM**,一个安全、高性能、易于
|
|||||||
|
|
||||||
- **SM3** - SM3密码杂凑算法实现。**amd64**下分别针对**AVX2+BMI2、AVX、SSE2+SSSE3**做了消息扩展部分的SIMD实现; **arm64**下使用NEON指令做了消息扩展部分的SIMD实现,同时也提供了基于**A64扩展密码指令**的汇编实现;**s390x**和**ppc64x**通过向量指令做了消息扩展部分的优化实现。您也可以参考[SM3性能优化](https://github.com/emmansun/gmsm/wiki/SM3%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96)及相关Wiki和代码,以获得更多实现细节。
|
- **SM3** - SM3密码杂凑算法实现。**amd64**下分别针对**AVX2+BMI2、AVX、SSE2+SSSE3**做了消息扩展部分的SIMD实现; **arm64**下使用NEON指令做了消息扩展部分的SIMD实现,同时也提供了基于**A64扩展密码指令**的汇编实现;**s390x**和**ppc64x**通过向量指令做了消息扩展部分的优化实现。您也可以参考[SM3性能优化](https://github.com/emmansun/gmsm/wiki/SM3%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96)及相关Wiki和代码,以获得更多实现细节。
|
||||||
|
|
||||||
- **SM4** - SM4分组密码算法实现。**amd64**下使用**AES**指令加上**AVX2、AVX、SSE2+SSSE3**实现了比较好的性能。**arm64**下使用**AES**指令加上NEON指令实现了比较好的性能,同时也提供了基于**A64扩展密码指令**的汇编实现。**ppc64x**下使用**VCIPHERLAST**指令加上向量指令进行了并行优化。针对**ECB/CBC/GCM/XTS**加密模式,做了和SM4分组密码算法的融合汇编优化实现。您也可以参考[SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96)及相关Wiki和代码,以获得更多实现细节。
|
- **SM4** - SM4分组密码算法实现。**amd64**下使用**AES**指令加上**AVX2、AVX、SSE2+SSSE3**实现了比较好的性能。**arm64**下使用**AES**指令加上NEON指令实现了比较好的性能,同时也提供了基于**A64扩展密码指令**的汇编实现。**ppc64x**下使用**vsbox**指令加上向量指令进行了并行优化。针对**ECB/CBC/GCM/XTS**加密模式,做了和SM4分组密码算法的融合汇编优化实现。您也可以参考[SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96)及相关Wiki和代码,以获得更多实现细节。
|
||||||
|
|
||||||
- **SM9** - SM9标识密码算法实现。基础的素域、扩域、椭圆曲线运算以及双线性对运算位于[bn256](https://github.com/emmansun/gmsm/tree/main/sm9/bn256)包中,分别对**amd64**、**arm64**架构做了优化实现。您也可以参考[SM9实现及优化](https://github.com/emmansun/gmsm/wiki/SM9%E5%AE%9E%E7%8E%B0%E5%8F%8A%E4%BC%98%E5%8C%96)及相关讨论和代码,以获得更多实现细节。SM9包实现了SM9标识密码算法的密钥生成、数字签名算法、密钥封装机制和公钥加密算法、密钥交换协议。
|
- **SM9** - SM9标识密码算法实现。基础的素域、扩域、椭圆曲线运算以及双线性对运算位于[bn256](https://github.com/emmansun/gmsm/tree/main/sm9/bn256)包中,分别对**amd64**、**arm64**架构做了优化实现。您也可以参考[SM9实现及优化](https://github.com/emmansun/gmsm/wiki/SM9%E5%AE%9E%E7%8E%B0%E5%8F%8A%E4%BC%98%E5%8C%96)及相关讨论和代码,以获得更多实现细节。SM9包实现了SM9标识密码算法的密钥生成、数字签名算法、密钥封装机制和公钥加密算法、密钥交换协议。
|
||||||
|
|
||||||
|
@ -22,6 +22,34 @@ GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16
|
|||||||
#define T0 X3
|
#define T0 X3
|
||||||
#define T1 X4
|
#define T1 X4
|
||||||
|
|
||||||
|
// doubleTweak(B0, POLY, T0, T1) doubles the 128-bit value held in B0 in place:
// B0 is shifted left by one bit across the whole register and, when the bit
// shifted out of the top was set, POLY is XORed into the result.
//
//   B0   - in/out: value to double
//   POLY - in: reduction constant, loaded by the caller
//   T0   - scratch: PSHUFD $0xff broadcasts the highest dword of B0, PSRAL $31
//          turns its sign bit into an all-ones/all-zeros mask, PAND keeps POLY
//          only when that bit was set
//   T1   - scratch: PSRLL $31 isolates the top bit of every dword, PSLLDQ $4
//          moves each of those carries up into the next dword
//
// PSLLL $1 then shifts each dword of B0 left by one, and the two PXORs merge in
// the reduction value (T0) and the inter-dword carries (T1).
// T0 and T1 are clobbered.
#define doubleTweak(B0, POLY, T0, T1) \
|
||||||
|
\ // B0 * 2
|
||||||
|
PSHUFD $0xff, B0, T0 \
|
||||||
|
MOVOU B0, T1 \
|
||||||
|
PSRAL $31, T0 \ // T0 for reduction
|
||||||
|
PAND POLY, T0 \
|
||||||
|
PSRLL $31, T1 \
|
||||||
|
PSLLDQ $4, T1 \
|
||||||
|
PSLLL $1, B0 \
|
||||||
|
PXOR T0, B0 \
|
||||||
|
PXOR T1, B0
|
||||||
|
|
||||||
|
// gbDoubleTweak(B0, BSWAP, POLY, T0, T1) doubles the 128-bit value in B0 in
// place for the GB variant of the tweak update. B0 is byte-shuffled with BSWAP,
// shifted right by one bit across the whole register, conditionally XORed with
// POLY, and byte-shuffled with BSWAP again.
//
//   B0    - in/out: value to double
//   BSWAP - in: PSHUFB control mask, loaded by the caller
//           (presumably a full 16-byte reversal - confirm against bswapMask<>)
//   POLY  - in: reduction constant, loaded by the caller
//   T0    - scratch: PSLLQ $63 moves bit 0 of each qword to bit 63, PSRLDQ $8
//           drops the high qword's carry into the low qword, and POR merges it
//           into B0 after PSRLQ $1 has shifted each qword right by one
//   T1    - scratch: PSHUFD $0 broadcasts the lowest dword of the shuffled B0
//           (captured before the shift); PSLLL $31 / PSRAL $31 expand its bit 0
//           into an all-ones/all-zeros mask that PAND applies to POLY
//
// T0 and T1 are clobbered.
#define gbDoubleTweak(B0, BSWAP, POLY, T0, T1) \
|
||||||
|
PSHUFB BSWAP, B0 \
|
||||||
|
\ // B0 * 2
|
||||||
|
MOVOU B0, T0 \
|
||||||
|
PSHUFD $0, B0, T1 \
|
||||||
|
PSRLQ $1, B0 \
|
||||||
|
PSLLQ $63, T0 \
|
||||||
|
PSRLDQ $8, T0 \
|
||||||
|
POR T0, B0 \
|
||||||
|
\ // reduction
|
||||||
|
PSLLL $31, T1 \
|
||||||
|
PSRAL $31, T1 \
|
||||||
|
PAND POLY, T1 \
|
||||||
|
PXOR T1, B0 \
|
||||||
|
PSHUFB BSWAP, B0
|
||||||
|
|
||||||
// func mul2(tweak *[blockSize]byte, isGB bool)
|
// func mul2(tweak *[blockSize]byte, isGB bool)
|
||||||
TEXT ·mul2(SB),NOSPLIT,$0
|
TEXT ·mul2(SB),NOSPLIT,$0
|
||||||
MOVQ tweak+0(FP), DI
|
MOVQ tweak+0(FP), DI
|
||||||
@ -34,16 +62,7 @@ TEXT ·mul2(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
MOVOU gcmPoly<>(SB), POLY
|
MOVOU gcmPoly<>(SB), POLY
|
||||||
|
|
||||||
// B0 * 2
|
doubleTweak(B0, POLY, T0, T1)
|
||||||
PSHUFD $0xff, B0, T0
|
|
||||||
MOVOU B0, T1
|
|
||||||
PSRAL $31, T0 // T0 for reduction
|
|
||||||
PAND POLY, T0
|
|
||||||
PSRLL $31, T1
|
|
||||||
PSLLDQ $4, T1
|
|
||||||
PSLLL $1, B0
|
|
||||||
PXOR T0, B0
|
|
||||||
PXOR T1, B0
|
|
||||||
|
|
||||||
MOVOU B0, (0*16)(DI)
|
MOVOU B0, (0*16)(DI)
|
||||||
|
|
||||||
@ -53,23 +72,8 @@ gb_alg:
|
|||||||
MOVOU bswapMask<>(SB), BSWAP
|
MOVOU bswapMask<>(SB), BSWAP
|
||||||
MOVOU gbGcmPoly<>(SB), POLY
|
MOVOU gbGcmPoly<>(SB), POLY
|
||||||
|
|
||||||
PSHUFB BSWAP, B0
|
gbDoubleTweak(B0, BSWAP, POLY, T0, T1)
|
||||||
|
|
||||||
// B0 * 2
|
|
||||||
MOVOU B0, T0
|
|
||||||
PSHUFD $0, B0, T1
|
|
||||||
PSRLQ $1, B0
|
|
||||||
PSLLQ $63, T0
|
|
||||||
PSRLDQ $8, T0
|
|
||||||
POR T0, B0
|
|
||||||
|
|
||||||
// reduction
|
|
||||||
PSLLL $31, T1
|
|
||||||
PSRAL $31, T1
|
|
||||||
PAND POLY, T1
|
|
||||||
PXOR T1, B0
|
|
||||||
|
|
||||||
PSHUFB BSWAP, B0
|
|
||||||
MOVOU B0, (0*16)(DI)
|
MOVOU B0, (0*16)(DI)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
@ -94,16 +98,7 @@ loop:
|
|||||||
MOVOU B0, (0*16)(AX)
|
MOVOU B0, (0*16)(AX)
|
||||||
LEAQ 16(AX), AX
|
LEAQ 16(AX), AX
|
||||||
|
|
||||||
// B0 * 2
|
doubleTweak(B0, POLY, T0, T1)
|
||||||
PSHUFD $0xff, B0, T0
|
|
||||||
MOVOU B0, T1
|
|
||||||
PSRAL $31, T0 // T0 for reduction
|
|
||||||
PAND POLY, T0
|
|
||||||
PSRLL $31, T1
|
|
||||||
PSLLDQ $4, T1
|
|
||||||
PSLLL $1, B0
|
|
||||||
PXOR T0, B0
|
|
||||||
PXOR T1, B0
|
|
||||||
|
|
||||||
ADDQ $1, DX
|
ADDQ $1, DX
|
||||||
CMPQ DX, BX
|
CMPQ DX, BX
|
||||||
@ -120,23 +115,8 @@ gb_loop:
|
|||||||
MOVOU B0, (0*16)(AX)
|
MOVOU B0, (0*16)(AX)
|
||||||
LEAQ 16(AX), AX
|
LEAQ 16(AX), AX
|
||||||
|
|
||||||
PSHUFB BSWAP, B0
|
gbDoubleTweak(B0, BSWAP, POLY, T0, T1)
|
||||||
|
|
||||||
// B0 * 2
|
|
||||||
MOVOU B0, T0
|
|
||||||
PSHUFD $0, B0, T1
|
|
||||||
PSRLQ $1, B0
|
|
||||||
PSLLQ $63, T0
|
|
||||||
PSRLDQ $8, T0
|
|
||||||
POR T0, B0
|
|
||||||
|
|
||||||
// reduction
|
|
||||||
PSLLL $31, T1
|
|
||||||
PSRAL $31, T1
|
|
||||||
PAND POLY, T1
|
|
||||||
PXOR T1, B0
|
|
||||||
|
|
||||||
PSHUFB BSWAP, B0
|
|
||||||
ADDQ $1, DX
|
ADDQ $1, DX
|
||||||
CMPQ DX, BX
|
CMPQ DX, BX
|
||||||
JB gb_loop
|
JB gb_loop
|
||||||
|
@ -13,6 +13,37 @@
|
|||||||
#define GB R1
|
#define GB R1
|
||||||
#define I R2
|
#define I R2
|
||||||
|
|
||||||
|
// doubleTweak(B0, ZERO, POLY, I, T1, T2) doubles the 128-bit value in vector
// register B0 in place: a one-bit left shift across the whole register,
// followed by a conditional XOR with POLY when the top bit of B0 was set.
//
//   B0   - in/out: value to double
//   ZERO - in: vector register used as filler by VEXT
//          (presumably all-zero - confirm at the call sites)
//   POLY - in: reduction constant, set up by the caller
//   I    - scratch general register: receives the high 64-bit lane of B0;
//          ASR $63 turns its sign bit into an all-ones/all-zeros value
//   T1   - scratch vector: I duplicated into both lanes, then ANDed with POLY
//   T2   - scratch vector: VUSHR $63 isolates the top bit of each 64-bit lane;
//          VEXT $8 with ZERO then repositions the low lane's carry so it lands
//          in the high lane
//
// VSHL $1 shifts each 64-bit lane of B0 left by one, and the two VEORs merge in
// the reduction value (T1) and the cross-lane carry (T2).
// I, T1 and T2 are clobbered.
#define doubleTweak(B0, ZERO, POLY, I, T1, T2) \
|
||||||
|
VMOV B0.D[1], I \
|
||||||
|
ASR $63, I \
|
||||||
|
VMOV I, T1.D2 \
|
||||||
|
VAND POLY.B16, T1.B16, T1.B16 \
|
||||||
|
\
|
||||||
|
VUSHR $63, B0.D2, T2.D2 \
|
||||||
|
VEXT $8, T2.B16, ZERO.B16, T2.B16 \
|
||||||
|
VSHL $1, B0.D2, B0.D2 \
|
||||||
|
VEOR T1.B16, B0.B16, B0.B16 \
|
||||||
|
VEOR T2.B16, B0.B16, B0.B16
|
||||||
|
|
||||||
|
// gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) doubles the 128-bit value in vector
// register B0 in place for the GB variant of the tweak update.
//
// Steps:
//   1. VREV64 reverses the bytes inside each 64-bit lane and VEXT $8 on B0 with
//      itself swaps the two lanes, which together reverse all 16 bytes.
//   2. Bit 0 of the low lane is expanded into an all-ones/all-zeros value in I
//      (LSL $63 then ASR $63), duplicated into both lanes of T1 and ANDed with
//      POLY to form the conditional reduction value.
//   3. VSHL $63 moves bit 0 of each lane to bit 63 in T2; VEXT $8 with ZERO
//      repositions the high lane's carry so it lands in the low lane.
//   4. VUSHR $1 shifts each lane of B0 right by one; the two VEORs merge in the
//      reduction value (T1) and the cross-lane carry (T2).
//   5. The lane swap and per-lane byte reversal are applied again to restore
//      the original byte order.
//
//   B0   - in/out: value to double
//   ZERO - in: vector register used as filler by VEXT
//          (presumably all-zero - confirm at the call sites)
//   POLY - in: reduction constant, set up by the caller
//   I    - scratch general register
//   T1   - scratch vector register
//   T2   - scratch vector register
//
// I, T1 and T2 are clobbered.
#define gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) \
|
||||||
|
VREV64 B0.B16, B0.B16 \
|
||||||
|
VEXT $8, B0.B16, B0.B16, B0.B16 \
|
||||||
|
\
|
||||||
|
VMOV B0.D[0], I \
|
||||||
|
LSL $63, I \
|
||||||
|
ASR $63, I \
|
||||||
|
VMOV I, T1.D2 \
|
||||||
|
VAND POLY.B16, T1.B16, T1.B16 \
|
||||||
|
\
|
||||||
|
VSHL $63, B0.D2, T2.D2 \
|
||||||
|
VEXT $8, ZERO.B16, T2.B16, T2.B16 \
|
||||||
|
VUSHR $1, B0.D2, B0.D2 \
|
||||||
|
VEOR T1.B16, B0.B16, B0.B16 \
|
||||||
|
VEOR T2.B16, B0.B16, B0.B16 \
|
||||||
|
\
|
||||||
|
VEXT $8, B0.B16, B0.B16, B0.B16 \
|
||||||
|
VREV64 B0.B16, B0.B16
|
||||||
|
|
||||||
// func mul2(tweak *[blockSize]byte, isGB bool)
|
// func mul2(tweak *[blockSize]byte, isGB bool)
|
||||||
TEXT ·mul2(SB),NOSPLIT,$0
|
TEXT ·mul2(SB),NOSPLIT,$0
|
||||||
MOVD tweak+0(FP), TW
|
MOVD tweak+0(FP), TW
|
||||||
@ -29,16 +60,7 @@ TEXT ·mul2(SB),NOSPLIT,$0
|
|||||||
MOVD $0x87, I
|
MOVD $0x87, I
|
||||||
VMOV I, POLY.D[0]
|
VMOV I, POLY.D[0]
|
||||||
|
|
||||||
VMOV B0.D[1], I
|
doubleTweak(B0, ZERO, POLY, I, T1, T2)
|
||||||
ASR $63, I
|
|
||||||
VMOV I, T1.D2
|
|
||||||
VAND POLY.B16, T1.B16, T1.B16
|
|
||||||
|
|
||||||
VUSHR $63, B0.D2, T2.D2
|
|
||||||
VEXT $8, T2.B16, ZERO.B16, T2.B16
|
|
||||||
VSHL $1, B0.D2, B0.D2
|
|
||||||
VEOR T1.B16, B0.B16, B0.B16
|
|
||||||
VEOR T2.B16, B0.B16, B0.B16
|
|
||||||
|
|
||||||
VST1 [B0.B16], (TW)
|
VST1 [B0.B16], (TW)
|
||||||
RET
|
RET
|
||||||
@ -48,23 +70,7 @@ gb_alg:
|
|||||||
LSL $56, I
|
LSL $56, I
|
||||||
VMOV I, POLY.D[1]
|
VMOV I, POLY.D[1]
|
||||||
|
|
||||||
VREV64 B0.B16, B0.B16
|
gbDoubleTweak(B0, ZERO, POLY, I, T1, T2)
|
||||||
VEXT $8, B0.B16, B0.B16, B0.B16
|
|
||||||
|
|
||||||
VMOV B0.D[0], I
|
|
||||||
LSL $63, I
|
|
||||||
ASR $63, I
|
|
||||||
VMOV I, T1.D2
|
|
||||||
VAND POLY.B16, T1.B16, T1.B16
|
|
||||||
|
|
||||||
VSHL $63, B0.D2, T2.D2
|
|
||||||
VEXT $8, ZERO.B16, T2.B16, T2.B16
|
|
||||||
VUSHR $1, B0.D2, B0.D2
|
|
||||||
VEOR T1.B16, B0.B16, B0.B16
|
|
||||||
VEOR T2.B16, B0.B16, B0.B16
|
|
||||||
|
|
||||||
VEXT $8, B0.B16, B0.B16, B0.B16
|
|
||||||
VREV64 B0.B16, B0.B16
|
|
||||||
|
|
||||||
VST1 [B0.B16], (TW)
|
VST1 [B0.B16], (TW)
|
||||||
RET
|
RET
|
||||||
@ -93,16 +99,7 @@ TEXT ·doubleTweaks(SB),NOSPLIT,$0
|
|||||||
loop:
|
loop:
|
||||||
VST1.P [B0.B16], 16(R3)
|
VST1.P [B0.B16], 16(R3)
|
||||||
|
|
||||||
VMOV B0.D[1], I
|
doubleTweak(B0, ZERO, POLY, I, T1, T2)
|
||||||
ASR $63, I
|
|
||||||
VMOV I, T1.D2
|
|
||||||
VAND POLY.B16, T1.B16, T1.B16
|
|
||||||
|
|
||||||
VUSHR $63, B0.D2, T2.D2
|
|
||||||
VEXT $8, T2.B16, ZERO.B16, T2.B16
|
|
||||||
VSHL $1, B0.D2, B0.D2
|
|
||||||
VEOR T1.B16, B0.B16, B0.B16
|
|
||||||
VEOR T2.B16, B0.B16, B0.B16
|
|
||||||
|
|
||||||
ADD $1, R5
|
ADD $1, R5
|
||||||
CMP R4, R5
|
CMP R4, R5
|
||||||
@ -119,23 +116,7 @@ dt_gb_alg:
|
|||||||
gb_loop:
|
gb_loop:
|
||||||
VST1.P [B0.B16], 16(R3)
|
VST1.P [B0.B16], 16(R3)
|
||||||
|
|
||||||
VREV64 B0.B16, B0.B16
|
gbDoubleTweak(B0, ZERO, POLY, I, T1, T2)
|
||||||
VEXT $8, B0.B16, B0.B16, B0.B16
|
|
||||||
|
|
||||||
VMOV B0.D[0], I
|
|
||||||
LSL $63, I
|
|
||||||
ASR $63, I
|
|
||||||
VMOV I, T1.D2
|
|
||||||
VAND POLY.B16, T1.B16, T1.B16
|
|
||||||
|
|
||||||
VSHL $63, B0.D2, T2.D2
|
|
||||||
VEXT $8, ZERO.B16, T2.B16, T2.B16
|
|
||||||
VUSHR $1, B0.D2, B0.D2
|
|
||||||
VEOR T1.B16, B0.B16, B0.B16
|
|
||||||
VEOR T2.B16, B0.B16, B0.B16
|
|
||||||
|
|
||||||
VEXT $8, B0.B16, B0.B16, B0.B16
|
|
||||||
VREV64 B0.B16, B0.B16
|
|
||||||
|
|
||||||
ADD $1, R5
|
ADD $1, R5
|
||||||
CMP R4, R5
|
CMP R4, R5
|
||||||
|
@ -149,8 +149,8 @@ gb_alg:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
gbLoop:
|
gbLoop:
|
||||||
STXVD2X B0, (R4)
|
STXVD2X B0, (R4)
|
||||||
ADD $16, R4
|
ADD $16, R4
|
||||||
|
|
||||||
#ifdef GOARCH_ppc64le
|
#ifdef GOARCH_ppc64le
|
||||||
VPERM B0, B0, ESPERM, B0
|
VPERM B0, B0, ESPERM, B0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user