mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 04:36:19 +08:00
MAGIC - optimize sm2P256SqrInternal
This commit is contained in:
parent
885cb37c5a
commit
aa6b436714
@ -1753,93 +1753,81 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$0
|
|||||||
ADCQ mul0, t2
|
ADCQ mul0, t2
|
||||||
ADCQ DX, t3
|
ADCQ DX, t3
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ p256p<>+0x08(SB), mul0
|
MOVQ acc0, hlp
|
||||||
MULQ acc0
|
|
||||||
ADDQ acc0, acc1
|
MOVQ hlp, mul0
|
||||||
ADCQ $0, mul1
|
MOVQ hlp, mul1
|
||||||
ADDQ mul0, acc1
|
SHLQ $32, mul0
|
||||||
ADCQ $0, mul1
|
SHRQ $32, mul1
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x010(SB), mul0
|
ADDQ hlp, acc1
|
||||||
MULQ acc0
|
ADCQ $0, acc2
|
||||||
ADDQ hlp, acc2
|
SUBQ mul0, acc1
|
||||||
ADCQ $0, mul1
|
SBBQ mul1, acc2
|
||||||
ADDQ mul0, acc2
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x018(SB), mul0
|
|
||||||
MULQ acc0
|
|
||||||
ADDQ hlp, acc3
|
ADDQ hlp, acc3
|
||||||
ADCQ $0, mul1
|
ADCQ $0, acc0
|
||||||
ADDQ mul0, acc3
|
SUBQ mul0, acc3
|
||||||
ADCQ $0, mul1
|
SBBQ mul1, acc0
|
||||||
MOVQ mul1, acc0
|
SUBQ hlp, acc3
|
||||||
|
SBBQ $0, acc0
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MOVQ p256p<>+0x08(SB), mul0
|
MOVQ acc1, hlp
|
||||||
MULQ acc1
|
|
||||||
ADDQ acc1, acc2
|
MOVQ hlp, mul0
|
||||||
ADCQ $0, mul1
|
MOVQ hlp, mul1
|
||||||
ADDQ mul0, acc2
|
SHLQ $32, mul0
|
||||||
ADCQ $0, mul1
|
SHRQ $32, mul1
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x010(SB), mul0
|
|
||||||
MULQ acc1
|
|
||||||
ADDQ hlp, acc3
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc3
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x018(SB), mul0
|
|
||||||
MULQ acc1
|
|
||||||
ADDQ hlp, acc0
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc0
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, acc1
|
|
||||||
// Third reduction step
|
|
||||||
MOVQ p256p<>+0x08(SB), mul0
|
|
||||||
MULQ acc2
|
|
||||||
ADDQ acc2, acc3
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc3
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x010(SB), mul0
|
|
||||||
MULQ acc2
|
|
||||||
ADDQ hlp, acc0
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc0
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x018(SB), mul0
|
|
||||||
MULQ acc2
|
|
||||||
ADDQ hlp, acc1
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc1
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, acc2
|
|
||||||
// Last reduction step
|
|
||||||
MOVQ p256p<>+0x08(SB), mul0
|
|
||||||
MULQ acc3
|
|
||||||
ADDQ acc3, acc0
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc0
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x010(SB), mul0
|
|
||||||
MULQ acc3
|
|
||||||
ADDQ hlp, acc1
|
|
||||||
ADCQ $0, mul1
|
|
||||||
ADDQ mul0, acc1
|
|
||||||
ADCQ $0, mul1
|
|
||||||
MOVQ mul1, hlp
|
|
||||||
MOVQ p256p<>+0x018(SB), mul0
|
|
||||||
MULQ acc3
|
|
||||||
ADDQ hlp, acc2
|
ADDQ hlp, acc2
|
||||||
ADCQ $0, mul1
|
ADCQ $0, acc3
|
||||||
ADDQ mul0, acc2
|
SUBQ mul0, acc2
|
||||||
ADCQ $0, mul1
|
SBBQ mul1, acc3
|
||||||
MOVQ mul1, acc3
|
|
||||||
|
ADDQ hlp, acc0
|
||||||
|
ADCQ $0, acc1
|
||||||
|
SUBQ mul0, acc0
|
||||||
|
SBBQ mul1, acc1
|
||||||
|
SUBQ hlp, acc0
|
||||||
|
SBBQ $0, acc1
|
||||||
|
// Third reduction step
|
||||||
|
MOVQ acc2, hlp
|
||||||
|
|
||||||
|
MOVQ hlp, mul0
|
||||||
|
MOVQ hlp, mul1
|
||||||
|
SHLQ $32, mul0
|
||||||
|
SHRQ $32, mul1
|
||||||
|
|
||||||
|
ADDQ hlp, acc3
|
||||||
|
ADCQ $0, acc0
|
||||||
|
SUBQ mul0, acc3
|
||||||
|
SBBQ mul1, acc0
|
||||||
|
|
||||||
|
ADDQ hlp, acc1
|
||||||
|
ADCQ $0, acc2
|
||||||
|
SUBQ mul0, acc1
|
||||||
|
SBBQ mul1, acc2
|
||||||
|
SUBQ hlp, acc1
|
||||||
|
SBBQ $0, acc2
|
||||||
|
// Last reduction step
|
||||||
|
MOVQ acc3, hlp
|
||||||
|
|
||||||
|
MOVQ hlp, mul0
|
||||||
|
MOVQ hlp, mul1
|
||||||
|
SHLQ $32, mul0
|
||||||
|
SHRQ $32, mul1
|
||||||
|
|
||||||
|
ADDQ hlp, acc0
|
||||||
|
ADCQ $0, acc1
|
||||||
|
SUBQ mul0, acc0
|
||||||
|
SBBQ mul1, acc1
|
||||||
|
|
||||||
|
ADDQ hlp, acc2
|
||||||
|
ADCQ $0, acc3
|
||||||
|
SUBQ mul0, acc2
|
||||||
|
SBBQ mul1, acc3
|
||||||
|
SUBQ hlp, acc2
|
||||||
|
SBBQ $0, acc3
|
||||||
MOVQ $0, BP
|
MOVQ $0, BP
|
||||||
// Add bits [511:256] of the result
|
// Add bits [511:256] of the result
|
||||||
ADCQ acc0, t0
|
ADCQ acc0, t0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user