mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 12:46:18 +08:00
fix error
This commit is contained in:
parent
485d6317a3
commit
799a1d3ce4
@ -134,9 +134,9 @@ TEXT ·p256NegCond(SB),NOSPLIT,$0
|
|||||||
MOVD cond+24(FP), hlp0
|
MOVD cond+24(FP), hlp0
|
||||||
MOVD a_ptr, res_ptr
|
MOVD a_ptr, res_ptr
|
||||||
// acc = poly
|
// acc = poly
|
||||||
MOVD $-1, acc0
|
MOVD p256p<>+0x00(SB), acc0
|
||||||
MOVD p256p<>+0x08(SB), acc1
|
MOVD p256p<>+0x08(SB), acc1
|
||||||
MOVD $-1, acc2
|
MOVD p256p<>+0x10(SB), acc2
|
||||||
MOVD p256p<>+0x18(SB), acc3
|
MOVD p256p<>+0x18(SB), acc3
|
||||||
// Load the original value
|
// Load the original value
|
||||||
LDP 0*16(a_ptr), (t0, t1)
|
LDP 0*16(a_ptr), (t0, t1)
|
||||||
@ -227,13 +227,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
||||||
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
||||||
|
|
||||||
UMULH const3, acc0, hlp1 // hlp1 = H(acc0*p3)
|
UMULH const3, acc0, y1 // y1 = H(acc0*p3)
|
||||||
ADC $0, hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
||||||
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
||||||
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
||||||
ADC $0, hlp1, acc0 // acc0 = carry6 + H(acc0*p3)
|
ADC $0, y1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||||
|
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MUL const1, acc1, t0
|
MUL const1, acc1, t0
|
||||||
@ -247,13 +247,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
||||||
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
||||||
|
|
||||||
UMULH const3, acc1, hlp1 // hlp1 = H(acc1*p3)
|
UMULH const3, acc1, y1 // y1 = H(acc1*p3)
|
||||||
ADC $0, hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
||||||
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
||||||
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
||||||
ADC $0, hlp1, acc1 // acc1 = carry6 + H(acc1*p3)
|
ADC $0, y1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||||
// Third reduction step
|
// Third reduction step
|
||||||
MUL const1, acc2, t0
|
MUL const1, acc2, t0
|
||||||
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
||||||
@ -266,13 +266,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
||||||
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
||||||
|
|
||||||
UMULH const3, acc2, hlp1 // hlp1 = H(acc2*p3)
|
UMULH const3, acc2, y1 // y1 = H(acc2*p3)
|
||||||
ADC $0, hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
||||||
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
||||||
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
||||||
ADC $0, hlp1, acc2 // acc2 = carry6 + H(acc2*p3)
|
ADC $0, y1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||||
// Last reduction step
|
// Last reduction step
|
||||||
MUL const1, acc3, t0
|
MUL const1, acc3, t0
|
||||||
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
||||||
@ -285,13 +285,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
||||||
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
||||||
|
|
||||||
UMULH const3, acc3, hlp1 // hlp1 = H(acc3*p3)
|
UMULH const3, acc3, y1 // y1 = H(acc3*p3)
|
||||||
ADC $0, hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
||||||
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
||||||
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
||||||
ADC $0, hlp1, acc3 // acc3 = carry6 + H(acc3*p3)
|
ADC $0, y1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||||
|
|
||||||
SUBS const0, acc0, t0
|
SUBS const0, acc0, t0
|
||||||
SBCS const1, acc1, t1
|
SBCS const1, acc1, t1
|
||||||
@ -834,10 +834,10 @@ TEXT sm2P256Subinternal<>(SB),NOSPLIT,$0
|
|||||||
SBCS x3, y3, acc3
|
SBCS x3, y3, acc3
|
||||||
SBC $0, ZR, t0
|
SBC $0, ZR, t0
|
||||||
|
|
||||||
ADDS $-1, acc0, acc4
|
ADDS const0, acc0, acc4
|
||||||
ADCS const0, acc1, acc5
|
ADCS const1, acc1, acc5
|
||||||
ADCS $-1, acc2, acc6
|
ADCS const2, acc2, acc6
|
||||||
ADC const1, acc3, acc7
|
ADC const3, acc3, acc7
|
||||||
|
|
||||||
ANDS $1, t0
|
ANDS $1, t0
|
||||||
CSEL EQ, acc0, acc4, x0
|
CSEL EQ, acc0, acc4, x0
|
||||||
@ -906,81 +906,81 @@ TEXT sm2P256SqrInternal<>(SB),NOSPLIT,$0
|
|||||||
UMULH x3, x3, t1
|
UMULH x3, x3, t1
|
||||||
ADCS t1, acc7, acc7
|
ADCS t1, acc7, acc7
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MUL const0, acc0, t0
|
MUL const1, acc0, t0
|
||||||
ADDS t0, acc1, acc1 // (carry1, acc1) = acc1 + L(acc0*p1)
|
ADDS t0, acc1, acc1 // (carry1, acc1) = acc1 + L(acc0*p1)
|
||||||
UMULH const0, acc0, y0 // y0 = H(acc0*p1)
|
UMULH const1, acc0, y0 // y0 = H(acc0*p1)
|
||||||
|
|
||||||
MUL $-1, acc0, t0
|
MUL const2, acc0, t0
|
||||||
ADCS t0, acc2, acc2 // (carry2, acc2) = acc2 + L(acc0*p2)
|
ADCS t0, acc2, acc2 // (carry2, acc2) = acc2 + L(acc0*p2)
|
||||||
UMULH $-1, acc0, hlp0 // hlp0 = H(acc0*p2)
|
UMULH const2, acc0, hlp0 // hlp0 = H(acc0*p2)
|
||||||
|
|
||||||
MUL const1, acc0, t0 // t0 = L(acc0*p3)
|
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
||||||
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
||||||
|
|
||||||
UMULH const1, acc0, hlp1 // hlp1 = H(acc0*p3)
|
UMULH const3, acc0, y1 // y1 = H(acc0*p3)
|
||||||
ADC $0, hlp1 // hlp1 = carry3 + hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
||||||
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
||||||
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
||||||
ADC $0, hlp1, acc0 // acc0 = carry6 + H(acc0*p3)
|
ADC $0, y1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MUL const0, acc1, t0
|
MUL const1, acc1, t0
|
||||||
ADDS t0, acc2, acc2 // (carry1, acc2) = acc2 + L(acc1*p1)
|
ADDS t0, acc2, acc2 // (carry1, acc2) = acc2 + L(acc1*p1)
|
||||||
UMULH const0, acc1, y0 // y0 = H(acc1*p1)
|
UMULH const1, acc1, y0 // y0 = H(acc1*p1)
|
||||||
|
|
||||||
MUL $-1, acc1, t0
|
MUL const2, acc1, t0
|
||||||
ADCS t0, acc3, acc3 // (carry2, acc3) = acc3 + L(acc1*p2)
|
ADCS t0, acc3, acc3 // (carry2, acc3) = acc3 + L(acc1*p2)
|
||||||
UMULH $-1, acc1, hlp0 // hlp0 = H(acc1*p2)
|
UMULH const2, acc1, hlp0 // hlp0 = H(acc1*p2)
|
||||||
|
|
||||||
MUL const1, acc1, t0 // t0 = L(acc1*p3)
|
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
||||||
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
||||||
|
|
||||||
UMULH const1, acc1, hlp1 // hlp1 = H(acc1*p3)
|
UMULH const3, acc1, y1 // y1 = H(acc1*p3)
|
||||||
ADC $0, hlp1 // hlp1 = carry3 + hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
||||||
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
||||||
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
||||||
ADC $0, hlp1, acc1 // acc1 = carry6 + H(acc1*p3)
|
ADC $0, y1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||||
// Third reduction step
|
// Third reduction step
|
||||||
MUL const0, acc2, t0
|
MUL const1, acc2, t0
|
||||||
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
||||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||||
|
|
||||||
MUL $-1, acc2, t0
|
MUL const2, acc2, t0
|
||||||
ADCS t0, acc0, acc0 // (carry2, acc0) = acc0 + L(acc2*p2)
|
ADCS t0, acc0, acc0 // (carry2, acc0) = acc0 + L(acc2*p2)
|
||||||
UMULH $-1, acc2, hlp0 // hlp0 = H(acc2*p2)
|
UMULH const2, acc2, hlp0 // hlp0 = H(acc2*p2)
|
||||||
|
|
||||||
MUL const1, acc2, t0 // t0 = L(acc2*p3)
|
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
||||||
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
||||||
|
|
||||||
UMULH const1, acc2, hlp1 // hlp1 = H(acc2*p3)
|
UMULH const3, acc2, y1 // y1 = H(acc2*p3)
|
||||||
ADC $0, hlp1 // hlp1 = carry3 + hlp1
|
ADC $0, y1
|
||||||
|
|
||||||
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
||||||
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
||||||
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
||||||
ADC $0, hlp1, acc2 // acc2 = carry6 + H(acc2*p3)
|
ADC $0, y1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||||
// Last reduction step
|
// Last reduction step
|
||||||
MUL const0, acc3, t0
|
MUL const1, acc3, t0
|
||||||
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
||||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||||
|
|
||||||
MUL $-1, acc3, t0
|
MUL const2, acc3, t0
|
||||||
ADCS t0, acc1, acc1 // (carry2, acc1) = acc1 + L(acc3*p2)
|
ADCS t0, acc1, acc1 // (carry2, acc1) = acc1 + L(acc3*p2)
|
||||||
UMULH $-1, acc3, hlp0 // hlp0 = H(acc3*p2)
|
UMULH const2, acc3, hlp0 // hlp0 = H(acc3*p2)
|
||||||
|
|
||||||
MUL const1, acc3, t0 // t0 = L(acc3*p3)
|
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
||||||
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
||||||
|
|
||||||
UMULH const1, acc3, hlp1 // hlp1 = H(acc3*p3)
|
UMULH const3, acc3, y1 // y1 = H(acc3*p3)
|
||||||
ADC $0, acc7 // acc7 = carry3 + acc7
|
ADC $0, acc7 // acc7 = carry3 + acc7
|
||||||
|
|
||||||
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
||||||
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
||||||
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
||||||
ADC $0, hlp1, acc3 // acc3 = carry6 + H(acc3*p3)
|
ADC $0, y1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||||
|
|
||||||
// Add bits [511:256] of the sqr result
|
// Add bits [511:256] of the sqr result
|
||||||
ADDS acc4, acc0, acc0
|
ADDS acc4, acc0, acc0
|
||||||
@ -989,10 +989,10 @@ TEXT sm2P256SqrInternal<>(SB),NOSPLIT,$0
|
|||||||
ADCS acc7, acc3, acc3
|
ADCS acc7, acc3, acc3
|
||||||
ADC $0, ZR, acc4
|
ADC $0, ZR, acc4
|
||||||
|
|
||||||
SUBS $-1, acc0, t0
|
SUBS const0, acc0, t0
|
||||||
SBCS const0, acc1, t1
|
SBCS const1, acc1, t1
|
||||||
SBCS $-1, acc2, t2
|
SBCS const2, acc2, t2
|
||||||
SBCS const1, acc3, t3
|
SBCS const3, acc3, t3
|
||||||
SBCS $0, acc4, acc4
|
SBCS $0, acc4, acc4
|
||||||
|
|
||||||
CSEL CS, t0, acc0, y0
|
CSEL CS, t0, acc0, y0
|
||||||
@ -1019,24 +1019,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
|||||||
UMULH y0, x3, acc4
|
UMULH y0, x3, acc4
|
||||||
ADC $0, acc4
|
ADC $0, acc4
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MUL const0, acc0, t0
|
MUL const1, acc0, t0
|
||||||
ADDS t0, acc1, acc1 // (carry1, acc1) = acc1 + L(acc0*p1)
|
ADDS t0, acc1, acc1 // (carry1, acc1) = acc1 + L(acc0*p1)
|
||||||
UMULH const0, acc0, y0 // y0 = H(acc0*p1)
|
UMULH const1, acc0, y0 // y0 = H(acc0*p1)
|
||||||
|
|
||||||
MUL $-1, acc0, t0
|
MUL const2, acc0, t0
|
||||||
ADCS t0, acc2, acc2 // (carry2, acc2) = acc2 + L(acc0*p2)
|
ADCS t0, acc2, acc2 // (carry2, acc2) = acc2 + L(acc0*p2)
|
||||||
UMULH $-1, acc0, hlp0 // hlp0 = H(acc0*p2)
|
UMULH const2, acc0, hlp0 // hlp0 = H(acc0*p2)
|
||||||
|
|
||||||
MUL const1, acc0, t0 // t0 = L(acc0*p3)
|
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
||||||
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
||||||
|
|
||||||
UMULH const1, acc0, hlp1 // hlp1 = H(acc0*p3)
|
UMULH const3, acc0, acc5 // acc5 = H(acc0*p3)
|
||||||
ADC $0, acc4 // acc4 = carry3 + acc4
|
ADC $0, acc4 // acc4 = carry3 + acc4
|
||||||
|
|
||||||
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
||||||
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
||||||
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
||||||
ADC $0, hlp1, acc0 // acc0 = carry6 + H(acc0*p3)
|
ADC $0, acc5, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||||
|
|
||||||
// y[1] * x
|
// y[1] * x
|
||||||
MUL y1, x0, t0
|
MUL y1, x0, t0
|
||||||
@ -1061,24 +1061,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
|||||||
ADCS t3, acc4
|
ADCS t3, acc4
|
||||||
ADC hlp0, acc5
|
ADC hlp0, acc5
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MUL const0, acc1, t0
|
MUL const1, acc1, t0
|
||||||
ADDS t0, acc2, acc2 // (carry1, acc2) = acc2 + L(acc1*p1)
|
ADDS t0, acc2, acc2 // (carry1, acc2) = acc2 + L(acc1*p1)
|
||||||
UMULH const0, acc1, y0 // y0 = H(acc1*p1)
|
UMULH const1, acc1, y0 // y0 = H(acc1*p1)
|
||||||
|
|
||||||
MUL $-1, acc1, t0
|
MUL const2, acc1, t0
|
||||||
ADCS t0, acc3, acc3 // (carry2, acc3) = acc3 + L(acc1*p2)
|
ADCS t0, acc3, acc3 // (carry2, acc3) = acc3 + L(acc1*p2)
|
||||||
UMULH $-1, acc1, hlp0 // hlp0 = H(acc1*p2)
|
UMULH const2, acc1, hlp0 // hlp0 = H(acc1*p2)
|
||||||
|
|
||||||
MUL const1, acc1, t0 // t0 = L(acc1*p3)
|
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
||||||
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
||||||
|
|
||||||
UMULH const1, acc1, hlp1 // hlp1 = H(acc1*p3)
|
UMULH const3, acc1, y1 // y1 = H(acc1*p3)
|
||||||
ADC $0, acc5 // acc5 = carry3 + acc5
|
ADC $0, acc5 // acc5 = carry3 + acc5
|
||||||
|
|
||||||
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
||||||
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
||||||
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
||||||
ADC $0, hlp1, acc1 // acc1 = carry6 + H(acc1*p3)
|
ADC $0, y1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||||
|
|
||||||
// y[2] * x
|
// y[2] * x
|
||||||
MUL y2, x0, t0
|
MUL y2, x0, t0
|
||||||
@ -1103,24 +1103,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
|||||||
ADCS t3, acc5
|
ADCS t3, acc5
|
||||||
ADC hlp0, acc6
|
ADC hlp0, acc6
|
||||||
// Third reduction step
|
// Third reduction step
|
||||||
MUL const0, acc2, t0
|
MUL const1, acc2, t0
|
||||||
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
||||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||||
|
|
||||||
MUL $-1, acc2, t0
|
MUL const2, acc2, t0
|
||||||
ADCS t0, acc0, acc0 // (carry2, acc0) = acc0 + L(acc2*p2)
|
ADCS t0, acc0, acc0 // (carry2, acc0) = acc0 + L(acc2*p2)
|
||||||
UMULH $-1, acc2, hlp0 // hlp0 = H(acc2*p2)
|
UMULH const2, acc2, hlp0 // hlp0 = H(acc2*p2)
|
||||||
|
|
||||||
MUL const1, acc2, t0 // t0 = L(acc2*p3)
|
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
||||||
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
||||||
|
|
||||||
UMULH const1, acc2, hlp1 // hlp1 = H(acc2*p3)
|
UMULH const3, acc2, y1 // y1 = H(acc2*p3)
|
||||||
ADC $0, acc6 // acc6 = carry3 + acc6
|
ADC $0, acc6 // acc6 = carry3 + acc6
|
||||||
|
|
||||||
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
||||||
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
||||||
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
||||||
ADC $0, hlp1, acc2 // acc2 = carry6 + H(acc2*p3)
|
ADC $0, y1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||||
|
|
||||||
// y[3] * x
|
// y[3] * x
|
||||||
MUL y3, x0, t0
|
MUL y3, x0, t0
|
||||||
@ -1145,24 +1145,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
|||||||
ADCS t3, acc6
|
ADCS t3, acc6
|
||||||
ADC hlp0, acc7
|
ADC hlp0, acc7
|
||||||
// Last reduction step
|
// Last reduction step
|
||||||
MUL const0, acc3, t0
|
MUL const1, acc3, t0
|
||||||
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
||||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||||
|
|
||||||
MUL $-1, acc3, t0
|
MUL const2, acc3, t0
|
||||||
ADCS t0, acc1, acc1 // (carry2, acc1) = acc1 + L(acc3*p2)
|
ADCS t0, acc1, acc1 // (carry2, acc1) = acc1 + L(acc3*p2)
|
||||||
UMULH $-1, acc3, hlp0 // hlp0 = H(acc3*p2)
|
UMULH const2, acc3, hlp0 // hlp0 = H(acc3*p2)
|
||||||
|
|
||||||
MUL const1, acc3, t0 // t0 = L(acc3*p3)
|
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
||||||
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
||||||
|
|
||||||
UMULH const1, acc3, hlp1 // hlp1 = H(acc3*p3)
|
UMULH const3, acc3, y1 // y1 = H(acc3*p3)
|
||||||
ADC $0, acc7 // acc7 = carry3 + acc7
|
ADC $0, acc7 // acc7 = carry3 + acc7
|
||||||
|
|
||||||
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
||||||
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
||||||
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
||||||
ADC $0, hlp1, acc3 // acc3 = carry6 + H(acc3*p3)
|
ADC $0, y1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||||
|
|
||||||
// Add bits [511:256] of the mul result
|
// Add bits [511:256] of the mul result
|
||||||
ADDS acc4, acc0, acc0
|
ADDS acc4, acc0, acc0
|
||||||
@ -1171,10 +1171,10 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
|||||||
ADCS acc7, acc3, acc3
|
ADCS acc7, acc3, acc3
|
||||||
ADC $0, ZR, acc4
|
ADC $0, ZR, acc4
|
||||||
|
|
||||||
SUBS $-1, acc0, t0
|
SUBS const0, acc0, t0
|
||||||
SBCS const0, acc1, t1
|
SBCS const1, acc1, t1
|
||||||
SBCS $-1, acc2, t2
|
SBCS const2, acc2, t2
|
||||||
SBCS const1, acc3, t3
|
SBCS const3, acc3, t3
|
||||||
SBCS $0, acc4, acc4
|
SBCS $0, acc4, acc4
|
||||||
|
|
||||||
CSEL CS, t0, acc0, y0
|
CSEL CS, t0, acc0, y0
|
||||||
@ -1189,10 +1189,10 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
|||||||
ADCS y2, y2, x2; \
|
ADCS y2, y2, x2; \
|
||||||
ADCS y3, y3, x3; \
|
ADCS y3, y3, x3; \
|
||||||
ADC $0, ZR, hlp0; \
|
ADC $0, ZR, hlp0; \
|
||||||
SUBS $-1, x0, t0; \
|
SUBS const0, x0, t0; \
|
||||||
SBCS const0, x1, t1;\
|
SBCS const1, x1, t1;\
|
||||||
SBCS $-1, x2, t2; \
|
SBCS const2, x2, t2; \
|
||||||
SBCS const1, x3, t3;\
|
SBCS const3, x3, t3;\
|
||||||
SBCS $0, hlp0, hlp0;\
|
SBCS $0, hlp0, hlp0;\
|
||||||
CSEL CC, x0, t0, x0;\
|
CSEL CC, x0, t0, x0;\
|
||||||
CSEL CC, x1, t1, x1;\
|
CSEL CC, x1, t1, x1;\
|
||||||
@ -1240,25 +1240,24 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-96
|
|||||||
CMP $0, hlp1
|
CMP $0, hlp1
|
||||||
CSEL EQ, ZR, t0, hlp1
|
CSEL EQ, ZR, t0, hlp1
|
||||||
|
|
||||||
MOVD p256p<>+0x08(SB), const0
|
LDP p256p<>+0x00(SB), (const0, const1)
|
||||||
MOVD p256p<>+0x18(SB), const1
|
LDP p256p<>+0x10(SB), (const2, const3)
|
||||||
EOR t2<<1, hlp1
|
EOR t2<<1, hlp1
|
||||||
|
|
||||||
// Negate y2in based on sign
|
// Negate y2in based on sign
|
||||||
LDP 2*16(b_ptr), (y0, y1)
|
LDP 2*16(b_ptr), (y0, y1)
|
||||||
LDP 3*16(b_ptr), (y2, y3)
|
LDP 3*16(b_ptr), (y2, y3)
|
||||||
|
|
||||||
MOVD p256p<>+0x00(SB), acc4
|
SUBS y0, const0, acc0
|
||||||
SUBS y0, acc4, acc0
|
SBCS y1, const1, acc1
|
||||||
SBCS y1, const0, acc1
|
SBCS y2, const2, acc2
|
||||||
SBCS y2, acc4, acc2
|
SBCS y3, const3, acc3
|
||||||
SBCS y3, const1, acc3
|
|
||||||
SBC $0, ZR, t0
|
SBC $0, ZR, t0
|
||||||
|
|
||||||
ADDS $-1, acc0, acc4
|
ADDS const0, acc0, acc4
|
||||||
ADCS const0, acc1, acc5
|
ADCS const1, acc1, acc5
|
||||||
ADCS $-1, acc2, acc6
|
ADCS const2, acc2, acc6
|
||||||
ADCS const1, acc3, acc7
|
ADCS const3, acc3, acc7
|
||||||
ADC $0, t0, t0
|
ADC $0, t0, t0
|
||||||
|
|
||||||
CMP $0, t0
|
CMP $0, t0
|
||||||
@ -1408,10 +1407,10 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-96
|
|||||||
ADCS y2, x2, x2; \
|
ADCS y2, x2, x2; \
|
||||||
ADCS y3, x3, x3; \
|
ADCS y3, x3, x3; \
|
||||||
ADC $0, ZR, hlp0; \
|
ADC $0, ZR, hlp0; \
|
||||||
SUBS $-1, x0, t0; \
|
SUBS const0, x0, t0; \
|
||||||
SBCS const0, x1, t1;\
|
SBCS const1, x1, t1;\
|
||||||
SBCS $-1, x2, t2; \
|
SBCS const2, x2, t2; \
|
||||||
SBCS const1, x3, t3;\
|
SBCS const3, x3, t3;\
|
||||||
SBCS $0, hlp0, hlp0;\
|
SBCS $0, hlp0, hlp0;\
|
||||||
CSEL CC, x0, t0, x0;\
|
CSEL CC, x0, t0, x0;\
|
||||||
CSEL CC, x1, t1, x1;\
|
CSEL CC, x1, t1, x1;\
|
||||||
@ -1428,8 +1427,8 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-48
|
|||||||
MOVD res+0(FP), res_ptr
|
MOVD res+0(FP), res_ptr
|
||||||
MOVD in+24(FP), a_ptr
|
MOVD in+24(FP), a_ptr
|
||||||
|
|
||||||
MOVD p256p<>+0x08(SB), const0
|
LDP p256p<>+0x00(SB), (const0, const1)
|
||||||
MOVD p256p<>+0x18(SB), const1
|
LDP p256p<>+0x10(SB), (const2, const3)
|
||||||
|
|
||||||
// Begin point double
|
// Begin point double
|
||||||
LDP 4*16(a_ptr), (x0, x1)
|
LDP 4*16(a_ptr), (x0, x1)
|
||||||
@ -1471,10 +1470,10 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-48
|
|||||||
CALL sm2P256SqrInternal<>(SB)
|
CALL sm2P256SqrInternal<>(SB)
|
||||||
|
|
||||||
// Divide by 2
|
// Divide by 2
|
||||||
ADDS $-1, y0, t0
|
ADDS const0, y0, t0
|
||||||
ADCS const0, y1, t1
|
ADCS const1, y1, t1
|
||||||
ADCS $-1, y2, t2
|
ADCS const2, y2, t2
|
||||||
ADCS const1, y3, t3
|
ADCS const3, y3, t3
|
||||||
ADC $0, ZR, hlp0
|
ADC $0, ZR, hlp0
|
||||||
|
|
||||||
ANDS $1, y0, ZR
|
ANDS $1, y0, ZR
|
||||||
@ -1530,8 +1529,8 @@ TEXT ·p256PointAddAsm(SB),0,$392-80
|
|||||||
MOVD in1+24(FP), a_ptr
|
MOVD in1+24(FP), a_ptr
|
||||||
MOVD in2+48(FP), b_ptr
|
MOVD in2+48(FP), b_ptr
|
||||||
|
|
||||||
MOVD p256p<>+0x08(SB), const0
|
LDP p256p<>+0x00(SB), (const0, const1)
|
||||||
MOVD p256p<>+0x18(SB), const1
|
LDP p256p<>+0x10(SB), (const2, const3)
|
||||||
|
|
||||||
// Begin point add
|
// Begin point add
|
||||||
LDx(z2in)
|
LDx(z2in)
|
||||||
@ -1558,21 +1557,21 @@ TEXT ·p256PointAddAsm(SB),0,$392-80
|
|||||||
STx(r)
|
STx(r)
|
||||||
|
|
||||||
MOVD $1, acc1
|
MOVD $1, acc1
|
||||||
ORR x0, x1, t0 // Check if zero mod p256
|
ORR x0, x1, acc2 // Check if zero mod p256
|
||||||
ORR x2, x3, t1
|
ORR x2, x3, acc3
|
||||||
ORR t1, t0, t0
|
ORR acc3, acc2, acc2
|
||||||
CMP $0, t0
|
CMP $0, acc2
|
||||||
CSEL EQ, acc1, ZR, hlp1
|
CSEL EQ, acc1, ZR, hlp1
|
||||||
|
|
||||||
EOR $-1, x0, t0
|
EOR const0, x0, acc2
|
||||||
EOR const0, x1, t1
|
EOR const1, x1, acc3
|
||||||
EOR $-1, x2, t2
|
EOR const2, x2, acc4
|
||||||
EOR const1, x3, t3
|
EOR const3, x3, acc5
|
||||||
|
|
||||||
ORR t0, t1, t0
|
ORR acc2, acc3, acc2
|
||||||
ORR t2, t3, t1
|
ORR acc4, acc5, acc3
|
||||||
ORR t1, t0, t0
|
ORR acc3, acc2, acc2
|
||||||
CMP $0, t0
|
CMP $0, acc2
|
||||||
CSEL EQ, acc1, hlp1, hlp1
|
CSEL EQ, acc1, hlp1, hlp1
|
||||||
|
|
||||||
LDx(z2sqr)
|
LDx(z2sqr)
|
||||||
@ -1590,21 +1589,21 @@ TEXT ·p256PointAddAsm(SB),0,$392-80
|
|||||||
STx(h)
|
STx(h)
|
||||||
|
|
||||||
MOVD $1, acc1
|
MOVD $1, acc1
|
||||||
ORR x0, x1, t0 // Check if zero mod p256
|
ORR x0, x1, acc2 // Check if zero mod p256
|
||||||
ORR x2, x3, t1
|
ORR x2, x3, acc3
|
||||||
ORR t1, t0, t0
|
ORR acc3, acc2, acc2
|
||||||
CMP $0, t0
|
CMP $0, acc2
|
||||||
CSEL EQ, acc1, ZR, hlp0
|
CSEL EQ, acc1, ZR, hlp0
|
||||||
|
|
||||||
EOR $-1, x0, t0
|
EOR const0, x0, acc2
|
||||||
EOR const0, x1, t1
|
EOR const1, x1, acc3
|
||||||
EOR $-1, x2, t2
|
EOR const2, x2, acc4
|
||||||
EOR const1, x3, t3
|
EOR const3, x3, acc5
|
||||||
|
|
||||||
ORR t0, t1, t0
|
ORR acc2, acc3, acc2
|
||||||
ORR t2, t3, t1
|
ORR acc4, acc5, acc3
|
||||||
ORR t1, t0, t0
|
ORR acc3, acc2, acc2
|
||||||
CMP $0, t0
|
CMP $0, acc2
|
||||||
CSEL EQ, acc1, hlp0, hlp0
|
CSEL EQ, acc1, hlp0, hlp0
|
||||||
|
|
||||||
AND hlp0, hlp1, hlp1
|
AND hlp0, hlp1, hlp1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user