internal/sm2ec: enable s390x asm optimization

This commit is contained in:
Sun Yimin 2024-08-26 11:31:15 +08:00 committed by GitHub
parent 53508a6644
commit 05f3bcb57c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 126 additions and 478 deletions

View File

@ -1,4 +1,4 @@
//go:build (amd64 || arm64) && !purego //go:build (amd64 || arm64 || s390x) && !purego
package sm2ec package sm2ec

View File

@ -1,4 +1,4 @@
//go:build (amd64 && !purego) || (arm64 && !purego) //go:build (amd64 || arm64 || s390x) && !purego
package sm2ec package sm2ec

View File

@ -1109,7 +1109,7 @@ TEXT sm2p256MulInternal<>(SB), NOSPLIT, $0-0
VSBIQ RED2, RED3, CAR1, RED2 // Guaranteed not to underflow VSBIQ RED2, RED3, CAR1, RED2 // Guaranteed not to underflow
VSLDB $12, T1, T0, T0 VSLDB $12, T1, T0, T0
VSLDB $12, T2, T1, T1 VSLDB $12, T2, T1, T1 // T2 Free
VACCQ T0, ADD3H, CAR1 VACCQ T0, ADD3H, CAR1
VAQ T0, ADD3H, T0 VAQ T0, ADD3H, T0
@ -1187,7 +1187,7 @@ TEXT sm2p256MulInternal<>(SB), NOSPLIT, $0-0
VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
VSLDB $12, ADD2, ADD1, T0 // ADD1 Free VSLDB $12, ADD2, ADD1, T0 // ADD1 Free
VSLDB $12, T2, ADD2, T1 // ADD2 Free VSLDB $12, T2, ADD2, T1 // ADD2 Free->T1, T2 Free
VACCQ T0, ADD3, CAR1 VACCQ T0, ADD3, CAR1
VAQ T0, ADD3, T0 VAQ T0, ADD3, T0
@ -1233,8 +1233,8 @@ TEXT sm2p256MulInternal<>(SB), NOSPLIT, $0-0
VL 32(CPOOL), SEL1 VL 32(CPOOL), SEL1
VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0] VPERM ZER, ADD1, SEL1, RED3 // [d0 0 0 d0]
VSLDB $12, ADD2, ADD1, T0 VSLDB $12, ADD2, ADD1, T0 // ADD1 Free
VSLDB $12, T2, ADD2, T1 VSLDB $12, T2, ADD2, T1 // ADD2 Free->T1, T2 Free
VACCQ T0, ADD3, CAR1 VACCQ T0, ADD3, CAR1
VAQ T0, ADD3, T0 VAQ T0, ADD3, T0
@ -1961,6 +1961,106 @@ TEXT ·p256PointAddAffineAsm(SB), NOSPLIT, $0
* Y3 = T1-Y3 * Y3 = T1-Y3
*/ */
// p256PointDoubleRound(P1ptr, P3ptr) expands to one point-doubling step: it
// loads the X, Y and Z coordinates found at byte offsets 0, 32 and 64 of
// P1ptr (two 16-byte vectors each) and stores the doubled point's X, Y and Z
// at the same offsets of P3ptr.
//
// Every load of a coordinate from P1ptr is issued before the store of the
// matching coordinate to P3ptr, so the macro can be expanded with P1ptr and
// P3ptr naming the same register, as p256PointDouble6TimesAsm does for its
// second to sixth rounds. The expansion sites in this file load PL and PH
// from CPOOL before the first expansion.
//
// NOTE(review): VPDI $0x4 with identical source operands appears to swap the
// two doublewords of a vector, converting between the in-memory limb order and
// the register layout used by the Internal routines — confirm.
// NOTE(review): the last body line ends with a continuation backslash, so the
// line directly after the macro has to stay blank.
#define p256PointDoubleRound(P1ptr, P3ptr) \
\ // X=Z1; Y=Z1; MUL; T- // T1 = Z1²
VL 80(P1ptr), X1 \ // Z1H
VPDI $0x4, X1, X1, X1 \
VL 64(P1ptr), X0 \ // Z1L
VPDI $0x4, X0, X0, X0 \
VLR X0, Y0 \
VLR X1, Y1 \
CALL sm2p256SqrInternal<>(SB) \
\
\ // SUB(X<X1-T) // T2 = X1-T1
VL 16(P1ptr), X1H \
VPDI $0x4, X1H, X1H, X1H \
VL 0(P1ptr), X1L \
VPDI $0x4, X1L, X1L, X1L \
p256SubInternal(X1,X0,X1H,X1L,T1,T0) \
\
\ // ADD(Y<X1+T) // T1 = X1+T1
p256AddInternal(Y1,Y0,X1H,X1L,T1,T0) \
\
\ // X- ; Y- ; MUL; T- // T2 = T2*T1
CALL sm2p256MulInternal<>(SB) \
\
\ // ADD(T2<T+T); ADD(T2<T2+T) // T2 = 3*T2
p256AddInternal(T2H,T2L,T1,T0,T1,T0) \
p256AddInternal(T2H,T2L,T2H,T2L,T1,T0) \
\
\// ADD(X<Y1+Y1) // Y3 = 2*Y1
VL 48(P1ptr), Y1H \
VPDI $0x4, Y1H, Y1H, Y1H \
VL 32(P1ptr), Y1L \
VPDI $0x4, Y1L, Y1L, Y1L \
p256AddInternal(X1,X0,Y1H,Y1L,Y1H,Y1L) \
\
\// X- ; Y=Z1; MUL; Z3:=T // Z3 = Y3*Z1
VL 80(P1ptr), Y1 \ // Z1H
VPDI $0x4, Y1, Y1, Y1 \
VL 64(P1ptr), Y0 \ // Z1L
VPDI $0x4, Y0, Y0, Y0 \
CALL sm2p256MulInternal<>(SB) \
VPDI $0x4, T1, T1, TT1 \
VST TT1, 80(P3ptr) \
VPDI $0x4, T0, T0, TT0 \
VST TT0, 64(P3ptr) \
\
\ // X- ; Y=X ; MUL; T- // Y3 = Y3²
VLR X0, Y0 \
VLR X1, Y1 \
CALL sm2p256SqrInternal<>(SB) \
\
\ // X=T ; Y=X1; MUL; T3=T // T3 = Y3*X1
VLR T0, X0 \
VLR T1, X1 \
VL 16(P1ptr), Y1 \
VPDI $0x4, Y1, Y1, Y1 \
VL 0(P1ptr), Y0 \
VPDI $0x4, Y0, Y0, Y0 \
CALL sm2p256MulInternal<>(SB) \
VLR T0, T3L \
VLR T1, T3H \
\
\ // X- ; Y=X ; MUL; T- // Y3 = Y3²
VLR X0, Y0 \
VLR X1, Y1 \
CALL sm2p256SqrInternal<>(SB) \
\
\ // HAL(Y3<T) // Y3 = half*Y3
p256HalfInternal(Y3H,Y3L, T1,T0) \
\
\ // X=T2; Y=T2; MUL; T- // X3 = T2²
VLR T2L, X0 \
VLR T2H, X1 \
VLR T2L, Y0 \
VLR T2H, Y1 \
CALL sm2p256SqrInternal<>(SB) \
\
\ // ADD(T1<T3+T3) // T1 = 2*T3
p256AddInternal(T1H,T1L,T3H,T3L,T3H,T3L) \
\
\ // SUB(X3<T-T1) X3:=X3 // X3 = X3-T1
p256SubInternal(X3H,X3L,T1,T0,T1H,T1L) \
VPDI $0x4, X3H, X3H, TT1 \
VST TT1, 16(P3ptr) \
VPDI $0x4, X3L, X3L, TT0 \
VST TT0, 0(P3ptr) \
\
\ // SUB(X<T3-X3) // T1 = T3-X3
p256SubInternal(X1,X0,T3H,T3L,X3H,X3L) \
\
\ // X- ; Y- ; MUL; T- // T1 = T1*T2
CALL sm2p256MulInternal<>(SB) \
\
\ // SUB(Y3<T-Y3) // Y3 = T1-Y3
p256SubInternal(Y3H,Y3L,T1,T0,Y3H,Y3L) \
\
VPDI $0x4, Y3H, Y3H, Y3H \
VST Y3H, 48(P3ptr) \
VPDI $0x4, Y3L, Y3L, Y3L \
VST Y3L, 32(P3ptr) \
TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0 TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0
MOVD res+0(FP), P3ptr MOVD res+0(FP), P3ptr
MOVD in+8(FP), P1ptr MOVD in+8(FP), P1ptr
@ -1969,107 +2069,24 @@ TEXT ·p256PointDoubleAsm(SB), NOSPLIT, $0
VL 16(CPOOL), PL VL 16(CPOOL), PL
VL 0(CPOOL), PH VL 0(CPOOL), PH
// X=Z1; Y=Z1; MUL; T- // T1 = Z1² p256PointDoubleRound(P1ptr, P3ptr)
VL 80(P1ptr), X1 // Z1H
VPDI $0x4, X1, X1, X1
VL 64(P1ptr), X0 // Z1L
VPDI $0x4, X0, X0, X0
VLR X0, Y0
VLR X1, Y1
CALL sm2p256SqrInternal<>(SB)
// SUB(X<X1-T) // T2 = X1-T1
VL 16(P1ptr), X1H
VPDI $0x4, X1H, X1H, X1H
VL 0(P1ptr), X1L
VPDI $0x4, X1L, X1L, X1L
p256SubInternal(X1,X0,X1H,X1L,T1,T0)
// ADD(Y<X1+T) // T1 = X1+T1
p256AddInternal(Y1,Y0,X1H,X1L,T1,T0)
// X- ; Y- ; MUL; T- // T2 = T2*T1
CALL sm2p256MulInternal<>(SB)
// ADD(T2<T+T); ADD(T2<T2+T) // T2 = 3*T2
p256AddInternal(T2H,T2L,T1,T0,T1,T0)
p256AddInternal(T2H,T2L,T2H,T2L,T1,T0)
// ADD(X<Y1+Y1) // Y3 = 2*Y1
VL 48(P1ptr), Y1H
VPDI $0x4, Y1H, Y1H, Y1H
VL 32(P1ptr), Y1L
VPDI $0x4, Y1L, Y1L, Y1L
p256AddInternal(X1,X0,Y1H,Y1L,Y1H,Y1L)
// X- ; Y=Z1; MUL; Z3:=T // Z3 = Y3*Z1
VL 80(P1ptr), Y1 // Z1H
VPDI $0x4, Y1, Y1, Y1
VL 64(P1ptr), Y0 // Z1L
VPDI $0x4, Y0, Y0, Y0
CALL sm2p256MulInternal<>(SB)
VPDI $0x4, T1, T1, TT1
VST TT1, 80(P3ptr)
VPDI $0x4, T0, T0, TT0
VST TT0, 64(P3ptr)
// X- ; Y=X ; MUL; T- // Y3 = Y3²
VLR X0, Y0
VLR X1, Y1
CALL sm2p256SqrInternal<>(SB)
// X=T ; Y=X1; MUL; T3=T // T3 = Y3*X1
VLR T0, X0
VLR T1, X1
VL 16(P1ptr), Y1
VPDI $0x4, Y1, Y1, Y1
VL 0(P1ptr), Y0
VPDI $0x4, Y0, Y0, Y0
CALL sm2p256MulInternal<>(SB)
VLR T0, T3L
VLR T1, T3H
// X- ; Y=X ; MUL; T- // Y3 = Y3²
VLR X0, Y0
VLR X1, Y1
CALL sm2p256SqrInternal<>(SB)
// HAL(Y3<T) // Y3 = half*Y3
p256HalfInternal(Y3H,Y3L, T1,T0)
// X=T2; Y=T2; MUL; T- // X3 = T2²
VLR T2L, X0
VLR T2H, X1
VLR T2L, Y0
VLR T2H, Y1
CALL sm2p256SqrInternal<>(SB)
// ADD(T1<T3+T3) // T1 = 2*T3
p256AddInternal(T1H,T1L,T3H,T3L,T3H,T3L)
// SUB(X3<T-T1) X3:=X3 // X3 = X3-T1
p256SubInternal(X3H,X3L,T1,T0,T1H,T1L)
VPDI $0x4, X3H, X3H, TT1
VST TT1, 16(P3ptr)
VPDI $0x4, X3L, X3L, TT0
VST TT0, 0(P3ptr)
// SUB(X<T3-X3) // T1 = T3-X3
p256SubInternal(X1,X0,T3H,T3L,X3H,X3L)
// X- ; Y- ; MUL; T- // T1 = T1*T2
CALL sm2p256MulInternal<>(SB)
// SUB(Y3<T-Y3) // Y3 = T1-Y3
p256SubInternal(Y3H,Y3L,T1,T0,Y3H,Y3L)
VPDI $0x4, Y3H, Y3H, Y3H
VST Y3H, 48(P3ptr)
VPDI $0x4, Y3L, Y3L, Y3L
VST Y3L, 32(P3ptr)
RET RET
TEXT ·p256PointDouble6TimesAsm(SB), NOSPLIT, $0 TEXT ·p256PointDouble6TimesAsm(SB), NOSPLIT, $0
MOVD res+0(FP), P3ptr
MOVD in+8(FP), P1ptr
MOVD $p256mul<>+0x00(SB), CPOOL
VL 16(CPOOL), PL
VL 0(CPOOL), PH
p256PointDoubleRound(P1ptr, P3ptr)
p256PointDoubleRound(P3ptr, P3ptr)
p256PointDoubleRound(P3ptr, P3ptr)
p256PointDoubleRound(P3ptr, P3ptr)
p256PointDoubleRound(P3ptr, P3ptr)
p256PointDoubleRound(P3ptr, P3ptr)
RET RET
#undef P3ptr #undef P3ptr

View File

@ -1,4 +1,4 @@
//go:build !purego && (amd64 || arm64) //go:build (amd64 || arm64 || s390x) && !purego
package sm2ec package sm2ec

View File

@ -4,7 +4,7 @@
// Code generated by generate.go. DO NOT EDIT. // Code generated by generate.go. DO NOT EDIT.
//go:build purego || !(amd64 || arm64) //go:build purego || !(amd64 || arm64 || s390x)
package sm2ec package sm2ec

View File

@ -7,7 +7,7 @@
// 256-bit primes" // 256-bit primes"
// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
// https://eprint.iacr.org/2013/816.pdf // https://eprint.iacr.org/2013/816.pdf
//go:build (amd64 || arm64) && !purego //go:build (amd64 || arm64 || s390x) && !purego
package sm2ec package sm2ec

View File

@ -1,65 +0,0 @@
//go:build !purego
package sm2ec
// p256Element is a P-256 base field element in [0, P-1] in the Montgomery
// domain (with R = 2²⁵⁶), stored as four uint64 limbs in little-endian order.
type p256Element [4]uint64
// p256OrdElement is a P-256 scalar field element in [0, ord(G)-1] in the
// Montgomery domain (with R = 2²⁵⁶), stored as four uint64 limbs in
// little-endian order.
type p256OrdElement [4]uint64
// p256Mul performs a Montgomery multiplication in the base field:
// it sets res = in1 * in2 * R⁻¹ mod p.
//
//go:noescape
func p256Mul(res, in1, in2 *p256Element)
// p256Sqr sets res to the Montgomery square of in, with the squaring repeated
// n times (n >= 1).
//
//go:noescape
func p256Sqr(res, in *p256Element, n int)
// p256FromMont performs a Montgomery multiplication by R⁻¹ (that is, by 1
// outside the domain). It sets res = in * R⁻¹, bringing res out of the
// Montgomery domain.
//
//go:noescape
func p256FromMont(res, in *p256Element)
// p256NegCond sets val = -val mod p if cond is not 0, and leaves val
// unchanged otherwise.
//
//go:noescape
func p256NegCond(val *p256Element, cond int)
// p256MovCond sets res = b if cond is 0, and res = a otherwise.
//
//go:noescape
func p256MovCond(res, a, b *SM2P256Point, cond int)
// p256BigToLittle converts the 32-byte value in into the limb form of res.
// NOTE(review): presumably in is big-endian and res receives little-endian
// limbs, as the name suggests — confirm against the assembly.
//
//go:noescape
func p256BigToLittle(res *p256Element, in *[32]byte)
// p256LittleToBig converts the limbs of in into the 32-byte array res.
// NOTE(review): presumably res receives the big-endian encoding, as the name
// suggests — confirm against the assembly.
//
//go:noescape
func p256LittleToBig(res *[32]byte, in *p256Element)
// p256OrdBigToLittle converts the 32-byte value in into the limb form of res.
// NOTE(review): presumably in is big-endian and res receives little-endian
// limbs, as the name suggests — confirm against the assembly.
//
//go:noescape
func p256OrdBigToLittle(res *p256OrdElement, in *[32]byte)
// p256OrdLittleToBig converts the limbs of in into the 32-byte array res.
// NOTE(review): presumably res receives the big-endian encoding, as the name
// suggests — confirm against the assembly.
//
//go:noescape
func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement)
// p256OrdReduce ensures s is in the range [0, ord(G)-1], updating s in place.
//
//go:noescape
func p256OrdReduce(s *p256OrdElement)
// p256OrdMul performs a Montgomery multiplication modulo ord(G):
// it sets res = in1 * in2 * R⁻¹.
//
//go:noescape
func p256OrdMul(res, in1, in2 *p256OrdElement)
// p256OrdSqr sets res to the Montgomery square of in modulo ord(G), with the
// squaring repeated n times (n >= 1).
//
//go:noescape
func p256OrdSqr(res, in *p256OrdElement, n int)

View File

@ -1,304 +0,0 @@
//go:build s390x && !purego
package sm2ec
import (
"crypto/rand"
"io"
"math/big"
"testing"
"time"
)
// bigOne is the constant 1; montFromBig shifts it left by 256 bits to build R.
var bigOne = big.NewInt(1)
// fromBig writes the magnitude of big into out, one big.Word per limb with the
// least significant limb first. Limbs the value does not reach are zeroed.
func fromBig(out *[4]uint64, big *big.Int) {
	*out = [4]uint64{}
	words := big.Bits()
	for idx := 0; idx < len(words); idx++ {
		out[idx] = uint64(words[idx])
	}
}
// montFromBig stores n * R mod P into out, where R = 2²⁵⁶ and P is the
// hard-coded prime below, i.e. it converts n into the Montgomery domain.
func montFromBig(out *[4]uint64, n *big.Int) {
	modulus, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	radix := new(big.Int).Lsh(bigOne, 256)
	mont := new(big.Int).Mul(n, radix)
	fromBig(out, mont.Mod(mont, modulus))
}
// toBigInt serialises in with p256LittleToBig and returns the resulting 32
// bytes interpreted as a big-endian integer.
func toBigInt(in *p256Element) *big.Int {
	buf := new([32]byte)
	p256LittleToBig(buf, in)
	return new(big.Int).SetBytes(buf[:])
}
// ordElmToBigInt serialises in with p256OrdLittleToBig and returns the
// resulting 32 bytes interpreted as a big-endian integer.
func ordElmToBigInt(in *p256OrdElement) *big.Int {
	buf := new([32]byte)
	p256OrdLittleToBig(buf, in)
	return new(big.Int).SetBytes(buf[:])
}
// testP256FromMont converts v into the Montgomery domain with montFromBig,
// runs p256FromMont on it and reports an error unless the result equals v.
func testP256FromMont(v *big.Int, t *testing.T) {
	var mont, plain p256Element
	montFromBig((*[4]uint64)(&mont), v)
	p256FromMont(&plain, &mont)
	if got := toBigInt(&plain); got.Cmp(v) != 0 {
		t.Errorf("p256FromMont failed for %x", v.Bytes())
	}
}
// TestP256FromMont round-trips the values 0..19 and P-1..P-19 through the
// Montgomery domain.
func TestP256FromMont(t *testing.T) {
	prime, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	for i := int64(0); i < 20; i++ {
		testP256FromMont(big.NewInt(i), t)
		if i == 0 {
			// P-0 is outside [0, P-1], so only the low value is checked.
			continue
		}
		testP256FromMont(new(big.Int).Sub(prime, big.NewInt(i)), t)
	}
}
// testP256OrderReduce loads v into a p256OrdElement, runs p256OrdReduce on it
// and reports an error unless the result equals expected.
func testP256OrderReduce(v, expected *big.Int, t *testing.T) {
	var elem p256OrdElement
	fromBig((*[4]uint64)(&elem), v)
	p256OrdReduce(&elem)
	if got := ordElmToBigInt(&elem); got.Cmp(expected) != 0 {
		t.Errorf("p256OrdReduce failed for %x, expected %x", v.Bytes(), expected.Bytes())
	}
}
// TestP256OrderReduce checks p256OrdReduce on values just above zero, at and
// just above the modulus, and just below the modulus.
func TestP256OrderReduce(t *testing.T) {
	order, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	for i := int64(0); i < 20; i++ {
		// Already reduced: i stays i.
		small := big.NewInt(i)
		testP256OrderReduce(small, small, t)
		// One modulus too large: order+i reduces to i.
		testP256OrderReduce(new(big.Int).Add(order, big.NewInt(i)), big.NewInt(i), t)
	}
	testP256OrderReduce(order, big.NewInt(0), t)
	for i := int64(1); i < 20; i++ {
		below := new(big.Int).Sub(order, big.NewInt(i))
		testP256OrderReduce(below, below, t)
	}
}
// p256OrderFromMont takes in out of the Montgomery domain and returns its
// 32-byte encoding as produced by p256OrdLittleToBig. The conversion is done
// in place, so in holds the converted value afterwards.
func p256OrderFromMont(in *p256OrdElement) []byte {
	// A Montgomery multiplication by 1 multiplies by R⁻¹, which cancels the
	// factor R carried by a value in the domain.
	p256OrdMul(in, in, &p256OrdElement{1})
	encoded := new([32]byte)
	p256OrdLittleToBig(encoded, in)
	return encoded[:]
}
// p256OrdMulTest checks p256OrdMul against math/big. x and y are moved into
// the Montgomery domain (multiplied by r modulo p), multiplied with
// p256OrdMul, converted back with p256OrderFromMont and compared with
// x*y mod p. A mismatch stops the test and reports the operands and both
// results.
func p256OrdMulTest(t *testing.T, x, y, p, r *big.Int) {
	t.Helper()
	x1 := new(big.Int).Mul(x, r)
	x1 = x1.Mod(x1, p)
	y1 := new(big.Int).Mul(y, r)
	y1 = y1.Mod(y1, p)
	ax := new(p256OrdElement)
	ay := new(p256OrdElement)
	res2 := new(p256OrdElement)
	fromBig((*[4]uint64)(ax), x1)
	fromBig((*[4]uint64)(ay), y1)
	p256OrdMul(res2, ax, ay)
	resInt := new(big.Int).SetBytes(p256OrderFromMont(res2))
	expected := new(big.Int).Mul(x, y)
	expected = expected.Mod(expected, p)
	if resInt.Cmp(expected) != 0 {
		t.Fatalf("p256OrdMul(%x, %x) = %x, want %x", x, y, resInt, expected)
	}
}
// TestP256OrdMulOrdMinus1 multiplies the largest reduced scalar, ord-1, by
// itself.
func TestP256OrdMulOrdMinus1(t *testing.T) {
	order, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	radix, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	top := new(big.Int).Sub(order, big.NewInt(1))
	p256OrdMulTest(t, top, top, order, radix)
}
// TestFuzzyP256OrdMul runs p256OrdMulTest on random 256-bit operand pairs
// until a time budget expires: 10ms with -short, 2s otherwise.
func TestFuzzyP256OrdMul(t *testing.T) {
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	var scalar1 [32]byte
	var scalar2 [32]byte

	budget := 2 * time.Second
	if testing.Short() {
		budget = 10 * time.Millisecond
	}
	timeout := time.NewTimer(budget)
	defer timeout.Stop()

	for {
		select {
		case <-timeout.C:
			return
		default:
		}
		// A failed read would leave stale operands in the buffers, so stop
		// instead of silently re-testing old input.
		if _, err := io.ReadFull(rand.Reader, scalar1[:]); err != nil {
			t.Fatalf("reading random operand: %v", err)
		}
		if _, err := io.ReadFull(rand.Reader, scalar2[:]); err != nil {
			t.Fatalf("reading random operand: %v", err)
		}
		x := new(big.Int).SetBytes(scalar1[:])
		y := new(big.Int).SetBytes(scalar2[:])
		p256OrdMulTest(t, x, y, p, r)
	}
}
// p256OrderSqrTest checks a single p256OrdSqr against math/big. x is moved
// into the Montgomery domain (multiplied by r modulo p), squared once with
// p256OrdSqr, converted back with p256OrderFromMont and compared with
// x*x mod p. A mismatch stops the test and reports the operand and both
// results.
func p256OrderSqrTest(t *testing.T, x, p, r *big.Int) {
	t.Helper()
	x1 := new(big.Int).Mul(x, r)
	x1 = x1.Mod(x1, p)
	ax := new(p256OrdElement)
	res2 := new(p256OrdElement)
	fromBig((*[4]uint64)(ax), x1)
	p256OrdSqr(res2, ax, 1)
	resInt := new(big.Int).SetBytes(p256OrderFromMont(res2))
	expected := new(big.Int).Mul(x, x)
	expected = expected.Mod(expected, p)
	if resInt.Cmp(expected) != 0 {
		t.Fatalf("p256OrdSqr(%x) = %x, want %x", x, resInt, expected)
	}
}
// TestP256OrdSqrOrdMinus1 squares the largest reduced scalar, ord-1.
func TestP256OrdSqrOrdMinus1(t *testing.T) {
	order, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	radix, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	top := new(big.Int).Sub(order, big.NewInt(1))
	p256OrderSqrTest(t, top, order, radix)
}
// TestFuzzyP256OrdSqr runs p256OrderSqrTest on random 256-bit operands until
// a time budget expires: 10ms with -short, 2s otherwise.
func TestFuzzyP256OrdSqr(t *testing.T) {
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	var scalar1 [32]byte

	budget := 2 * time.Second
	if testing.Short() {
		budget = 10 * time.Millisecond
	}
	timeout := time.NewTimer(budget)
	defer timeout.Stop()

	for {
		select {
		case <-timeout.C:
			return
		default:
		}
		// A failed read would leave a stale operand in the buffer, so stop
		// instead of silently re-testing old input.
		if _, err := io.ReadFull(rand.Reader, scalar1[:]); err != nil {
			t.Fatalf("reading random operand: %v", err)
		}
		x := new(big.Int).SetBytes(scalar1[:])
		p256OrderSqrTest(t, x, p, r)
	}
}
// p256MulTest checks p256Mul against math/big. x and y are moved into the
// Montgomery domain (multiplied by r modulo p), multiplied with p256Mul,
// converted back with p256FromMont and compared with x*y mod p. A mismatch
// stops the test and reports the operands and both results.
func p256MulTest(t *testing.T, x, y, p, r *big.Int) {
	t.Helper()
	x1 := new(big.Int).Mul(x, r)
	x1 = x1.Mod(x1, p)
	y1 := new(big.Int).Mul(y, r)
	y1 = y1.Mod(y1, p)
	ax := new(p256Element)
	ay := new(p256Element)
	res := new(p256Element)
	res2 := new(p256Element)
	fromBig((*[4]uint64)(ax), x1)
	fromBig((*[4]uint64)(ay), y1)
	p256Mul(res2, ax, ay)
	p256FromMont(res, res2)
	resInt := toBigInt(res)
	expected := new(big.Int).Mul(x, y)
	expected = expected.Mod(expected, p)
	if resInt.Cmp(expected) != 0 {
		t.Fatalf("p256Mul(%x, %x) = %x, want %x", x, y, resInt, expected)
	}
}
// TestP256MulPMinus1 multiplies the largest reduced field element, P-1, by
// itself.
func TestP256MulPMinus1(t *testing.T) {
	prime, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	radix, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	top := new(big.Int).Sub(prime, big.NewInt(1))
	p256MulTest(t, top, top, prime, radix)
}
// TestFuzzyP256Mul runs p256MulTest on random 256-bit operand pairs until a
// time budget expires: 10ms with -short, 2s otherwise.
func TestFuzzyP256Mul(t *testing.T) {
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	var scalar1 [32]byte
	var scalar2 [32]byte

	budget := 2 * time.Second
	if testing.Short() {
		budget = 10 * time.Millisecond
	}
	timeout := time.NewTimer(budget)
	defer timeout.Stop()

	for {
		select {
		case <-timeout.C:
			return
		default:
		}
		// A failed read would leave stale operands in the buffers, so stop
		// instead of silently re-testing old input.
		if _, err := io.ReadFull(rand.Reader, scalar1[:]); err != nil {
			t.Fatalf("reading random operand: %v", err)
		}
		if _, err := io.ReadFull(rand.Reader, scalar2[:]); err != nil {
			t.Fatalf("reading random operand: %v", err)
		}
		x := new(big.Int).SetBytes(scalar1[:])
		y := new(big.Int).SetBytes(scalar2[:])
		p256MulTest(t, x, y, p, r)
	}
}
// p256SqrTest checks a single p256Sqr against math/big. x is moved into the
// Montgomery domain (multiplied by r modulo p), squared once with p256Sqr,
// converted back with p256FromMont and compared with x*x mod p. A mismatch
// stops the test and reports the operand and both results.
func p256SqrTest(t *testing.T, x, p, r *big.Int) {
	t.Helper()
	x1 := new(big.Int).Mul(x, r)
	x1 = x1.Mod(x1, p)
	ax := new(p256Element)
	res := new(p256Element)
	res2 := new(p256Element)
	fromBig((*[4]uint64)(ax), x1)
	p256Sqr(res2, ax, 1)
	p256FromMont(res, res2)
	resInt := toBigInt(res)
	expected := new(big.Int).Mul(x, x)
	expected = expected.Mod(expected, p)
	if resInt.Cmp(expected) != 0 {
		t.Fatalf("p256Sqr(%x) = %x, want %x", x, resInt, expected)
	}
}
// TestP256SqrPMinus1 squares the largest reduced field element, P-1.
func TestP256SqrPMinus1(t *testing.T) {
	prime, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	radix, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	top := new(big.Int).Sub(prime, big.NewInt(1))
	p256SqrTest(t, top, prime, radix)
}
// TestFuzzyP256Sqr runs p256SqrTest on random 256-bit operands until a time
// budget expires: 10ms with -short, 2s otherwise.
func TestFuzzyP256Sqr(t *testing.T) {
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
	var scalar1 [32]byte

	budget := 2 * time.Second
	if testing.Short() {
		budget = 10 * time.Millisecond
	}
	timeout := time.NewTimer(budget)
	defer timeout.Stop()

	for {
		select {
		case <-timeout.C:
			return
		default:
		}
		// A failed read would leave a stale operand in the buffer, so stop
		// instead of silently re-testing old input.
		if _, err := io.ReadFull(rand.Reader, scalar1[:]); err != nil {
			t.Fatalf("reading random operand: %v", err)
		}
		x := new(big.Int).SetBytes(scalar1[:])
		p256SqrTest(t, x, p, r)
	}
}

View File

@ -1,4 +1,4 @@
//go:build (amd64 || arm64) && !purego //go:build (amd64 || arm64 || s390x) && !purego
package sm2ec package sm2ec

View File

@ -1,4 +1,4 @@
//go:build purego || !(amd64 || arm64) //go:build purego || !(amd64 || arm64 || s390x)
package sm2ec package sm2ec