sm9/bn256: special square for final exp & optimize gfp2/gfp12 square #137 #139

This commit is contained in:
Sun Yimin 2023-07-05 17:58:19 +08:00 committed by GitHub
parent 89f3b0f6d3
commit 5f72151e74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 420 additions and 241 deletions

View File

@ -10,7 +10,7 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) {
D.Square(D).Sub(D, r2).Sub(D, &r.t).Mul(D, &r.t) // D = ((Yp + Zr)^2 - Zr^2 - Yp^2)*Zr^2 = 2Yp*Zr^3
H := (&gfP2{}).Sub(B, &r.x) // H = Xp * Zr^2 - Xr
I := (&gfP2{}).SquareNC(H) // I = (Xp * Zr^2 - Xr)^2 = Xp^2*Zr^4 + Xr^2 - 2Xr*Xp*Zr^2
I := (&gfP2{}).SquareNC(H) // I = (Xp * Zr^2 - Xr)^2 = Xp^2*Zr^4 + Xr^2 - 2Xr*Xp*Zr^2
E := (&gfP2{}).Add(I, I) // E = 2*(Xp * Zr^2 - Xr)^2
E.Add(E, E) // E = 4*(Xp * Zr^2 - Xr)^2
@ -37,8 +37,8 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) {
t.Add(&p.y, &rOut.z).Square(t).Sub(t, r2).Sub(t, &rOut.t) // t = (Yp + rOut.Z)^2 - Yp^2 - rOut.Z^2 = 2Yp*rOut.Z
t2.Mul(L1, &p.x)
t2.Add(t2, t2) // t2 = 2 L1 * Xp
a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z
t2.Add(t2, t2) // t2 = 2 L1 * Xp
a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z
c.MulScalar(&rOut.z, &q.y)
c.Add(c, c)
@ -139,9 +139,9 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 {
ret.Square(ret)
}
mulLine(ret, a, b, c)
tmpR= r
tmpR = r
r = newR
newR= tmpR
newR = tmpR
switch sixUPlus2NAF[i-1] {
case 1:
lineFunctionAdd(r, aAffine, newR, bAffine, r2, a, b, c)
@ -152,9 +152,9 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 {
}
mulLine(ret, a, b, c)
tmpR= r
tmpR = r
r = newR
newR= tmpR
newR = tmpR
}
// In order to calculate Q1 we have to convert q from the sextic twist
@ -187,9 +187,9 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 {
r2.Square(&q1.y)
lineFunctionAdd(r, q1, newR, bAffine, r2, a, b, c)
mulLine(ret, a, b, c)
tmpR= r
tmpR = r
r = newR
newR= tmpR
newR = tmpR
r2.Square(&minusQ2.y)
lineFunctionAdd(r, minusQ2, newR, bAffine, r2, a, b, c)
@ -202,51 +202,50 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 {
// GF(p¹²) to obtain an element of GT. https://eprint.iacr.org/2007/390.pdf
// http://cryptojedi.org/papers/dclxvi-20100714.pdf
func finalExponentiation(in *gfP12) *gfP12 {
t1 := &gfP12{}
// This is the p^6-Frobenius
t1.FrobeniusP6(in)
t1 := (&gfP12{}).FrobeniusP6(in)
inv := &gfP12{}
inv.Invert(in)
inv := (&gfP12{}).Invert(in)
t1.Mul(t1, inv)
t2 := (&gfP12{}).FrobeniusP2(t1)
t1.Mul(t1, t2)
t2 := inv.FrobeniusP2(t1) // reuse inv
t1.Mul(t1, t2) // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation
fp := (&gfP12{}).Frobenius(t1)
fp2 := (&gfP12{}).FrobeniusP2(t1)
fp3 := (&gfP12{}).Frobenius(fp2)
fu := (&gfP12{}).gfP12ExpU(t1)
fu2 := (&gfP12{}).gfP12ExpU(fu)
fu3 := (&gfP12{}).gfP12ExpU(fu2)
y0 := &gfP12{}
y0.MulNC(fp, fp2).Mul(y0, fp3) // y0 = (t1^p) * (t1^(p^2)) * (t1^(p^3))
// reuse fp, fp2, fp3 local variables
// [gfP12ExpU] is the most time-consuming operation
fu := fp.gfP12ExpU(t1)
fu2 := fp2.gfP12ExpU(fu)
fu3 := fp3.gfP12ExpU(fu2)
y3 := (&gfP12{}).Frobenius(fu)
fu2p := (&gfP12{}).Frobenius(fu2)
fu3p := (&gfP12{}).Frobenius(fu3)
y2 := (&gfP12{}).FrobeniusP2(fu2)
y0 := &gfP12{}
y0.MulNC(fp, fp2).Mul(y0, fp3)
y1 := (&gfP12{}).Conjugate(t1) // y1 = 1 / t1
y2 := (&gfP12{}).FrobeniusP2(fu2) // y2 = (t1^(u^2))^(p^2)
y3 := (&gfP12{}).Frobenius(fu) // y3 = (t1^u)^p
y3.Conjugate(y3) // y3 = 1 / (t1^u)^p
y4 := (&gfP12{}).MulNC(fu, fu2p) // y4 = (t1^u) * ((t1^(u^2))^p)
y4.Conjugate(y4) // y4 = 1 / ((t1^u) * ((t1^(u^2))^p))
y5 := fu2p.Conjugate(fu2) // y5 = 1 / t1^(u^2), reuse fu2p
y6 := (&gfP12{}).MulNC(fu3, fu3p) // y6 = t1^(u^3) * (t1^(u^3))^p
y6.Conjugate(y6) // y6 = 1 / (t1^(u^3) * (t1^(u^3))^p)
y1 := (&gfP12{}).Conjugate(t1)
y5 := (&gfP12{}).Conjugate(fu2)
y3.Conjugate(y3)
y4 := (&gfP12{}).MulNC(fu, fu2p)
y4.Conjugate(y4)
y6 := (&gfP12{}).MulNC(fu3, fu3p)
y6.Conjugate(y6)
t0 := (&gfP12{}).SquareNC(y6)
// https://eprint.iacr.org/2008/490.pdf
t0 := (&gfP12{}).SpecialSquareNC(y6)
t0.Mul(t0, y4).Mul(t0, y5)
t1.Mul(y3, y5).Mul(t1, t0)
t0.Mul(t0, y2)
t1.Square(t1).Mul(t1, t0).Square(t1)
t1.SpecialSquare(t1).Mul(t1, t0).SpecialSquare(t1)
t0.Mul(t1, y1)
t1.Mul(t1, y0)
t0.Square(t0).Mul(t0, t1)
t0.SpecialSquare(t0).Mul(t0, t1)
return t0
}

View File

@ -9,6 +9,7 @@ import (
type gfP [4]uint64
var genericZero = &gfP{0}
var zero = newGFp(0)
var one = newGFp(1)
var two = newGFp(2)

View File

@ -140,36 +140,7 @@ func (e *gfP12) Mul(a, b *gfP12) *gfP12 {
// +x0*z1*w^2 + x0*y1*v + x0*x1*v*w
//=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2
tmp := &gfP12{}
tx := &tmp.x
ty := &tmp.y
tz := &tmp.z
t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}
v0.MulNC(&a.z, &b.z)
v1.MulNC(&a.y, &b.y)
v2.MulNC(&a.x, &b.x)
t.Add(&a.y, &a.x)
tz.Add(&b.y, &b.x)
t.Mul(t, tz)
t.Sub(t, v1)
t.Sub(t, v2)
t.MulV1(t)
tz.Add(t, v0)
t.Add(&a.z, &a.y)
ty.Add(&b.z, &b.y)
ty.Mul(t, ty)
ty.Sub(ty, v0)
ty.Sub(ty, v1)
t.MulV1(v2)
ty.Add(ty, t)
t.Add(&a.z, &a.x)
tx.Add(&b.z, &b.x)
tx.Mul(tx, t)
tx.Sub(tx, v0)
tx.Add(tx, v1)
tx.Sub(tx, v2)
tmp.MulNC(a, b)
gfp12Copy(e, tmp)
return e
}
@ -180,6 +151,7 @@ func (e *gfP12) MulNC(a, b *gfP12) *gfP12 {
// +y0*z1*w + y0*y1*w^2 + y0*x1*v
// +x0*z1*w^2 + x0*y1*v + x0*x1*v*w
//=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2
// Karatsuba method
tx := &e.x
ty := &e.y
tz := &e.z
@ -219,25 +191,7 @@ func (e *gfP12) Square(a *gfP12) *gfP12 {
// (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2
// (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2
tmp := &gfP12{}
tx := &tmp.x
ty := &tmp.y
tz := &tmp.z
t := &gfP4{}
tz.SquareNC(&a.z)
t.MulV(&a.x, &a.y)
t.Add(t, t)
tz.Add(tz, t)
ty.SquareVNC(&a.x)
t.Mul(&a.y, &a.z)
t.Add(t, t)
ty.Add(ty, t)
tx.SquareNC(&a.y)
t.Mul(&a.x, &a.z)
t.Add(t, t)
tx.Add(tx, t)
tmp.SquareNC(a)
gfp12Copy(e, tmp)
return e
}
@ -247,25 +201,148 @@ func (e *gfP12) SquareNC(a *gfP12) *gfP12 {
// z^2 + z*y*w + z*x*w^2 + y*z*w + y^2*w^2 + y*x*v + x*z*w^2 + x*y*v + x^2 *v *w
// (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2
// (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2
// Karatsuba method
tx := &e.x
ty := &e.y
tz := &e.z
t := &gfP4{}
t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}
v0.SquareNC(&a.z)
v1.SquareNC(&a.y)
v2.SquareNC(&a.x)
tz.SquareNC(&a.z)
t.MulV(&a.x, &a.y)
t.Add(t, t)
tz.Add(tz, t)
t.Add(&a.y, &a.x)
tz.SquareNC(t)
tz.Sub(tz, v1)
tz.Sub(tz, v2)
tz.MulV1(tz)
tz.Add(tz, v0)
ty.SquareVNC(&a.x)
t.Mul(&a.y, &a.z)
t.Add(t, t)
t.Add(&a.z, &a.y)
ty.SquareNC(t)
ty.Sub(ty, v0)
ty.Sub(ty, v1)
t.MulV1(v2)
ty.Add(ty, t)
tx.SquareNC(&a.y)
t.Mul(&a.x, &a.z)
t.Add(t, t)
tx.Add(tx, t)
t.Add(&a.z, &a.x)
tx.SquareNC(t)
tx.Sub(tx, v0)
tx.Add(tx, v1)
tx.Sub(tx, v2)
return e
}
// SpecialSquare sets e to the square of a and returns e.
//
// This is the special squaring for elements in T_6(fp2), i.e. elements
// obtained after the easy part of the final exponentiation. It is used in
// the hard part of the final exponentiation and follows the formulas of
// Granger/Scott (PKC 2010).
//
// The square is computed into a scratch element and copied into e
// afterwards, so e and a may refer to the same element.
func (e *gfP12) SpecialSquare(a *gfP12) *gfP12 {
	scratch := (&gfP12{}).SpecialSquareNC(a)
	gfp12Copy(e, scratch)
	return e
}
// SpecialSquares applies the special squaring (same formulas as
// SpecialSquareNC) repeatedly to a, stores the result in e and returns e.
//
// One squaring is always performed, so any n <= 1 yields a single squaring;
// for larger n exactly n squarings are applied. Intermediate values live in
// two scratch elements that are used alternately, and the final value is
// copied into e at the end, so e and a may refer to the same element.
func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 {
	// cur receives the output of the current round; spare is the other
	// scratch buffer, which holds the previous round's output after a swap.
	cur, spare := &gfP12{}, &gfP12{}
	tripleX, tripleY, tripleZ := &gfP4{}, &gfP4{}, &gfP4{}

	src := a
	for round := 0; ; {
		dx, dy, dz := &cur.x, &cur.y, &cur.z

		dx.SquareVNC(&src.x) // (t02, t10)
		dy.SquareNC(&src.y)  // (t12, t01)
		dz.SquareNC(&src.z)  // (t11, t00)

		// tripleX/Y/Z = 3 * (the squares computed above).
		tripleX.Add(dx, dx)
		tripleX.Add(dx, tripleX)
		tripleY.Add(dy, dy)
		tripleY.Add(dy, tripleY)
		tripleZ.Add(dz, dz)
		tripleZ.Add(dz, tripleZ)

		// Overwrite the destination with twice the source coefficient,
		// negating one half of each.
		dx.Add(&src.x, &src.x) // (f12, f01)
		dx.y.Neg(&dx.y)
		dy.Add(&src.y, &src.y) // (f02, f10)
		dy.x.Neg(&dy.x)
		dz.Add(&src.z, &src.z) // (f11, f00)
		dz.y.Neg(&dz.y)

		dx.Add(tripleY, dx)
		dy.Add(tripleX, dy)
		dz.Add(tripleZ, dz)

		round++
		if round >= n {
			break
		}
		// The value just produced becomes the next input; write the next
		// round into the other scratch buffer.
		src = cur
		cur, spare = spare, cur
	}

	gfp12Copy(e, cur)
	return e
}
// SpecialSquareNC is the special squaring written directly into e, without
// an intermediate copy; it returns e.
//
// e must not be the same element as a: the coefficients of e are overwritten
// while the coefficients of a are still being read. Use SpecialSquare when
// the receiver and the argument may coincide.
func (e *gfP12) SpecialSquareNC(a *gfP12) *gfP12 {
	ex, ey, ez := &e.x, &e.y, &e.z
	tripleX, tripleY, tripleZ := &gfP4{}, &gfP4{}, &gfP4{}

	// x coefficient: tripleX = 3 * SquareVNC(a.x), then ex = 2*a.x with its
	// y half negated.
	ex.SquareVNC(&a.x) // (t02, t10)
	tripleX.Add(ex, ex)
	tripleX.Add(ex, tripleX)
	ex.Add(&a.x, &a.x) // (f12, f01)
	ex.y.Neg(&ex.y)

	// y coefficient: tripleY = 3 * a.y^2, then ey = 2*a.y with its x half
	// negated.
	ey.SquareNC(&a.y) // (t12, t01)
	tripleY.Add(ey, ey)
	tripleY.Add(ey, tripleY)
	ey.Add(&a.y, &a.y) // (f02, f10)
	ey.x.Neg(&ey.x)

	// z coefficient: tripleZ = 3 * a.z^2, then ez = 2*a.z with its y half
	// negated.
	ez.SquareNC(&a.z) // (t11, t00)
	tripleZ.Add(ez, ez)
	tripleZ.Add(ez, tripleZ)
	ez.Add(&a.z, &a.z) // (f11, f00)
	ez.y.Neg(&ez.y)

	// Combine: note that the x and y triples are cross-added.
	ex.Add(tripleY, ex)
	ey.Add(tripleX, ey)
	ez.Add(tripleZ, ez)

	return e
}
@ -275,51 +352,68 @@ func (e *gfP12) Squares(a *gfP12, n int) *gfP12 {
tx := &in.x
ty := &in.y
tz := &in.z
t := &gfP4{}
t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}
tz.SquareNC(&a.z)
t.MulV(&a.x, &a.y)
t.Add(t, t)
tz.Add(tz, t)
v0.SquareNC(&a.z)
v1.SquareNC(&a.y)
v2.SquareNC(&a.x)
ty.SquareVNC(&a.x)
t.Mul(&a.y, &a.z)
t.Add(t, t)
t.Add(&a.y, &a.x)
tz.SquareNC(t)
tz.Sub(tz, v1)
tz.Sub(tz, v2)
tz.MulV1(tz)
tz.Add(tz, v0)
t.Add(&a.z, &a.y)
ty.SquareNC(t)
ty.Sub(ty, v0)
ty.Sub(ty, v1)
t.MulV1(v2)
ty.Add(ty, t)
tx.SquareNC(&a.y)
t.Mul(&a.x, &a.z)
t.Add(t, t)
tx.Add(tx, t)
t.Add(&a.z, &a.x)
tx.SquareNC(t)
tx.Sub(tx, v0)
tx.Add(tx, v1)
tx.Sub(tx, v2)
tmp := &gfP12{}
var tmp2 *gfP12
tx = &tmp.x
ty = &tmp.y
tz = &tmp.z
for i := 1; i < n; i++ {
tz.SquareNC(&in.z)
t.MulV(&in.x, &in.y)
t.Add(t, t)
tz.Add(tz, t)
ty.SquareVNC(&in.x)
t.Mul(&in.y, &in.z)
t.Add(t, t)
for i := 1; i < n; i++ {
tx = &tmp.x
ty = &tmp.y
tz = &tmp.z
v0.SquareNC(&in.z)
v1.SquareNC(&in.y)
v2.SquareNC(&in.x)
t.Add(&in.y, &in.x)
tz.SquareNC(t)
tz.Sub(tz, v1)
tz.Sub(tz, v2)
tz.MulV1(tz)
tz.Add(tz, v0)
t.Add(&in.z, &in.y)
ty.SquareNC(t)
ty.Sub(ty, v0)
ty.Sub(ty, v1)
t.MulV1(v2)
ty.Add(ty, t)
tx.SquareNC(&in.y)
t.Mul(&in.x, &in.z)
t.Add(t, t)
tx.Add(tx, t)
t.Add(&in.z, &in.x)
tx.SquareNC(t)
tx.Sub(tx, v0)
tx.Add(tx, v1)
tx.Sub(tx, v2)
// Switch references
tmp2 = in
in = tmp
tmp = tmp2
tx = &tmp.x
ty = &tmp.y
tz = &tmp.z
}
gfp12Copy(e, in)
return e

View File

@ -1,5 +1,6 @@
package bn256
// Use special square
func (e *gfP12) gfP12ExpU(x *gfP12) *gfP12 {
// The sequence of 10 multiplications and 61 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
@ -20,23 +21,23 @@ func (e *gfP12) gfP12ExpU(x *gfP12) *gfP12 {
var t2 = new(gfP12)
var t3 = new(gfP12)
t2.SquareNC(x)
t1.SquareNC(t2)
t2.SpecialSquareNC(x)
t1.SpecialSquareNC(t2)
z.MulNC(x, t1)
t0.MulNC(t1, z)
t2.Mul(t2, t0)
t3.MulNC(x, t2)
t3.Squares(t3, 40)
t3.SpecialSquares(t3, 40)
t3.Mul(t2, t3)
t3.Squares(t3, 7)
t3.SpecialSquares(t3, 7)
t2.Mul(t2, t3)
t1.Mul(t1, t2)
t1.Squares(t1, 4)
t1.SpecialSquares(t1, 4)
t0.Mul(t0, t1)
t0.Square(t0)
t0.SpecialSquare(t0)
t0.Mul(x, t0)
t0.Squares(t0, 6)
t0.SpecialSquares(t0, 6)
z.Mul(z, t0)
z.Square(z)
z.SpecialSquare(z)
return e
}

View File

@ -35,6 +35,31 @@ func Test_gfP12Square(t *testing.T) {
}
}
// TestSpecialSquare checks that SpecialSquare and the generic Square agree on
// an element that has gone through the first two parts of the final
// exponentiation.
func TestSpecialSquare(t *testing.T) {
	in := &gfP12{
		testdataP4,
		testdataP4,
		*(&gfP4{}).SetOne(),
	}

	// f = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation.
	f := (&gfP12{}).FrobeniusP6(in) // the p^6-Frobenius
	inv := (&gfP12{}).Invert(in)
	f.Mul(f, inv)
	f.Mul(f, inv.FrobeniusP2(f)) // inv's storage is reused for the p^2-Frobenius

	want := (&gfP12{}).Square(f)
	got := (&gfP12{}).SpecialSquare(f)
	if *got != *want {
		t.Errorf("not same got=%v, expected=%v", got, want)
	}
}
func BenchmarkGfP12Square(b *testing.B) {
x := &gfP12{
testdataP4,
@ -49,6 +74,52 @@ func BenchmarkGfP12Square(b *testing.B) {
}
}
// BenchmarkGfP12SpecialSquare measures a single SpecialSquare on an element
// that has gone through the first two parts of the final exponentiation.
func BenchmarkGfP12SpecialSquare(b *testing.B) {
	in := &gfP12{
		testdataP4,
		testdataP4,
		*(&gfP4{}).SetOne(),
	}
	// This is the p^6-Frobenius
	t1 := (&gfP12{}).FrobeniusP6(in)

	inv := (&gfP12{}).Invert(in)
	t1.Mul(t1, inv)

	t2 := inv.FrobeniusP2(t1) // reuse inv
	t1.Mul(t1, t2)            // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation

	x2 := &gfP12{}
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Time the special squaring itself; the generic Square has its own
		// benchmark.
		x2.SpecialSquare(t1)
	}
}
// BenchmarkGfP12SpecialSqures measures 61 consecutive special squarings
// (SpecialSquares) on an element that has gone through the first two parts
// of the final exponentiation.
func BenchmarkGfP12SpecialSqures(b *testing.B) {
	in := &gfP12{
		testdataP4,
		testdataP4,
		*(&gfP4{}).SetOne(),
	}
	// This is the p^6-Frobenius
	t1 := (&gfP12{}).FrobeniusP6(in)

	inv := (&gfP12{}).Invert(in)
	t1.Mul(t1, inv)

	t2 := inv.FrobeniusP2(t1) // reuse inv
	t1.Mul(t1, t2)            // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation

	got := &gfP12{}
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Feed the prepared element t1, not the raw input: the special
		// squaring is meant for the output of the easy part.
		got.SpecialSquares(t1, 61)
	}
}
func testGfP12Invert(t *testing.T, x *gfP12) {
xInv := &gfP12{}
xInv.Invert(x)
@ -281,6 +352,20 @@ func Test_W3(t *testing.T) {
}
}
// BenchmarkGfP12Invert measures gfP12.Invert on a fixed element whose three
// coefficients are all testdataP4.
func BenchmarkGfP12Invert(b *testing.B) {
	operand := &gfP12{x: testdataP4, y: testdataP4, z: testdataP4}
	result := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result.Invert(operand)
	}
}
func BenchmarkGfP12Frobenius(b *testing.B) {
x := &gfP12{
testdataP4,
@ -300,6 +385,48 @@ func BenchmarkGfP12Frobenius(b *testing.B) {
}
}
// BenchmarkGfP12Mul measures gfP12.Mul, multiplying a fixed element (all
// three coefficients set to testdataP4) by itself.
func BenchmarkGfP12Mul(b *testing.B) {
	operand := &gfP12{x: testdataP4, y: testdataP4, z: testdataP4}
	result := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result.Mul(operand, operand)
	}
}
// BenchmarkGfP12Squre measures the generic gfP12.Square on a fixed element
// whose three coefficients are all testdataP4.
func BenchmarkGfP12Squre(b *testing.B) {
	operand := &gfP12{x: testdataP4, y: testdataP4, z: testdataP4}
	result := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result.Square(operand)
	}
}
// BenchmarkGfP12Squres measures gfP12.Squares with a count of 61 on a fixed
// element whose three coefficients are all testdataP4.
func BenchmarkGfP12Squres(b *testing.B) {
	operand := &gfP12{x: testdataP4, y: testdataP4, z: testdataP4}
	result := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result.Squares(operand, 61)
	}
}
func BenchmarkGfP12ExpU(b *testing.B) {
x := &gfP12{
testdataP4,
@ -311,6 +438,8 @@ func BenchmarkGfP12ExpU(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
got.gfP12ExpU(x)
got.gfP12ExpU(x)
got.gfP12ExpU(x)
}
}
@ -325,5 +454,7 @@ func BenchmarkGfP12ExpU2(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
got.Exp(x, u)
got.Exp(x, u)
got.Exp(x, u)
}
}

View File

@ -64,13 +64,13 @@ func (e *gfP2) IsOne() bool {
func (e *gfP2) Conjugate(a *gfP2) *gfP2 {
e.y.Set(&a.y)
gfpNeg(&e.x, &a.x)
gfpSub(&e.x, genericZero, &a.x)
return e
}
func (e *gfP2) Neg(a *gfP2) *gfP2 {
gfpNeg(&e.x, &a.x)
gfpNeg(&e.y, &a.y)
gfpSub(&e.x, genericZero, &a.x)
gfpSub(&e.y, genericZero, &a.y)
return e
}
@ -109,22 +109,7 @@ func (e *gfP2) Triple(a *gfP2) *gfP2 {
// c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0
func (e *gfP2) Mul(a, b *gfP2) *gfP2 {
tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP{}, &gfP{}
gfpMul(v0, &a.y, &b.y)
gfpMul(v1, &a.x, &b.x)
gfpAdd(tx, &a.x, &a.y)
gfpAdd(ty, &b.x, &b.y)
gfpMul(tx, tx, ty)
gfpSub(tx, tx, v0)
gfpSub(tx, tx, v1)
gfpSub(ty, v0, v1)
gfpSub(ty, ty, v1)
tmp.MulNC(a, b)
gfp2Copy(e, tmp)
return e
}
@ -170,7 +155,7 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 {
gfpSub(ty, ty, v0)
gfpSub(ty, ty, v1)
gfpAdd(ty, ty, ty)
gfpNeg(ty, ty)
gfpSub(ty, genericZero, ty)
gfpSub(tx, v0, v1)
gfpSub(tx, tx, v1)
@ -186,7 +171,7 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 {
func (e *gfP2) MulU1(a *gfP2) *gfP2 {
t := &gfP{}
gfpAdd(t, &a.x, &a.x)
gfpNeg(t, t)
gfpSub(t, genericZero, t)
gfpCopy(&e.x, &a.y)
gfpCopy(&e.y, t)
@ -197,15 +182,7 @@ func (e *gfP2) Square(a *gfP2) *gfP2 {
// Complex squaring algorithm:
// (xu+y)² = y^2-2*x^2 + 2*u*x*y
tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
gfpSqr(tx, &a.x, 1)
gfpSqr(ty, &a.y, 1)
gfpSub(ty, ty, tx)
gfpSub(ty, ty, tx)
gfpMul(tx, &a.x, &a.y)
gfpAdd(tx, tx, tx)
tmp.SquareNC(a)
gfp2Copy(e, tmp)
return e
}
@ -215,13 +192,15 @@ func (e *gfP2) SquareNC(a *gfP2) *gfP2 {
// (xu+y)² = y^2-2*x^2 + 2*u*x*y
tx := &e.x
ty := &e.y
gfpSqr(tx, &a.x, 1)
gfpSqr(ty, &a.y, 1)
gfpSub(ty, ty, tx)
gfpSub(ty, ty, tx)
gfpAdd(ty, &a.x, &a.y)
gfpAdd(tx, &a.x, &a.x)
gfpSub(tx, &a.y, tx)
gfpMul(ty, tx, ty)
gfpMul(tx, &a.x, &a.y)
gfpAdd(ty, tx, ty)
gfpAdd(tx, tx, tx)
return e
}
@ -230,20 +209,7 @@ func (e *gfP2) SquareU(a *gfP2) *gfP2 {
// (xu+y)²*u = (y^2-2*x^2)u - 4*x*y
tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
// tx = a0^2 - 2 * a1^2
gfpSqr(ty, &a.x, 1)
gfpSqr(tx, &a.y, 1)
gfpAdd(ty, ty, ty)
gfpSub(tx, tx, ty)
// ty = -4 * a0 * a1
gfpMul(ty, &a.x, &a.y)
gfpAdd(ty, ty, ty)
gfpAdd(ty, ty, ty)
gfpNeg(ty, ty)
tmp.SquareUNC(a)
gfp2Copy(e, tmp)
return e
}
@ -251,20 +217,18 @@ func (e *gfP2) SquareU(a *gfP2) *gfP2 {
func (e *gfP2) SquareUNC(a *gfP2) *gfP2 {
// Complex squaring algorithm:
// (xu+y)²*u = (y^2-2*x^2)u - 4*x*y
tx := &e.x
ty := &e.y
// tx = a0^2 - 2 * a1^2
gfpSqr(ty, &a.x, 1)
gfpSqr(tx, &a.y, 1)
gfpAdd(ty, ty, ty)
gfpSub(tx, tx, ty)
// ty = -4 * a0 * a1
gfpAdd(tx, &a.x, &a.y)
gfpAdd(ty, &a.x, &a.x)
gfpSub(ty, &a.y, ty)
gfpMul(tx, tx, ty)
gfpMul(ty, &a.x, &a.y)
gfpAdd(tx, tx, ty)
gfpAdd(ty, ty, ty)
gfpAdd(ty, ty, ty)
gfpNeg(ty, ty)
gfpSub(ty, genericZero, ty)
return e
}
@ -287,7 +251,7 @@ func (e *gfP2) Invert(a *gfP2) *gfP2 {
inv := &gfP{}
inv.Invert(t3) // inv = (2 * a.x ^ 2 + a.y ^ 2) ^ (-1)
gfpNeg(t1, &a.x)
gfpSub(t1, genericZero, &a.x)
gfpMul(&e.x, t1, inv) // x = - a.x * inv
gfpMul(&e.y, &a.y, inv) // y = a.y * inv

View File

@ -172,6 +172,18 @@ func BenchmarkGfP2Square(b *testing.B) {
}
}
func BenchmarkGfP2SquareU(b *testing.B) {
x := &gfP2{
*fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141")),
*fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B")),
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
x.SquareU(x)
}
}
/*
func Test_gfP2QuadraticResidue(t *testing.T) {
x := &gfP2{

View File

@ -99,21 +99,7 @@ func (e *gfP4) Mul(a, b *gfP4) *gfP4 {
//c0 = a0*b0 +a1*b1*u
//c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0
tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP2{}, &gfP2{}
v0.MulNC(&a.y, &b.y)
v1.MulNC(&a.x, &b.x)
tx.Add(&a.x, &a.y)
ty.Add(&b.x, &b.y)
tx.Mul(tx, ty)
tx.Sub(tx, v0)
tx.Sub(tx, v1)
ty.MulU1(v1)
ty.Add(ty, v0)
tmp.MulNC(a, b)
gfp4Copy(e, tmp)
return e
}
@ -151,22 +137,7 @@ func (e *gfP4) MulNC(a, b *gfP4) *gfP4 {
// c1 = a0*b0 + a1*b1*u
func (e *gfP4) MulV(a, b *gfP4) *gfP4 {
tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP2{}, &gfP2{}
v0.MulNC(&a.y, &b.y)
v1.MulNC(&a.x, &b.x)
tx.Add(&a.x, &a.y)
ty.Add(&b.x, &b.y)
ty.Mul(tx, ty)
ty.Sub(ty, v0)
ty.Sub(ty, v1)
ty.MulU1(ty)
tx.MulU1(v1)
tx.Add(tx, v0)
tmp.MulVNC(a, b)
gfp4Copy(e, tmp)
return e
}
@ -208,15 +179,7 @@ func (e *gfP4) Square(a *gfP4) *gfP4 {
// Complex squaring algorithm:
// (xv+y)² = (x^2*u + y^2) + 2*x*y*v
tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
tx.SquareUNC(&a.x)
ty.SquareNC(&a.y)
ty.Add(tx, ty)
tx.Mul(&a.x, &a.y)
tx.Add(tx, tx)
tmp.SquareNC(a)
gfp4Copy(e, tmp)
return e
}
@ -224,13 +187,15 @@ func (e *gfP4) Square(a *gfP4) *gfP4 {
func (e *gfP4) SquareNC(a *gfP4) *gfP4 {
// Complex squaring algorithm:
// (xv+y)² = (x^2*u + y^2) + 2*x*y*v
// = (xu + y)(x + y) -xy(1+u) + 2xy*v
tx := &e.x
ty := &e.y
tx.SquareUNC(&a.x)
ty.SquareNC(&a.y)
ty.Add(tx, ty)
tx.Mul(&a.x, &a.y)
tx.MulNC(&a.x, &a.y)
tx.Add(tx, tx)
return e
@ -240,15 +205,7 @@ func (e *gfP4) SquareNC(a *gfP4) *gfP4 {
// v*(xv+y)² = (x^2*u + y^2)v + 2*x*y*u
func (e *gfP4) SquareV(a *gfP4) *gfP4 {
tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
tx.SquareUNC(&a.x)
ty.SquareNC(&a.y)
tx.Add(tx, ty)
ty.MulU(&a.x, &a.y)
ty.Add(ty, ty)
tmp.SquareVNC(a)
gfp4Copy(e, tmp)
return e
}

View File

@ -207,3 +207,22 @@ func BenchmarkGfP4Mul(b *testing.B) {
t.Mul(x, y)
}
}
func BenchmarkGfP4Square(b *testing.B) {
x := &gfP4{
gfP2{
*fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141")),
*fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B")),
},
gfP2{
*fromBigInt(bigFromHex("17509B092E845C1266BA0D262CBEE6ED0736A96FA347C8BD856DC76B84EBEB96")),
*fromBigInt(bigFromHex("A7CF28D519BE3DA65F3170153D278FF247EFBA98A71A08116215BBA5C999A7C7")),
},
}
t := &gfP4{}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
t.Square(x)
}
}

View File

@ -16,6 +16,7 @@ import (
var supportADX = cpu.X86.HasADX && cpu.X86.HasBMI2
// gfpNeg sets c = p - a; if that result equals p (i.e. a is zero), c is set
// to 0 instead.
// It seems this function's performance is worse than gfpSub with zero.
//
// The directive below must be written without a space after the slashes,
// otherwise the compiler treats it as an ordinary comment and ignores it.
//
//go:noescape
func gfpNeg(c, a *gfP)