sm9/bn256: ppc64x gfp #256

This commit is contained in:
Sun Yimin 2024-10-06 09:45:50 +08:00 committed by GitHub
parent ed4eafdc3e
commit 74d1bb82e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 121 additions and 202 deletions

View File

@ -1,4 +1,4 @@
//go:build (amd64 || arm64) && !purego
//go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
package bn256

View File

@ -1,4 +1,4 @@
//go:build purego || !(amd64 || arm64)
//go:build purego || !(amd64 || arm64 || ppc64 || ppc64le)
package bn256

View File

@ -1,53 +0,0 @@
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build (ppc64 || ppc64le) && !purego
package bn256
// Set c = p - a, if c == p, then c = 0.
// It seems this function's performance is worse than gfpSub with zero.
//
//go:noescape
func gfpNegAsm(c, a *gfP)
// Set c = a + b, if c >= p, then c = c - p.
//
//go:noescape
func gfpAddAsm(c, a, b *gfP)
// Set c = a + a, reduced mod p like gfpAddAsm.
//
//go:noescape
func gfpDoubleAsm(c, a *gfP)
// Set c = a + a + a, reduced mod p like gfpAddAsm.
//
//go:noescape
func gfpTripleAsm(c, a *gfP)
// Set c = a - b, if c is negative, then c = c + p.
//
//go:noescape
func gfpSubAsm(c, a, b *gfP)
// Montgomery multiplication. Sets c = a * b * R⁻¹ mod p.
//
//go:noescape
func gfpMulAsm(c, a, b *gfP)
// Montgomery square, repeated n times (n >= 1): res = in^(2^n) * R⁻¹… mod p.
//
//go:noescape
func gfpSqrAsm(res, in *gfP, n int)
// Marshal gfP into big endian form.
//
//go:noescape
func gfpMarshalAsm(out *[32]byte, in *gfP)
// Unmarshal big-endian bytes into the internal little-endian limb form.
//
//go:noescape
func gfpUnmarshalAsm(out *gfP, in *[32]byte)

View File

@ -7,19 +7,20 @@
#include "textflag.h"
//func gfpUnmarshal(out *gfP, in *[32]byte)
TEXT ·gfpUnmarshalAsm(SB), NOSPLIT, $0-16
TEXT ·gfpUnmarshal(SB), NOSPLIT, $0-16
MOVD res+0(FP), R3
MOVD in+8(FP), R4
BR gfpInternalEndianSwap<>(SB)
// func gfpMarshal(out *[32]byte, in *gfP)
TEXT ·gfpMarshalAsm(SB), NOSPLIT, $0-16
TEXT ·gfpMarshal(SB), NOSPLIT, $0-16
MOVD res+0(FP), R3
MOVD in+8(FP), R4
BR gfpInternalEndianSwap<>(SB)
TEXT gfpInternalEndianSwap<>(SB), NOSPLIT, $0-0
// Index registers needed for BR movs
#ifdef GOARCH_ppc64le
MOVD $8, R9
MOVD $16, R10
MOVD $24, R14
@ -33,7 +34,17 @@ TEXT gfpInternalEndianSwap<>(SB), NOSPLIT, $0-0
MOVD R7, 8(R3)
MOVD R6, 16(R3)
MOVD R5, 24(R3)
#else
MOVD $16, R10
LXVD2X (R4)(R0), V0
LXVD2X (R4)(R10), V1
XXPERMDI V0, V0, $2, V0
XXPERMDI V1, V1, $2, V1
STXVD2X V1, (R0+R3)
STXVD2X V0, (R10+R3)
#endif
RET
#define X1L V0
@ -70,7 +81,7 @@ TEXT gfpInternalEndianSwap<>(SB), NOSPLIT, $0-0
VSEL TT0, T0, SEL1, T0 \
VSEL TT1, T1, SEL1, T1 \
TEXT ·gfpNegAsm(SB),0,$0-16
TEXT ·gfpNeg(SB),0,$0-16
MOVD c+0(FP), R3
MOVD a+8(FP), R4
@ -98,7 +109,7 @@ TEXT ·gfpNegAsm(SB),0,$0-16
STXVD2X T1, (R5+R3)
RET
TEXT ·gfpSubAsm(SB),0,$0-24
TEXT ·gfpSub(SB),0,$0-24
MOVD c+0(FP), R3
MOVD a+8(FP), R4
MOVD b+16(FP), R5
@ -144,7 +155,7 @@ TEXT ·gfpSubAsm(SB),0,$0-24
VSEL TT0, T0, SEL1, T0 \
VSEL TT1, T1, SEL1, T1
TEXT ·gfpAddAsm(SB),0,$0-24
TEXT ·gfpAdd(SB),0,$0-24
MOVD c+0(FP), R3
MOVD a+8(FP), R4
MOVD b+16(FP), R5
@ -177,7 +188,7 @@ TEXT ·gfpAddAsm(SB),0,$0-24
STXVD2X T1, (R6+R3)
RET
TEXT ·gfpDoubleAsm(SB),0,$0-16
TEXT ·gfpDouble(SB),0,$0-16
MOVD c+0(FP), R3
MOVD a+8(FP), R4
@ -204,7 +215,7 @@ TEXT ·gfpDoubleAsm(SB),0,$0-16
STXVD2X T1, (R6+R3)
RET
TEXT ·gfpTripleAsm(SB),0,$0-16
TEXT ·gfpTriple(SB),0,$0-16
MOVD c+0(FP), R3
MOVD a+8(FP), R4
@ -726,7 +737,7 @@ TEXT gfpMulInternal<>(SB), NOSPLIT, $0
#define T1 V7
#define K0 V31
TEXT ·gfpMulAsm(SB),NOSPLIT,$0
TEXT ·gfpMul(SB),NOSPLIT,$0
MOVD c+0(FP), res_ptr
MOVD a+8(FP), x_ptr
MOVD b+16(FP), y_ptr
@ -766,7 +777,7 @@ TEXT ·gfpMulAsm(SB),NOSPLIT,$0
RET
// func gfpSqr(res, in *gfP, n int)
TEXT ·gfpSqrAsm(SB),NOSPLIT,$0
TEXT ·gfpSqr(SB),NOSPLIT,$0
MOVD res+0(FP), res_ptr
MOVD in+8(FP), x_ptr
MOVD n+16(FP), N
@ -825,3 +836,102 @@ done:
#undef T0
#undef T1
#undef K0
/* ---------------------------------------*/
// Register aliases for the Montgomery "from Montgomery form" conversion below.
#define res_ptr R3
#define x_ptr R4
#define CPOOL R7
// M0/M1 hold the modulus, T0..T2 the running value/carry.
#define M0 V5
#define M1 V4
#define T0 V6
#define T1 V7
#define T2 V8
#define ADD1 V16
#define ADD1H V17
#define ADD2 V18
#define ADD2H V19
#define RED1 V20
#define RED1H V21
#define RED2 V22
#define RED2H V23
#define CAR1 V24
#define CAR1M V25
#define MK0 V30
#define K0 V31
// TMP1, TMP2 used in
// VMULT macros
#define TMP1 V13
#define TMP2 V27
#define ONE V29 // 1s splatted by word
// func gfpFromMont(res, in *gfP)
// Converts in (presumably in Montgomery form) back to the plain
// representation by running 8 rounds of word-wise Montgomery reduction
// with the constants ·p2 (modulus) and ·np (N') — TODO(review): confirm
// constant semantics against the generic gfp code.
TEXT ·gfpFromMont(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD in+8(FP), x_ptr
	MOVD $16, R16
	// Load the 256-bit input as two 128-bit vectors; XXPERMDI $2 swaps
	// the doublewords to get a consistent lane order.
	LXVD2X (R0)(x_ptr), T0
	LXVD2X (R16)(x_ptr), T1
	XXPERMDI T0, T0, $2, T0
	XXPERMDI T1, T1, $2, T1
	// Load the modulus constant ·p2 into M0||M1.
	MOVD $·p2+0(SB), CPOOL
	LXVD2X (CPOOL)(R0), M0
	LXVD2X (CPOOL)(R16), M1
	XXPERMDI M0, M0, $2, M0
	XXPERMDI M1, M1, $2, M1
	// Load ·np and splat one 32-bit word of it into every lane of K0.
	MOVD $·np+0(SB), CPOOL
	LXVD2X (CPOOL)(R0), K0
	VSPLTW $1, K0, K0
// ---------------------------------------------------------------------------/
	VSPLTISW $1, ONE
	VSPLTISB $0, T2 // VZERO T2
	// 8 reduction rounds — one per 32-bit word of the 256-bit value.
	MOVD $8, R5
	MOVD R5, CTR
loop:
	// m = (t mod 2^32) * np mod 2^32, splatted for the multiply-add.
	VMULUWM T0, K0, MK0
	VSPLTW $3, MK0, MK0
	// t += m * p, accumulated as low/high partial products.
	VMULT_ADD(M0, MK0, T0, ONE, RED1, RED1H)
	VMULT_ADD(M1, MK0, T1, ONE, RED2, RED2H)
	// Shift the 288-bit accumulator right by one word (divide by 2^32).
	VSLDOI $12, RED2, RED1, RED1 // VSLDB
	VSLDOI $12, T2, RED2, RED2 // VSLDB
	VADDCUQ RED1H, RED1, CAR1M // VACCQ
	VADDUQM RED1H, RED1, T0 // VAQ
	// << ready for next MK0
	VADDECUQ RED2H, RED2, CAR1M, T2 // VACCCQ
	VADDEUQM RED2H, RED2, CAR1M, T1 // VACQ
	BDNZ loop
// ---------------------------------------------------
	// Final conditional subtraction: compute t - p with borrows, then
	// select t - p when it did not underflow, else keep t.
	VSPLTISB $0, RED1 // VZERO RED1
	VSUBCUQ T0, M0, CAR1 // VSCBIQ
	VSUBUQM T0, M0, ADD1 // VSQ
	VSUBECUQ T1, M1, CAR1, CAR1M // VSBCBIQ
	VSUBEUQM T1, M1, CAR1, ADD2 // VSBIQ
	VSUBEUQM T2, RED1, CAR1M, T2 // VSBIQ
	// what output to use, ADD2||ADD1 or T1||T0?
	VSEL ADD1, T0, T2, T0
	VSEL ADD2, T1, T2, T1
	// Swap doublewords back and store the 256-bit result.
	XXPERMDI T0, T0, $2, T0
	XXPERMDI T1, T1, $2, T1
	STXVD2X T0, (R0)(res_ptr)
	STXVD2X T1, (R16)(res_ptr)
	RET

View File

@ -1,138 +0,0 @@
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build (ppc64 || ppc64le) && !purego
package bn256
import "testing"
// TestGfpNegAsm cross-checks gfpNegAsm against subtraction from zero:
// for any field element a, gfpNegAsm must agree with gfpSubAsm(zero, a).
func TestGfpNegAsm(t *testing.T) {
	x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596"))
	// Cover a generic element and the zero element (the c == p edge case).
	for _, a := range []*gfP{x, zero} {
		viaSub := new(gfP)
		gfpSubAsm(viaSub, zero, a)
		viaNeg := new(gfP)
		gfpNegAsm(viaNeg, a)
		if *viaNeg != *viaSub {
			t.Errorf("got %v, expected %v", viaSub, viaNeg)
		}
	}
}
// TestGfpAsmBasicOperations checks the assembly field helpers (add, double,
// triple, sub, neg, mul, sqr) against precomputed expected values, including
// the aliased case where an operand is also the destination.
func TestGfpAsmBasicOperations(t *testing.T) {
	x := fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141"))
	y := fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B"))
	expectedAdd := fromBigInt(bigFromHex("0691692307d370af56226e57920199fbbe10f216c67fbc9468c7f225a4b1f21f"))
	expectedDouble := fromBigInt(bigFromHex("551de7a0ee24723edcf314ff72f478fac1c7c4e7044238acc3913cfbcdaf7d05"))
	expectedSub := fromBigInt(bigFromHex("67b381821c52a5624f3304a8149be8461e3bc07adcb872c38aa65051ba53ba97"))
	expectedNeg := fromBigInt(bigFromHex("7f1d8aad70909be90358f1d02240062433cc3a0248ded72febb879ec33ce6f22"))
	expectedMul := fromBigInt(bigFromHex("3d08bbad376584e4f74bd31f78f716372b96ba8c3f939c12b8d54e79b6489e76"))
	expectedMul2 := fromBigInt(bigFromHex("1df94a9e05a559ff38e0ab50cece734dc058d33738ceacaa15986a67cbff1ef6"))
	t.Parallel()
	t.Run("add", func(t *testing.T) {
		sum := new(gfP)
		gfpAddAsm(sum, x, y)
		if *sum != *expectedAdd {
			t.Errorf("add not same")
		}
		// Aliased destination: accumulate into a copy of x.
		acc := new(gfP)
		acc.Set(x)
		gfpAddAsm(acc, acc, y)
		if *acc != *expectedAdd {
			t.Errorf("add not same when add self")
		}
	})
	t.Run("double", func(t *testing.T) {
		dbl := new(gfP)
		gfpDoubleAsm(dbl, x)
		if dbl.Equal(expectedDouble) != 1 {
			t.Errorf("double not same, got %v, expected %v", dbl, expectedDouble)
		}
		// Aliased destination.
		dbl.Set(x)
		gfpDoubleAsm(dbl, dbl)
		if dbl.Equal(expectedDouble) != 1 {
			t.Errorf("double not same, got %v, expected %v", dbl, expectedDouble)
		}
	})
	t.Run("triple", func(t *testing.T) {
		// 3x must equal x + 2x.
		want := new(gfP)
		gfpAddAsm(want, x, expectedDouble)
		trp := new(gfP)
		trp.Set(x)
		gfpTripleAsm(trp, trp)
		if trp.Equal(want) != 1 {
			t.Errorf("expected %v, got %v", want, trp)
		}
	})
	t.Run("sub", func(t *testing.T) {
		diff := new(gfP)
		gfpSubAsm(diff, y, x)
		if *diff != *expectedSub {
			t.Errorf("sub not same")
		}
		// Aliased subtrahend.
		alias := new(gfP)
		alias.Set(x)
		gfpSubAsm(alias, y, alias)
		if *alias != *expectedSub {
			t.Errorf("sub not same when sub self")
		}
		// x - x must reduce to zero, not p.
		gfpSubAsm(diff, x, x)
		if *diff != *zero {
			t.Errorf("expected zero")
		}
	})
	t.Run("neg", func(t *testing.T) {
		neg := new(gfP)
		gfpNegAsm(neg, y)
		if *neg != *expectedNeg {
			t.Errorf("neg not same")
		}
		// Aliased destination.
		neg.Set(y)
		gfpNegAsm(neg, neg)
		if *neg != *expectedNeg {
			t.Errorf("neg not same when neg self")
		}
	})
	t.Run("mul", func(t *testing.T) {
		prod := new(gfP)
		gfpMulAsm(prod, x, y)
		if *prod != *expectedMul {
			t.Errorf("mul not same")
		}
		// Aliased multiplicand.
		prod.Set(x)
		gfpMulAsm(prod, prod, y)
		if *prod != *expectedMul {
			t.Errorf("mul not same when mul self")
		}
	})
	t.Run("square", func(t *testing.T) {
		base := new(gfP)
		viaMul := new(gfP)
		viaSqr := new(gfP)
		gfpMulAsm(base, x, y)
		gfpMulAsm(viaMul, base, base)
		if *viaMul != *expectedMul2 {
			t.Errorf("mul not same")
		}
		// Repeated squaring: sqr(base, 2) must match ((base^2)^2) via mul.
		gfpMulAsm(viaMul, viaMul, viaMul)
		gfpSqrAsm(viaSqr, base, 2)
		if *viaMul != *viaSqr {
			t.Errorf("mul/sqr not same")
		}
		// Aliased destination for the square.
		viaSqr.Set(base)
		gfpSqrAsm(viaSqr, viaSqr, 2)
		if *viaMul != *viaSqr {
			t.Errorf("mul/sqr not same when square self")
		}
	})
}