store SM2 precomputed basepoint table in source

2025-04-28 05:06:18 +08:00 · 2021-11-04 10:00:50 +08:00 · 2021-11-04 10:00:50 +08:00 · e1fa144e15
commit e1fa144e15
parent 8a2098aa26
5 changed files with 1593 additions and 52 deletions
--- a/sm2/gen_p256_table.go
+++ b/sm2/gen_p256_table.go
@ -0,0 +1,96 @@
 // Copyright 2021 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //go:build amd64
 // +build amd64
 package sm2
 import (
 	"bytes"
 	"encoding/binary"
 	"fmt"
 	"go/format"
 )
 // GenTables computes the fixed-base multiplication table and prints, to
 // standard output, Go source that declares it as the string constant
 // p256Precomputed.
 //
 // Row i, slot j of the table holds the affine x and y coordinates (four
 // 64-bit words each) of (2^(6*i))*(j+1)*G, for i in [0, 43) and j in [0, 32).
 // Every word is written little-endian as eight \x escapes, one point (eight
 // words) per output line. The generated source is passed through
 // format.Source; if that fails, the unformatted text is printed instead.
 func GenTables() {
 	var out bytes.Buffer
 	out.WriteString(`
 // Generated by gen_p256_table.go. DO NOT EDIT.
 //go:build amd64
 // +build amd64
 package sm2
 `[1:])
 	// The generator as twelve words: x in [0:4], y in [4:8], z in [8:12].
 	generator := []uint64{
 		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
 		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
 		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
 	}
 	// Generate precomputed p256 tables.
 	var table [43][32 * 8]uint64
 	// multiple tracks (j+1)*G; cur walks multiple*2^(6*i) for the current j.
 	multiple := make([]uint64, 12)
 	copy(multiple, generator)
 	cur := make([]uint64, 12)
 	// Coordinate views that alias cur.
 	x, y, z := cur[:4], cur[4:8], cur[8:12]
 	zInv := make([]uint64, 4)
 	zInvSq := make([]uint64, 4)
 	for j := 0; j < 32; j++ {
 		copy(cur, multiple)
 		for i := 0; i < 43; i++ {
 			if i > 0 {
 				// Moving to the next 6-bit window means multiplying
 				// by 2^6, i.e. six successive doublings.
 				for k := 0; k < 6; k++ {
 					p256PointDoubleAsm(cur, cur)
 				}
 			}
 			// Bring the point to affine form; the coordinates remain
 			// Montgomery-encoded. zInv ends up as z^-3 and zInvSq as
 			// z^-2 (assuming p256Sqr with a count of 1 squares once).
 			p256Inverse(zInv, z)
 			p256Sqr(zInvSq, zInv, 1)
 			p256Mul(zInv, zInv, zInvSq)
 			p256Mul(x, x, zInvSq)
 			p256Mul(y, y, zInv)
 			// Reset z to the generator's z now that x and y are affine.
 			copy(z, generator[8:12])
 			// Record x and y in slot j of row i.
 			copy(table[i][j*8:], cur[:8])
 		}
 		// Step multiple to the next multiple of G. On the first pass it
 		// still equals the generator, so it is doubled rather than added.
 		if j == 0 {
 			p256PointDoubleAsm(multiple, generator)
 			continue
 		}
 		p256PointAddAsm(multiple, multiple, generator)
 	}
 	out.WriteString("const p256Precomputed = \"\" +\n\n")
 	// Emit the table flattened and little-endian: the assembly reads it
 	// directly on little-endian platforms, and a const string lets the
 	// data be stored read-only.
 	lastRow := len(table) - 1
 	for i := range &table {
 		row := &table[i]
 		lastCol := len(row) - 1
 		for j, word := range row {
 			var le [8]byte
 			binary.LittleEndian.PutUint64(le[:], word)
 			out.WriteString("\"")
 			for _, b := range &le {
 				fmt.Fprintf(&out, "\\x%02x", b)
 			}
 			out.WriteString("\"")
 			// Every literal except the very last is followed by "+".
 			if i != lastRow || j != lastCol {
 				out.WriteString("+")
 			}
 			// Break the line after each group of eight words.
 			if j%8 == 7 {
 				out.WriteString("\n")
 			}
 		}
 		out.WriteString("\n")
 	}
 	src := out.Bytes()
 	// On a formatting failure the raw text is kept so it can be inspected.
 	if formatted, err := format.Source(src); err == nil {
 		src = formatted
 	}
 	fmt.Println(string(src))
 }
--- a/sm2/p256_asm.go
+++ b/sm2/p256_asm.go
@ -15,7 +15,6 @@ package sm2
 import (
 	"crypto/elliptic"
 	"math/big"
 	"sync"
 )
 type (
@ -30,8 +29,6 @@ type (
 var (
 	p256 p256Curve
 	p256Precomputed *[43][32 * 8]uint64
 	precomputeOnce  sync.Once
 )
 func initP256() {
@ -82,7 +79,7 @@ func p256LittleToBig(res []byte, in []uint64)
 func p256Select(point, table []uint64, idx int)
 //go:noescape
-func p256SelectBase(point, table []uint64, idx int)
+func p256SelectBase(point *[12]uint64, table string, idx int)
 // Montgomery multiplication modulo Ord(G)
 //go:noescape
@ -446,52 +443,10 @@ func boothW6(in uint) (int, int) {
 	return int(d), int(s & 1)
 }
 // initTable allocates p256Precomputed and fills it so that
 // table[i][j] = (2^(6*i))*(j+1)*G mod P.
 //
 // Each entry occupies eight words, p256Precomputed[i][j*8 : j*8+8]: the
 // affine x coordinate followed by the affine y coordinate, four words each.
 func initTable() {
 	p256Precomputed = new([43][32 * 8]uint64)
 	// multiple tracks (j+1)*G; cur walks multiple*2^(6*i) for the current j.
 	multiple := make([]uint64, 12)
 	copy(multiple, basePoint)
 	cur := make([]uint64, 12)
 	// Coordinate views that alias cur.
 	x, y, z := cur[:4], cur[4:8], cur[8:12]
 	zInv := make([]uint64, 4)
 	zInvSq := make([]uint64, 4)
 	for j := 0; j < 32; j++ {
 		copy(cur, multiple)
 		for i := 0; i < 43; i++ {
 			if i > 0 {
 				// Each row is one 6-bit window further along, which
 				// costs six doublings.
 				for k := 0; k < 6; k++ {
 					p256PointDoubleAsm(cur, cur)
 				}
 			}
 			// Bring the point to affine form; the coordinates remain
 			// Montgomery-encoded. zInv ends up as z^-3 and zInvSq as
 			// z^-2 (assuming p256Sqr with a count of 1 squares once).
 			p256Inverse(zInv, z)
 			p256Sqr(zInvSq, zInv, 1)
 			p256Mul(zInv, zInv, zInvSq)
 			p256Mul(x, x, zInvSq)
 			p256Mul(y, y, zInv)
 			// Reset z to the base point's z now that x and y are affine.
 			copy(z, basePoint[8:12])
 			// Store x and y in slot j of row i.
 			copy(p256Precomputed[i][j*8:], cur[:8])
 		}
 		// Step multiple to the next multiple of G. On the first pass it
 		// still equals the base point, so it is doubled rather than added.
 		if j == 0 {
 			p256PointDoubleAsm(multiple, basePoint)
 			continue
 		}
 		p256PointAddAsm(multiple, multiple, basePoint)
 	}
 }
 func (p *p256Point) p256BaseMult(scalar []uint64) {
 	precomputeOnce.Do(initTable)
 	wvalue := (scalar[0] << 1) & 0x7f
 	sel, sign := boothW6(uint(wvalue))
-	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
+	p256SelectBase(&p.xyz, p256Precomputed, sel)
 	p256NegCond(p.xyz[4:8], sign)
 	// (This is one, in the Montgomery domain.)
@ -518,7 +473,7 @@ func (p *p256Point) p256BaseMult(scalar []uint64) {
 		}
 		index += 6
 		sel, sign = boothW6(uint(wvalue))
-		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
+		p256SelectBase(&t0.xyz, p256Precomputed[i*32*8*8:], sel)
 		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
 		zero |= sel
 	}
--- a/sm2/p256_asm_amd64.s
+++ b/sm2/p256_asm_amd64.s
@ -773,10 +773,10 @@ loop_select:
 	RET
 /* ---------------------------------------*/
 // Constant time point access to base point table.
-// func p256SelectBase(point, table []uint64, idx int)
+// func p256SelectBase(point *[12]uint64, table string, idx int)
 TEXT ·p256SelectBase(SB),NOSPLIT,$0
-	MOVQ idx+48(FP),AX
+	MOVQ idx+24(FP),AX
-	MOVQ table+24(FP),DI
+	MOVQ table+8(FP),DI
 	MOVQ point+0(FP),DX
 	PXOR X15, X15	// X15 = 0
--- a/sm2/p256_asm_table.go
+++ b/sm2/p256_asm_table.go
--- a/sm2/p256_asm_table_test.go
+++ b/sm2/p256_asm_table_test.go
@ -0,0 +1,65 @@
 // Copyright 2021 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 //go:build amd64
 // +build amd64
 package sm2
 import (
 	"encoding/binary"
 	"reflect"
 	"testing"
 )
 // TestP256PrecomputedTable recomputes every table entry, the affine x and y
 // of (2^(6*i))*(j+1)*G, and checks that its little-endian encoding equals the
 // matching 64-byte slice of the p256Precomputed string. The test stops at
 // the first mismatch.
 func TestP256PrecomputedTable(t *testing.T) {
 	// The generator as twelve words: x in [0:4], y in [4:8], z in [8:12].
 	generator := []uint64{
 		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
 		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
 		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
 	}
 	// multiple tracks (j+1)*G; cur walks multiple*2^(6*i) for the current j.
 	multiple := make([]uint64, 12)
 	copy(multiple, generator)
 	cur := make([]uint64, 12)
 	// Coordinate views that alias cur.
 	x, y, z := cur[:4], cur[4:8], cur[8:12]
 	zInv := make([]uint64, 4)
 	zInvSq := make([]uint64, 4)
 	for j := 0; j < 32; j++ {
 		copy(cur, multiple)
 		for i := 0; i < 43; i++ {
 			if i > 0 {
 				// One 6-bit window further along costs six doublings.
 				for k := 0; k < 6; k++ {
 					p256PointDoubleAsm(cur, cur)
 				}
 			}
 			// Bring the point to affine form; the coordinates remain
 			// Montgomery-encoded.
 			p256Inverse(zInv, z)
 			p256Sqr(zInvSq, zInv, 1)
 			p256Mul(zInv, zInv, zInvSq)
 			p256Mul(x, x, zInvSq)
 			p256Mul(y, y, zInv)
 			copy(z, generator[8:12])
 			// Serialize x and y (eight words) little-endian.
 			var encoded [8 * 8]byte
 			for k, word := range cur[:8] {
 				binary.LittleEndian.PutUint64(encoded[k*8:k*8+8], word)
 			}
 			// Byte offset of slot j in row i: a row is 32 points of
 			// 8 words of 8 bytes, and a point is 8 words of 8 bytes.
 			start := i*32*8*8 + j*8*8
 			got, want := p256Precomputed[start:start+64], string(encoded[:])
 			if !reflect.DeepEqual(got, want) {
 				t.Fatalf("Unexpected table entry at [%d][%d:%d]: got %v, want %v", i, j*8, (j*8)+8, got, want)
 			}
 		}
 		// Step multiple to the next multiple of G. On the first pass it
 		// still equals the generator, so it is doubled rather than added.
 		if j == 0 {
 			p256PointDoubleAsm(multiple, generator)
 			continue
 		}
 		p256PointAddAsm(multiple, multiple, generator)
 	}
 }