store SM2 precomputed basepoint table in source

2025-04-27 20:56:18 +08:00 · 2021-11-04 10:00:50 +08:00 · 2021-11-04 10:00:50 +08:00 · e1fa144e15
commit e1fa144e15
parent 8a2098aa26
5 changed files with 1593 additions and 52 deletions
--- a/sm2/gen_p256_table.go
+++ b/sm2/gen_p256_table.go
@ -0,0 +1,96 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64
+// +build amd64
+
+package sm2
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"go/format"
+)
+// GenTables prints to standard output Go source declaring const p256Precomputed, the precomputed SM2 base-point table.
+func GenTables() {
+	buf := new(bytes.Buffer)
+	fmt.Fprint(buf, `
+// Generated by gen_p256_table.go. DO NOT EDIT.
+//go:build amd64
+// +build amd64
+package sm2
+`[1:]) // [1:] drops the newline that follows the opening backquote
+
+	// Generate precomputed p256 tables: pre[i][j*8:j*8+8] receives the affine x and y words of (2^(6*i))*(j+1)*G.
+	var pre [43][32 * 8]uint64
+	basePoint := []uint64{
+		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
+		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
+		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
+	}
+	t1 := make([]uint64, 12)
+	t2 := make([]uint64, 12)
+	copy(t2, basePoint)
+	zInv := make([]uint64, 4)
+	zInvSq := make([]uint64, 4)
+	for j := 0; j < 32; j++ { // iteration j starts with t2 = (j+1)*G
+		copy(t1, t2)
+		for i := 0; i < 43; i++ {
+			// The window size is 6, so each row after the first is the previous row's point doubled 6 times.
+			if i != 0 {
+				for k := 0; k < 6; k++ {
+					p256PointDoubleAsm(t1, t1)
+				}
+			}
+			// Convert the point to affine form: x is multiplied by z^-2 and y by z^-3 (reading p256Inverse/p256Sqr/p256Mul
+			// as field inverse, square and multiply). Its values are still in Montgomery form however.
+			p256Inverse(zInv, t1[8:12])
+			p256Sqr(zInvSq, zInv, 1)
+			p256Mul(zInv, zInv, zInvSq)
+			p256Mul(t1[:4], t1[:4], zInvSq)
+			p256Mul(t1[4:8], t1[4:8], zInv)
+			copy(t1[8:12], basePoint[8:12])
+			// Update the table entry: row i, slot j gets the 8 words holding x and y; z is not stored.
+			copy(pre[i][j*8:], t1[:8])
+		}
+		if j == 0 { // t2 equals basePoint here, so 2*G comes from a doubling, not from an addition
+			p256PointDoubleAsm(t2, basePoint)
+		} else {
+			p256PointAddAsm(t2, t2, basePoint)
+		}
+	}
+	// Open the constant declaration; the table follows as a chain of concatenated string literals.
+	fmt.Fprint(buf, "const p256Precomputed = \"\" +\n\n")
+
+	// Dump the precomputed tables, flattened, little-endian.
+	// These tables are used directly by assembly on little-endian platforms.
+	// Putting the data in a const string lets it be stored readonly.
+	for i := range &pre {
+		for j, v := range &pre[i] {
+			fmt.Fprintf(buf, "\"")
+			var u8 [8]byte
+			binary.LittleEndian.PutUint64(u8[:], v)
+			for _, b := range &u8 {
+				fmt.Fprintf(buf, "\\x%02x", b)
+			}
+			fmt.Fprintf(buf, "\"")
+			if i < len(pre)-1 || j < len(pre[i])-1 { // every literal except the very last is followed by "+"
+				fmt.Fprint(buf, "+")
+			}
+			if j%8 == 7 { // line break after each 8-word (64-byte) table entry
+				fmt.Fprint(buf, "\n")
+			}
+		}
+		fmt.Fprint(buf, "\n")
+	}
+	// Format the generated source with go/format, then print it to standard output.
+	src := buf.Bytes()
+	fmtsrc, fmterr := format.Source(src)
+	// If formatting failed, keep the original source for debugging; fmterr itself is not reported.
+	if fmterr == nil {
+		src = fmtsrc
+	}
+	fmt.Println(string(src))
+}
--- a/sm2/p256_asm.go
+++ b/sm2/p256_asm.go
@ -15,7 +15,6 @@ package sm2
 import (
 	"crypto/elliptic"
 	"math/big"
-	"sync"
 )

 type (
@ -29,9 +28,7 @@ type (
 )

 var (
-	p256            p256Curve
-	p256Precomputed *[43][32 * 8]uint64
-	precomputeOnce  sync.Once
+	p256 p256Curve
 )

 func initP256() {
@ -82,7 +79,7 @@ func p256LittleToBig(res []byte, in []uint64)
 func p256Select(point, table []uint64, idx int)

 //go:noescape
-func p256SelectBase(point, table []uint64, idx int)
+func p256SelectBase(point *[12]uint64, table string, idx int)

 // Montgomery multiplication modulo Ord(G)
 //go:noescape
@ -446,52 +443,10 @@ func boothW6(in uint) (int, int) {
 	return int(d), int(s & 1)
 }

-// table[i][j] = (2^(6*i))*(j+1)*G mod P
-func initTable() {
-	p256Precomputed = new([43][32 * 8]uint64)
-
-	t1 := make([]uint64, 12)
-	t2 := make([]uint64, 12)
-	copy(t2, basePoint)
-
-	zInv := make([]uint64, 4)
-	zInvSq := make([]uint64, 4)
-	for j := 0; j < 32; j++ {
-		copy(t1, t2)
-		for i := 0; i < 43; i++ {
-			// The window size is 6 so we need to double 6 times.
-			if i != 0 {
-				for k := 0; k < 6; k++ {
-					p256PointDoubleAsm(t1, t1)
-				}
-			}
-			// Convert the point to affine form. (Its values are
-			// still in Montgomery form however.)
-			p256Inverse(zInv, t1[8:12])
-			p256Sqr(zInvSq, zInv, 1)
-			p256Mul(zInv, zInv, zInvSq)
-
-			p256Mul(t1[:4], t1[:4], zInvSq)
-			p256Mul(t1[4:8], t1[4:8], zInv)
-
-			copy(t1[8:12], basePoint[8:12])
-			// Update the table entry
-			copy(p256Precomputed[i][j*8:], t1[:8])
-		}
-		if j == 0 {
-			p256PointDoubleAsm(t2, basePoint)
-		} else {
-			p256PointAddAsm(t2, t2, basePoint)
-		}
-	}
-}
-
 func (p *p256Point) p256BaseMult(scalar []uint64) {
-	precomputeOnce.Do(initTable)
-
 	wvalue := (scalar[0] << 1) & 0x7f
 	sel, sign := boothW6(uint(wvalue))
-	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
+	p256SelectBase(&p.xyz, p256Precomputed, sel)
 	p256NegCond(p.xyz[4:8], sign)

 	// (This is one, in the Montgomery domain.)
@ -518,7 +473,7 @@ func (p *p256Point) p256BaseMult(scalar []uint64) {
 		}
 		index += 6
 		sel, sign = boothW6(uint(wvalue))
-		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
+		p256SelectBase(&t0.xyz, p256Precomputed[i*32*8*8:], sel)
 		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
 		zero |= sel
 	}
--- a/sm2/p256_asm_amd64.s
+++ b/sm2/p256_asm_amd64.s
@ -773,10 +773,10 @@ loop_select:
 	RET
 /* ---------------------------------------*/
 // Constant time point access to base point table.
-// func p256SelectBase(point, table []uint64, idx int)
+// func p256SelectBase(point *[12]uint64, table string, idx int)
 TEXT ·p256SelectBase(SB),NOSPLIT,$0
-	MOVQ idx+48(FP),AX
-	MOVQ table+24(FP),DI
+	MOVQ idx+24(FP),AX
+	MOVQ table+8(FP),DI
 	MOVQ point+0(FP),DX

 	PXOR X15, X15	// X15 = 0
--- a/sm2/p256_asm_table.go
+++ b/sm2/p256_asm_table.go
--- a/sm2/p256_asm_table_test.go
+++ b/sm2/p256_asm_table_test.go
@ -0,0 +1,65 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64
+// +build amd64
+
+package sm2
+
+import (
+	"encoding/binary"
+	"reflect"
+	"testing"
+)
+
+func TestP256PrecomputedTable(t *testing.T) {
+	// Recompute every table entry from the base point and compare it with the bytes stored in p256Precomputed.
+	basePoint := []uint64{
+		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
+		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
+		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
+	}
+	t1 := make([]uint64, 12)
+	t2 := make([]uint64, 12)
+	copy(t2, basePoint)
+
+	zInv := make([]uint64, 4)
+	zInvSq := make([]uint64, 4)
+	for j := 0; j < 32; j++ { // iteration j starts with t2 = (j+1)*G
+		copy(t1, t2)
+		for i := 0; i < 43; i++ {
+			// The window size is 6, so each row after the first is the previous row's point doubled 6 times.
+			if i != 0 {
+				for k := 0; k < 6; k++ {
+					p256PointDoubleAsm(t1, t1)
+				}
+			}
+			// Convert the point to affine form: x is multiplied by z^-2 and y by z^-3 (reading p256Inverse/p256Sqr/p256Mul
+			// as field inverse, square and multiply). Its values are still in Montgomery form however.
+			p256Inverse(zInv, t1[8:12])
+			p256Sqr(zInvSq, zInv, 1)
+			p256Mul(zInv, zInv, zInvSq)
+
+			p256Mul(t1[:4], t1[:4], zInvSq)
+			p256Mul(t1[4:8], t1[4:8], zInv)
+
+			copy(t1[8:12], basePoint[8:12])
+			// Serialize x and y as 8 little-endian words; entry [i][j] starts at byte (i*32+j)*64 of p256Precomputed.
+			buf := make([]byte, 8*8)
+			for i, u := range t1[:8] { // NOTE(review): this i shadows the row index, but only inside this loop
+				binary.LittleEndian.PutUint64(buf[i*8:i*8+8], u)
+			}
+			start := i*32*8*8 + j*8*8
+			if got, want := p256Precomputed[start:start+64], string(buf); !reflect.DeepEqual(got, want) { // NOTE(review): both operands are strings; != would do
+				t.Fatalf("Unexpected table entry at [%d][%d:%d]: got %v, want %v", i, j*8, (j*8)+8, got, want)
+			}
+		}
+		if j == 0 { // t2 equals basePoint here, so 2*G comes from a doubling, not from an addition
+			p256PointDoubleAsm(t2, basePoint)
+		} else {
+			p256PointAddAsm(t2, t2, basePoint)
+		}
+	}
+
+}