MAGIC - step 2, completed part of asm

This commit is contained in:
Emman 2021-02-08 19:39:39 +08:00
parent 4de671cd8b
commit 1a46185db8
2 changed files with 241 additions and 71 deletions

View File

@ -24,8 +24,8 @@
#define t0 R14 #define t0 R14
#define t1 R15 #define t1 R15
DATA p256const0<>+0x00(SB)/8, $0x00000000ffffffff DATA p256const0<>+0x00(SB)/8, $0xffffffff00000000
DATA p256const1<>+0x00(SB)/8, $0xffffffff00000001 DATA p256const1<>+0x00(SB)/8, $0xfffffffeffffffff
DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff
DATA p256p<>+0x08(SB)/8, $0xffffffff00000000 DATA p256p<>+0x08(SB)/8, $0xffffffff00000000
DATA p256p<>+0x10(SB)/8, $0xffffffffffffffff DATA p256p<>+0x10(SB)/8, $0xffffffffffffffff
@ -140,9 +140,9 @@ TEXT ·p256NegCond(SB),NOSPLIT,$0
MOVQ cond+24(FP), t0 MOVQ cond+24(FP), t0
// acc = poly // acc = poly
MOVQ $-1, acc0 MOVQ $-1, acc0
MOVQ p256const0<>(SB), acc1 MOVQ p256p<>+0x08(SB), acc1
MOVQ $0, acc2 MOVQ $-1, acc2
MOVQ p256const1<>(SB), acc3 MOVQ p256p<>+0x18(SB), acc3
// Load the original value // Load the original value
MOVQ (8*0)(res_ptr), acc5 MOVQ (8*0)(res_ptr), acc5
MOVQ (8*1)(res_ptr), x_ptr MOVQ (8*1)(res_ptr), x_ptr
@ -254,50 +254,95 @@ sqrLoop:
ADCQ DX, t1 ADCQ DX, t1
MOVQ t1, x_ptr MOVQ t1, x_ptr
// First reduction step // First reduction step
MOVQ acc0, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc0, t1 MULQ acc0
SHLQ $32, acc0 ADDQ acc0, acc1
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc1
ADDQ acc0, acc1 ADCQ $0, DX
ADCQ t1, acc2 MOVQ DX, t1
ADCQ AX, acc3 MOVQ p256p<>+0x010(SB), AX
MULQ acc0
ADDQ t1, acc2
ADCQ $0, DX
ADDQ AX, acc2
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc0
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ $0, DX ADCQ $0, DX
MOVQ DX, acc0 MOVQ DX, acc0
// Second reduction step // Second reduction step
MOVQ acc1, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc1, t1 MULQ acc1
SHLQ $32, acc1 ADDQ acc1, acc2
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc2
ADDQ acc1, acc2 ADCQ $0, DX
ADCQ t1, acc3 MOVQ DX, t1
ADCQ AX, acc0 MOVQ p256p<>+0x010(SB), AX
MULQ acc1
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc1
ADDQ t1, acc0
ADCQ $0, DX
ADDQ AX, acc0
ADCQ $0, DX ADCQ $0, DX
MOVQ DX, acc1 MOVQ DX, acc1
// Third reduction step // Third reduction step
MOVQ acc2, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc2, t1 MULQ acc2
SHLQ $32, acc2 ADDQ acc2, acc3
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc3
ADDQ acc2, acc3 ADCQ $0, DX
ADCQ t1, acc0 MOVQ DX, t1
ADCQ AX, acc1 MOVQ p256p<>+0x010(SB), AX
MULQ acc2
ADDQ t1, acc0
ADCQ $0, DX
ADDQ AX, acc0
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc2
ADDQ t1, acc1
ADCQ $0, DX
ADDQ AX, acc1
ADCQ $0, DX ADCQ $0, DX
MOVQ DX, acc2 MOVQ DX, acc2
// Last reduction step // Last reduction step
XORQ t0, t0 XORQ t0, t0
MOVQ acc3, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc3, t1 MULQ acc3
SHLQ $32, acc3 ADDQ acc3, acc0
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc0
ADDQ acc3, acc0 ADCQ $0, DX
ADCQ t1, acc1 MOVQ DX, t1
ADCQ AX, acc2 MOVQ p256p<>+0x010(SB), AX
MULQ acc3
ADDQ t1, acc1
ADCQ $0, DX
ADDQ AX, acc1
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc3
ADDQ t1, acc2
ADCQ $0, DX
ADDQ AX, acc2
ADCQ $0, DX ADCQ $0, DX
MOVQ DX, acc3 MOVQ DX, acc3
// Add bits [511:256] of the sqr result // Add bits [511:256] of the sqr result
ADCQ acc4, acc0 ADCQ acc4, acc0
ADCQ acc5, acc1 ADCQ acc5, acc1
@ -312,7 +357,7 @@ sqrLoop:
// Subtract p256 // Subtract p256
SUBQ $-1, acc0 SUBQ $-1, acc0
SBBQ p256const0<>(SB) ,acc1 SBBQ p256const0<>(SB) ,acc1
SBBQ $0, acc2 SBBQ $-1, acc2
SBBQ p256const1<>(SB), acc3 SBBQ p256const1<>(SB), acc3
SBBQ $0, t0 SBBQ $0, t0
@ -542,47 +587,92 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
// Only reduce, no multiplications are needed // Only reduce, no multiplications are needed
// First stage // First stage
MOVQ acc0, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc0, t1 MULQ acc0
SHLQ $32, acc0 ADDQ acc0, acc1
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc1
ADDQ acc0, acc1 ADCQ $0, DX
ADCQ t1, acc2 MOVQ DX, t1
ADCQ AX, acc3 MOVQ p256p<>+0x010(SB), AX
MULQ acc0
ADDQ t1, acc2
ADCQ $0, DX
ADDQ AX, acc2
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc0
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ DX, acc4 ADCQ DX, acc4
XORQ acc5, acc5 XORQ acc5, acc5
// Second stage // Second stage
MOVQ acc1, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc1, t1 MULQ acc1
SHLQ $32, acc1 ADDQ acc1, acc2
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc2
ADDQ acc1, acc2 ADCQ $0, DX
ADCQ t1, acc3 MOVQ DX, t1
ADCQ AX, acc4 MOVQ p256p<>+0x010(SB), AX
MULQ acc1
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc1
ADDQ t1, acc4
ADCQ $0, DX
ADDQ AX, acc4
ADCQ DX, acc5 ADCQ DX, acc5
XORQ acc0, acc0 XORQ acc0, acc0
// Third stage // Third stage
MOVQ acc2, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc2, t1 MULQ acc2
SHLQ $32, acc2 ADDQ acc2, acc3
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc3
ADDQ acc2, acc3 ADCQ $0, DX
ADCQ t1, acc4 MOVQ DX, t1
ADCQ AX, acc5 MOVQ p256p<>+0x010(SB), AX
MULQ acc2
ADDQ t1, acc4
ADCQ $0, DX
ADDQ AX, acc4
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc2
ADDQ t1, acc5
ADCQ $0, DX
ADDQ AX, acc5
ADCQ DX, acc0 ADCQ DX, acc0
XORQ acc1, acc1 XORQ acc1, acc1
// Last stage // Last stage
MOVQ acc3, AX MOVQ p256p<>+0x08(SB), AX
MOVQ acc3, t1 MULQ acc3
SHLQ $32, acc3 ADDQ acc3, acc4
MULQ p256const1<>(SB) ADCQ $0, DX
SHRQ $32, t1 ADDQ AX, acc4
ADDQ acc3, acc4 ADCQ $0, DX
ADCQ t1, acc5 MOVQ DX, t1
ADCQ AX, acc0 MOVQ p256p<>+0x010(SB), AX
MULQ acc3
ADDQ t1, acc5
ADCQ $0, DX
ADDQ AX, acc5
ADCQ $0, DX
MOVQ DX, t1
MOVQ p256p<>+0x018(SB), AX
MULQ acc3
ADDQ t1, acc0
ADCQ $0, DX
ADDQ AX, acc0
ADCQ DX, acc1 ADCQ DX, acc1
MOVQ acc4, x_ptr MOVQ acc4, x_ptr
@ -592,7 +682,7 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
SUBQ $-1, acc4 SUBQ $-1, acc4
SBBQ p256const0<>(SB), acc5 SBBQ p256const0<>(SB), acc5
SBBQ $0, acc0 SBBQ $-1, acc0
SBBQ p256const1<>(SB), acc1 SBBQ p256const1<>(SB), acc1
CMOVQCS x_ptr, acc4 CMOVQCS x_ptr, acc4

80
sm2/p256_asm_test.go Normal file
View File

@ -0,0 +1,80 @@
// +build amd64
package sm2
import (
"encoding/hex"
"fmt"
"math/big"
"testing"
)
// toBigInt turns the limbs in in into a *big.Int. The limbs are serialised
// into a 32-byte buffer by p256LittleToBig, and that buffer is then read as a
// big-endian unsigned integer.
func toBigInt(in []uint64) *big.Int {
	buf := make([]byte, 32)
	p256LittleToBig(buf, in)

	result := new(big.Int)
	result.SetBytes(buf)
	return result
}
// Test_p256NegCond checks both branches of p256NegCond: with cond == 0 the
// value must come back unchanged, and with cond == 1 it must be replaced by
// p - val.
func Test_p256NegCond(t *testing.T) {
	p, ok := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	if !ok {
		t.Fatal("invalid hex literal for p")
	}

	val := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05}
	// Snapshot the input before p256NegCond mutates val in place.
	orig := toBigInt(val)

	p256NegCond(val, 0)
	if got := toBigInt(val); got.Cmp(orig) != 0 {
		t.Fatalf("p256NegCond(val, 0) changed the value: got %x, want %x", got, orig)
	}

	p256NegCond(val, 1)
	got := toBigInt(val)
	if got.Cmp(orig) == 0 {
		t.Fatalf("p256NegCond(val, 1) left the value unchanged: %x", got)
	}
	if want := new(big.Int).Sub(p, orig); got.Cmp(want) != 0 {
		t.Fatalf("p256NegCond(val, 1) = %x, want p - val = %x", got, want)
	}
}
// Test_p256FromMont runs p256FromMont on two fixed inputs and compares the
// output limbs with the expected values: the first input must map to 1, the
// second to the limbs of the constant x below.
func Test_p256FromMont(t *testing.T) {
	res := make([]uint64, 4)

	p256FromMont(res, []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000})
	if (res[0]^0x0000000000000001)|res[1]|res[2]|res[3] != 0 {
		t.Fatalf("p256FromMont(one) = %#x, want [0x1 0x0 0x0 0x0]", res)
	}

	x, ok := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
	if !ok {
		t.Fatal("invalid hex literal for x")
	}
	// x has a non-zero leading byte, so x.Bytes() is exactly 32 bytes long.
	want := make([]uint64, 4)
	p256BigToLittle(want, x.Bytes())

	p256FromMont(res, []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05})
	for i := range want {
		if res[i] != want[i] {
			t.Fatalf("p256FromMont(gx) = %#x, want %#x (first mismatch at limb %d)", res, want, i)
		}
	}
}
// Test_p256Sqr checks p256Sqr against math/big.
//
// First, squaring the vector `one` twice must return `one` again. Second, one
// squaring of gx is compared with x*x*r mod p; the result is deliberately not
// passed through p256FromMont, so it is compared with the value that still
// carries the factor r = 2^256.
func Test_p256Sqr(t *testing.T) {
	mustHex := func(s string) *big.Int {
		v, ok := new(big.Int).SetString(s, 16)
		if !ok {
			t.Fatalf("invalid hex literal %q", s)
		}
		return v
	}
	r := mustHex("10000000000000000000000000000000000000000000000000000000000000000")
	p := mustHex("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF")
	x := mustHex("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7")

	one := []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000}
	res := make([]uint64, 4)
	p256Sqr(res, one, 2)
	if (res[0]^one[0])|(res[1]^one[1])|(res[2]^one[2])|(res[3]^one[3]) != 0 {
		t.Fatalf("p256Sqr(one, 2) = %#x, want %#x", res, one)
	}

	gx := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05}
	p256Sqr(res, gx, 1)
	resInt := toBigInt(res)
	fmt.Printf("1=%s\n", hex.EncodeToString(resInt.Bytes()))

	// Reference value computed with math/big: ((x*x mod p) * r) mod p.
	want := new(big.Int).Mul(x, x)
	want.Mod(want, p)
	want.Mul(want, r)
	want.Mod(want, p)
	fmt.Printf("2=%s\n", hex.EncodeToString(want.Bytes()))

	if resInt.Cmp(want) != 0 {
		t.Fatalf("p256Sqr(gx, 1) = %x, want x*x*r mod p = %x", resInt, want)
	}
}