mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 04:36:19 +08:00
MAGIC - step 2, completed part of asm
This commit is contained in:
parent
4de671cd8b
commit
1a46185db8
@ -24,8 +24,8 @@
|
|||||||
#define t0 R14
|
#define t0 R14
|
||||||
#define t1 R15
|
#define t1 R15
|
||||||
|
|
||||||
DATA p256const0<>+0x00(SB)/8, $0x00000000ffffffff
|
DATA p256const0<>+0x00(SB)/8, $0xffffffff00000000
|
||||||
DATA p256const1<>+0x00(SB)/8, $0xffffffff00000001
|
DATA p256const1<>+0x00(SB)/8, $0xfffffffeffffffff
|
||||||
DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff
|
DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff
|
||||||
DATA p256p<>+0x08(SB)/8, $0xffffffff00000000
|
DATA p256p<>+0x08(SB)/8, $0xffffffff00000000
|
||||||
DATA p256p<>+0x10(SB)/8, $0xffffffffffffffff
|
DATA p256p<>+0x10(SB)/8, $0xffffffffffffffff
|
||||||
@ -140,9 +140,9 @@ TEXT ·p256NegCond(SB),NOSPLIT,$0
|
|||||||
MOVQ cond+24(FP), t0
|
MOVQ cond+24(FP), t0
|
||||||
// acc = poly
|
// acc = poly
|
||||||
MOVQ $-1, acc0
|
MOVQ $-1, acc0
|
||||||
MOVQ p256const0<>(SB), acc1
|
MOVQ p256p<>+0x08(SB), acc1
|
||||||
MOVQ $0, acc2
|
MOVQ $-1, acc2
|
||||||
MOVQ p256const1<>(SB), acc3
|
MOVQ p256p<>+0x18(SB), acc3
|
||||||
// Load the original value
|
// Load the original value
|
||||||
MOVQ (8*0)(res_ptr), acc5
|
MOVQ (8*0)(res_ptr), acc5
|
||||||
MOVQ (8*1)(res_ptr), x_ptr
|
MOVQ (8*1)(res_ptr), x_ptr
|
||||||
@ -254,50 +254,95 @@ sqrLoop:
|
|||||||
ADCQ DX, t1
|
ADCQ DX, t1
|
||||||
MOVQ t1, x_ptr
|
MOVQ t1, x_ptr
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc0, t1
|
MULQ acc0
|
||||||
SHLQ $32, acc0
|
ADDQ acc0, acc1
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc1
|
||||||
ADDQ acc0, acc1
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc2
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc3
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc0
|
||||||
|
ADDQ t1, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc0
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
MOVQ DX, acc0
|
MOVQ DX, acc0
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MOVQ acc1, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc1, t1
|
MULQ acc1
|
||||||
SHLQ $32, acc1
|
ADDQ acc1, acc2
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc2
|
||||||
ADDQ acc1, acc2
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc3
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc0
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
MOVQ DX, acc1
|
MOVQ DX, acc1
|
||||||
// Third reduction step
|
// Third reduction step
|
||||||
MOVQ acc2, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc2, t1
|
MULQ acc2
|
||||||
SHLQ $32, acc2
|
ADDQ acc2, acc3
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc3
|
||||||
ADDQ acc2, acc3
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc0
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc1
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc1
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
MOVQ DX, acc2
|
MOVQ DX, acc2
|
||||||
// Last reduction step
|
// Last reduction step
|
||||||
XORQ t0, t0
|
XORQ t0, t0
|
||||||
MOVQ acc3, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc3, t1
|
MULQ acc3
|
||||||
SHLQ $32, acc3
|
ADDQ acc3, acc0
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc0
|
||||||
ADDQ acc3, acc0
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc1
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc2
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ t1, acc1
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc1
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ t1, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc2
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
MOVQ DX, acc3
|
MOVQ DX, acc3
|
||||||
|
|
||||||
// Add bits [511:256] of the sqr result
|
// Add bits [511:256] of the sqr result
|
||||||
ADCQ acc4, acc0
|
ADCQ acc4, acc0
|
||||||
ADCQ acc5, acc1
|
ADCQ acc5, acc1
|
||||||
@ -312,7 +357,7 @@ sqrLoop:
|
|||||||
// Subtract p256
|
// Subtract p256
|
||||||
SUBQ $-1, acc0
|
SUBQ $-1, acc0
|
||||||
SBBQ p256const0<>(SB) ,acc1
|
SBBQ p256const0<>(SB) ,acc1
|
||||||
SBBQ $0, acc2
|
SBBQ $-1, acc2
|
||||||
SBBQ p256const1<>(SB), acc3
|
SBBQ p256const1<>(SB), acc3
|
||||||
SBBQ $0, t0
|
SBBQ $0, t0
|
||||||
|
|
||||||
@ -542,47 +587,92 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
// Only reduce, no multiplications are needed
|
// Only reduce, no multiplications are needed
|
||||||
// First stage
|
// First stage
|
||||||
MOVQ acc0, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc0, t1
|
MULQ acc0
|
||||||
SHLQ $32, acc0
|
ADDQ acc0, acc1
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc1
|
||||||
ADDQ acc0, acc1
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc2
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc3
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc0
|
||||||
|
ADDQ t1, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc0
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
ADCQ DX, acc4
|
ADCQ DX, acc4
|
||||||
XORQ acc5, acc5
|
XORQ acc5, acc5
|
||||||
|
|
||||||
// Second stage
|
// Second stage
|
||||||
MOVQ acc1, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc1, t1
|
MULQ acc1
|
||||||
SHLQ $32, acc1
|
ADDQ acc1, acc2
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc2
|
||||||
ADDQ acc1, acc2
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc3
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc4
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc4
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc4
|
||||||
ADCQ DX, acc5
|
ADCQ DX, acc5
|
||||||
XORQ acc0, acc0
|
XORQ acc0, acc0
|
||||||
// Third stage
|
// Third stage
|
||||||
MOVQ acc2, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc2, t1
|
MULQ acc2
|
||||||
SHLQ $32, acc2
|
ADDQ acc2, acc3
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc3
|
||||||
ADDQ acc2, acc3
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc4
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc5
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc4
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc4
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc5
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc5
|
||||||
ADCQ DX, acc0
|
ADCQ DX, acc0
|
||||||
XORQ acc1, acc1
|
XORQ acc1, acc1
|
||||||
// Last stage
|
// Last stage
|
||||||
MOVQ acc3, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc3, t1
|
MULQ acc3
|
||||||
SHLQ $32, acc3
|
ADDQ acc3, acc4
|
||||||
MULQ p256const1<>(SB)
|
ADCQ $0, DX
|
||||||
SHRQ $32, t1
|
ADDQ AX, acc4
|
||||||
ADDQ acc3, acc4
|
ADCQ $0, DX
|
||||||
ADCQ t1, acc5
|
MOVQ DX, t1
|
||||||
ADCQ AX, acc0
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ t1, acc5
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc5
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
ADCQ DX, acc1
|
ADCQ DX, acc1
|
||||||
|
|
||||||
MOVQ acc4, x_ptr
|
MOVQ acc4, x_ptr
|
||||||
@ -592,7 +682,7 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
SUBQ $-1, acc4
|
SUBQ $-1, acc4
|
||||||
SBBQ p256const0<>(SB), acc5
|
SBBQ p256const0<>(SB), acc5
|
||||||
SBBQ $0, acc0
|
SBBQ $-1, acc0
|
||||||
SBBQ p256const1<>(SB), acc1
|
SBBQ p256const1<>(SB), acc1
|
||||||
|
|
||||||
CMOVQCS x_ptr, acc4
|
CMOVQCS x_ptr, acc4
|
||||||
|
80
sm2/p256_asm_test.go
Normal file
80
sm2/p256_asm_test.go
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
// +build amd64
|
||||||
|
|
||||||
|
package sm2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"math/big"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func toBigInt(in []uint64) *big.Int {
|
||||||
|
var valBytes = make([]byte, 32)
|
||||||
|
p256LittleToBig(valBytes, in)
|
||||||
|
return new(big.Int).SetBytes(valBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_p256NegCond(t *testing.T) {
|
||||||
|
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||||
|
var val = []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05}
|
||||||
|
bigVal := toBigInt(val)
|
||||||
|
|
||||||
|
p256NegCond(val, 0)
|
||||||
|
bigVal1 := toBigInt(val)
|
||||||
|
if bigVal.Cmp(bigVal1) != 0 {
|
||||||
|
t.Fatal("should be same")
|
||||||
|
}
|
||||||
|
p256NegCond(val, 1)
|
||||||
|
bigVal1 = toBigInt(val)
|
||||||
|
if bigVal.Cmp(bigVal1) == 0 {
|
||||||
|
t.Fatal("should be different")
|
||||||
|
}
|
||||||
|
bigVal2 := new(big.Int).Sub(p, bigVal)
|
||||||
|
if bigVal2.Cmp(bigVal1) != 0 {
|
||||||
|
t.Fatal("should be same")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_p256FromMont(t *testing.T) {
|
||||||
|
res := make([]uint64, 4)
|
||||||
|
p256FromMont(res, []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000})
|
||||||
|
res1 := (res[0] ^ 0x0000000000000001) | res[1] | res[2] | res[3]
|
||||||
|
if res1 != 0 {
|
||||||
|
t.FailNow()
|
||||||
|
}
|
||||||
|
x, _ := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
|
||||||
|
x1 := make([]uint64, 4)
|
||||||
|
p256BigToLittle(x1, x.Bytes())
|
||||||
|
|
||||||
|
p256FromMont(res, []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05})
|
||||||
|
if (res[0]^x1[0])|(res[1]^x1[1])|(res[2]^x1[2])|(res[3]^x1[3]) != 0 {
|
||||||
|
t.FailNow()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_p256Sqr(t *testing.T) {
|
||||||
|
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||||
|
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||||
|
x, _ := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
|
||||||
|
one := []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000}
|
||||||
|
res := make([]uint64, 4)
|
||||||
|
p256Sqr(res, one, 2)
|
||||||
|
if (res[0]^one[0])|(res[1]^one[1])|(res[2]^one[2])|(res[3]^one[3]) != 0 {
|
||||||
|
t.FailNow()
|
||||||
|
}
|
||||||
|
gx := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05}
|
||||||
|
p256Sqr(res, gx, 1)
|
||||||
|
//p256FromMont(res, res)
|
||||||
|
resInt := toBigInt(res)
|
||||||
|
fmt.Printf("1=%s\n", hex.EncodeToString(resInt.Bytes()))
|
||||||
|
gxsqr := new(big.Int).Mul(x, x)
|
||||||
|
gxsqr = new(big.Int).Mod(gxsqr, p)
|
||||||
|
gxsqr = new(big.Int).Mul(gxsqr, r)
|
||||||
|
gxsqr = new(big.Int).Mod(gxsqr, p)
|
||||||
|
fmt.Printf("2=%s\n", hex.EncodeToString(gxsqr.Bytes()))
|
||||||
|
if resInt.Cmp(gxsqr) != 0 {
|
||||||
|
t.FailNow()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user