From 1aed89febd7ae7b56da61b0ea2e8637f80e543b5 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Mon, 17 Feb 2025 08:32:20 +0800 Subject: [PATCH] subtle: combine xor_.go files #304 --- docs/sm2.md | 4 +- internal/subtle/xor_arm64.go | 10 - internal/subtle/{xor_amd64.go => xor_asm.go} | 2 +- internal/subtle/xor_ppc64x.go | 10 - internal/subtle/xor_riscv64.go | 10 - internal/subtle/xor_riscv64.s | 196 ++++++++++++++----- internal/subtle/xor_s390x.go | 10 - internal/subtle/xor_test.go | 22 ++- 8 files changed, 176 insertions(+), 88 deletions(-) delete mode 100644 internal/subtle/xor_arm64.go rename internal/subtle/{xor_amd64.go => xor_asm.go} (78%) delete mode 100644 internal/subtle/xor_ppc64x.go delete mode 100644 internal/subtle/xor_riscv64.go delete mode 100644 internal/subtle/xor_s390x.go diff --git a/docs/sm2.md b/docs/sm2.md index e73633d..beea2e0 100644 --- a/docs/sm2.md +++ b/docs/sm2.md @@ -186,7 +186,9 @@ func ExampleVerifyASN1WithSM2() { } ``` -### 如何处理不用UID的签名、验签? +### 如何处理不用Z的签名、验签? +所谓**Z**,就是用户可识别标识符和用户公钥、SM2椭圆曲线参数的杂凑值。其它签名算法如ECDSA是没有这个**Z**的,这也是SM2签名算法难以融入以ECDSA签名算法为主的体系的主因。 + #### 签名 也是使用sm2私钥的`Sign`方法,只是```SignerOpts```传入`nil`或者其它非`SM2SignerOption`即可,那么,你自己负责预先计算杂凑值,当然如何计算杂凑值,由你自己说了算了。 diff --git a/internal/subtle/xor_arm64.go b/internal/subtle/xor_arm64.go deleted file mode 100644 index 6fbacf9..0000000 --- a/internal/subtle/xor_arm64.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -//go:build !purego - -package subtle - -//go:noescape -func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_amd64.go b/internal/subtle/xor_asm.go similarity index 78% rename from internal/subtle/xor_amd64.go rename to internal/subtle/xor_asm.go index c5c4ed1..0ef0886 100644 --- a/internal/subtle/xor_amd64.go +++ b/internal/subtle/xor_asm.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // -//go:build !purego +//go:build !purego && (amd64 || arm64 || ppc64 || ppc64le || riscv64 || s390x) package subtle diff --git a/internal/subtle/xor_ppc64x.go b/internal/subtle/xor_ppc64x.go deleted file mode 100644 index 760463c..0000000 --- a/internal/subtle/xor_ppc64x.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (ppc64 || ppc64le) && !purego - -package subtle - -//go:noescape -func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_riscv64.go b/internal/subtle/xor_riscv64.go deleted file mode 100644 index ea8dd38..0000000 --- a/internal/subtle/xor_riscv64.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2024 Sun Yimin. All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -//go:build !purego - -package subtle - -//go:noescape -func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_riscv64.s b/internal/subtle/xor_riscv64.s index 86752dd..008f257 100644 --- a/internal/subtle/xor_riscv64.s +++ b/internal/subtle/xor_riscv64.s @@ -8,56 +8,162 @@ // func xorBytes(dst, a, b *byte, n int) TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0 - MOV dst+0(FP), X5 - MOV a+8(FP), X6 - MOV b+16(FP), X7 - MOV n+24(FP), X8 + MOV dst+0(FP), X10 + MOV a+8(FP), X11 + MOV b+16(FP), X12 + MOV n+24(FP), X13 - MOV $8, X9 - BLTU X8, X9, tail + MOV $32, X15 + BLT X13, X15, loop4_check -loop: - MOV (X6), X10 - MOV (X7), X11 - XOR X10, X11, X10 - MOV X10, (X5) - ADD $8, X5 - ADD $8, X6 - ADD $8, X7 - SUB $8, X8 - BGEU X8, X9, loop + // Check alignment - if alignment differs we have to do one byte at a time. + AND $7, X10, X5 + AND $7, X11, X6 + AND $7, X12, X7 + BNE X5, X6, loop4_check + BNE X5, X7, loop4_check + BEQZ X5, loop64_check -tail: - BEQZ X8, done - MOV $4, X9 - BLTU X8, X9, less_than4 - MOVWU (X6), X10 - MOVWU (X7), X11 - XOR X10, X11, X10 - MOVW X10, (X5) - ADD $4, X5 - ADD $4, X6 - ADD $4, X7 - SUB $4, X8 + // Check one byte at a time until we reach 8 byte alignment. + MOV $8, X8 + SUB X5, X8 + SUB X8, X13 +align: + MOVBU 0(X11), X16 + MOVBU 0(X12), X17 + XOR X16, X17 + MOVB X17, 0(X10) + ADD $1, X10 + ADD $1, X11 + ADD $1, X12 + SUB $1, X8 + BNEZ X8, align -less_than4: - MOV $2, X9 - BLTU X8, X9, less_than2 - MOVHU (X6), X10 - MOVHU (X7), X11 - XOR X10, X11, X10 - MOVH X10, (X5) - ADD $2, X5 - ADD $2, X6 - ADD $2, X7 - SUB $2, X8 +loop64_check: + MOV $64, X15 + BLT X13, X15, tail32_check + PCALIGN $16 +loop64: + MOV 0(X11), X16 + MOV 0(X12), X17 + MOV 8(X11), X18 + MOV 8(X12), X19 + XOR X16, X17 + XOR X18, X19 + MOV X17, 0(X10) + MOV X19, 8(X10) + MOV 16(X11), X20 + MOV 16(X12), X21 + MOV 24(X11), X22 + MOV 24(X12), X23 + XOR X20, X21 + XOR X22, X23 + MOV X21, 16(X10) + MOV X23, 24(X10) + MOV 32(X11), X16 + MOV 32(X12), X17 + MOV 40(X11), X18 + MOV 40(X12), X19 + XOR X16, X17 + XOR X18, X19 + MOV X17, 32(X10) + MOV X19, 40(X10) + MOV 48(X11), X20 + MOV 48(X12), X21 + MOV 56(X11), X22 + MOV 56(X12), X23 + XOR X20, X21 + XOR X22, X23 + MOV X21, 48(X10) + MOV X23, 56(X10) + ADD $64, X10 + ADD $64, X11 + ADD $64, X12 + SUB $64, X13 + BGE X13, X15, loop64 + BEQZ X13, done -less_than2: - BEQZ X8, done - MOVBU (X6), X10 - MOVBU (X7), X11 - XOR X10, X11, X10 - MOVB X10, (X5) +tail32_check: + MOV $32, X15 + BLT X13, X15, tail16_check + MOV 0(X11), X16 + MOV 0(X12), X17 + MOV 8(X11), X18 + MOV 8(X12), X19 + XOR X16, X17 + XOR X18, X19 + MOV X17, 0(X10) + MOV X19, 8(X10) + MOV 16(X11), X20 + MOV 16(X12), X21 + MOV 24(X11), X22 + MOV 24(X12), X23 + XOR X20, X21 + XOR X22, X23 + MOV X21, 16(X10) + MOV X23, 24(X10) + ADD $32, X10 + ADD $32, X11 + ADD $32, X12 + SUB $32, X13 + BEQZ X13, done + +tail16_check: + MOV $16, X15 + BLT X13, X15, loop4_check + MOV 0(X11), X16 + MOV 0(X12), X17 + MOV 8(X11), X18 + MOV 8(X12), X19 + XOR X16, X17 + XOR X18, X19 + MOV X17, 0(X10) + MOV X19, 8(X10) + ADD $16, X10 + ADD $16, X11 + ADD $16, X12 + SUB $16, X13 + BEQZ X13, done + +loop4_check: + MOV $4, X15 + BLT X13, X15, loop1 + PCALIGN $16 +loop4: + MOVBU 0(X11), X16 + MOVBU 0(X12), X17 + MOVBU 1(X11), X18 + MOVBU 1(X12), X19 + XOR X16, X17 + XOR X18, X19 + MOVB X17, 0(X10) + MOVB X19, 1(X10) + MOVBU 2(X11), X20 + MOVBU 2(X12), X21 + MOVBU 3(X11), X22 + MOVBU 3(X12), X23 + XOR X20, X21 + XOR X22, X23 + MOVB X21, 2(X10) + MOVB X23, 3(X10) + ADD $4, X10 + ADD $4, X11 + ADD $4, X12 + SUB $4, X13 + BGE X13, X15, loop4 + + PCALIGN $16 +loop1: + BEQZ X13, done + MOVBU 0(X11), X16 + MOVBU 0(X12), X17 + XOR X16, X17 + MOVB X17, 0(X10) + ADD $1, X10 + ADD $1, X11 + ADD $1, X12 + SUB $1, X13 + JMP loop1 done: RET diff --git a/internal/subtle/xor_s390x.go b/internal/subtle/xor_s390x.go deleted file mode 100644 index ea8dd38..0000000 --- a/internal/subtle/xor_s390x.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2024 Sun Yimin. All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -//go:build !purego - -package subtle - -//go:noescape -func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_test.go b/internal/subtle/xor_test.go index b98b3a4..dfca2c9 100644 --- a/internal/subtle/xor_test.go +++ b/internal/subtle/xor_test.go @@ -72,7 +72,7 @@ func BenchmarkXORBytes(b *testing.B) { dst := make([]byte, 1<<15) data0 := make([]byte, 1<<15) data1 := make([]byte, 1<<15) - sizes := []int64{1 << 3, 1 << 4, 1 << 5, 1 << 7, 1 << 11, 1 << 15} + sizes := []int64{1 << 3, 1 << 4, 1 << 5, 1 << 7, 1 << 11, 1 << 13, 1 << 15} for _, size := range sizes { b.Run(fmt.Sprintf("%dBytes", size), func(b *testing.B) { s0 := data0[:size] @@ -85,6 +85,26 @@ func BenchmarkXORBytes(b *testing.B) { } } +func BenchmarkXORBytesAlignment(b *testing.B) { + dst := make([]byte, 8+1<<11) + data0 := make([]byte, 8+1<<11) + data1 := make([]byte, 8+1<<11) + sizes := []int64{1 << 3, 1 << 7, 1 << 11} + for _, size := range sizes { + for offset := int64(0); offset < 8; offset++ { + b.Run(fmt.Sprintf("%dBytes%dOffset", size, offset), func(b *testing.B) { + d := dst[offset : offset+size] + s0 := data0[offset : offset+size] + s1 := data1[offset : offset+size] + b.SetBytes(int64(size)) + for i := 0; i < b.N; i++ { + subtle.XORBytes(d, s0, s1) + } + }) + } + } +} + func mustPanic(t *testing.T, expected string, f func()) { t.Helper() defer func() {