From 7ec46d700d3e4b9e3210780582dc91b2ffe81766 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Thu, 13 Mar 2025 15:20:05 +0800 Subject: [PATCH] internal/subtle: remove xor related codes #315 --- internal/subtle/xor.go | 32 ------- internal/subtle/xor_amd64.s | 112 ---------------------- internal/subtle/xor_arm64.s | 69 -------------- internal/subtle/xor_asm.go | 14 --- internal/subtle/xor_generic.go | 64 ------------- internal/subtle/xor_ppc64x.s | 142 --------------------------- internal/subtle/xor_riscv64.s | 169 --------------------------------- internal/subtle/xor_s390x.s | 98 ------------------- internal/subtle/xor_test.go | 123 ------------------------ 9 files changed, 823 deletions(-) delete mode 100644 internal/subtle/xor.go delete mode 100644 internal/subtle/xor_amd64.s delete mode 100644 internal/subtle/xor_arm64.s delete mode 100644 internal/subtle/xor_asm.go delete mode 100644 internal/subtle/xor_generic.go delete mode 100644 internal/subtle/xor_ppc64x.s delete mode 100644 internal/subtle/xor_riscv64.s delete mode 100644 internal/subtle/xor_s390x.s delete mode 100644 internal/subtle/xor_test.go diff --git a/internal/subtle/xor.go b/internal/subtle/xor.go deleted file mode 100644 index 93f1e7a..0000000 --- a/internal/subtle/xor.go +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2022 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package subtle - -import "github.com/emmansun/gmsm/internal/alias" - -// XORBytes sets dst[i] = x[i] ^ y[i] for all i < n = min(len(x), len(y)), -// returning n, the number of bytes written to dst. -// If dst does not have length at least n, -// XORBytes panics without writing anything to dst. -// -// dst and x or y may overlap exactly or not at all, -// otherwise XORBytes may panic. -func XORBytes(dst, x, y []byte) int { - n := len(x) - if len(y) < n { - n = len(y) - } - if n == 0 { - return 0 - } - if n > len(dst) { - panic("subtle.XORBytes: dst too short") - } - if alias.InexactOverlap(dst[:n], x[:n]) || alias.InexactOverlap(dst[:n], y[:n]) { - panic("subtle.XORBytes: invalid overlap") - } - xorBytes(&dst[0], &x[0], &y[0], n) // arch-specific - return n -} diff --git a/internal/subtle/xor_amd64.s b/internal/subtle/xor_amd64.s deleted file mode 100644 index 2a29beb..0000000 --- a/internal/subtle/xor_amd64.s +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -//go:build !purego - -#include "textflag.h" - -// func xorBytes(dst, a, b *byte, n int) -TEXT ·xorBytes(SB), NOSPLIT, $0 - MOVQ dst+0(FP), BX - MOVQ a+8(FP), SI - MOVQ b+16(FP), CX - MOVQ n+24(FP), DX - CMPQ DX, $32 // if len less than 32, non avx2. - JL non_avx2 - CMPB ·useAVX2(SB), $1 - JE avx2 - -non_avx2: - TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned. - JNZ not_aligned - -aligned: - MOVQ $0, AX // position in slices - -loop16b: - MOVOU (SI)(AX*1), X0 // XOR 16byte forwards. - MOVOU (CX)(AX*1), X1 - PXOR X1, X0 - MOVOU X0, (BX)(AX*1) - ADDQ $16, AX - CMPQ DX, AX - JNE loop16b - RET - -loop_1b: - SUBQ $1, DX // XOR 1byte backwards. - MOVB (SI)(DX*1), DI - MOVB (CX)(DX*1), AX - XORB AX, DI - MOVB DI, (BX)(DX*1) - TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b. - JNZ loop_1b - CMPQ DX, $0 // if len is 0, ret. - JE ret - TESTQ $15, DX // AND 15 & len, if zero jump to aligned. - JZ aligned - -not_aligned: - TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b. - JNE loop_1b - SUBQ $8, DX // XOR 8bytes backwards. - MOVQ (SI)(DX*1), DI - MOVQ (CX)(DX*1), AX - XORQ AX, DI - MOVQ DI, (BX)(DX*1) - CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned. - JGE aligned - -ret: - RET - -avx2: - TESTQ $31, DX // AND 31 & len, if not zero jump to avx2_not_aligned. - JNZ avx2_not_aligned - -avx2_aligned: // input length = 16*n, where n is greater or equal 2. - TESTQ $16, DX // AND 16 & len, if zero jump to loop32b_start. - JE loop32b_start - SUBQ $16, DX // XOR 16bytes backwards. - VMOVDQU (SI)(DX*1), X0 - VPXOR (CX)(DX*1), X0, X0 - VMOVDQU X0, (BX)(DX*1) - -loop32b_start: - MOVQ $0, AX // position in slices - -loop32b: - VMOVDQU (SI)(AX*1), Y0 // XOR 32byte forwards. - VPXOR (CX)(AX*1), Y0, Y0 - VMOVDQU Y0, (BX)(AX*1) - ADDQ $32, AX - CMPQ DX, AX - JNE loop32b - -avx2_ret: - VZEROUPPER - RET - -avx2_loop_1b: - SUBQ $1, DX // XOR 1byte backwards. - MOVB (SI)(DX*1), DI - MOVB (CX)(DX*1), AX - XORB AX, DI - MOVB DI, (BX)(DX*1) - TESTQ $7, DX // AND 7 & len, if not zero jump to avx2_loop_1b. - JNZ avx2_loop_1b - TESTQ $15, DX // AND 15 & len, if zero jump to aligned. - JZ avx2_aligned - -avx2_not_aligned: - TESTQ $7, DX // AND $7 & len, if not zero jump to avx2_loop_1b. - JNE avx2_loop_1b - TESTQ $8, DX // AND $8 & len, if zero jump to avx2_aligned. - JE avx2_aligned - SUBQ $8, DX // XOR 8bytes backwards. - MOVQ (SI)(DX*1), DI - MOVQ (CX)(DX*1), AX - XORQ AX, DI - MOVQ DI, (BX)(DX*1) - JMP avx2_aligned diff --git a/internal/subtle/xor_arm64.s b/internal/subtle/xor_arm64.s deleted file mode 100644 index 8f7ac7f..0000000 --- a/internal/subtle/xor_arm64.s +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -//go:build !purego - -#include "textflag.h" - -// func xorBytes(dst, a, b *byte, n int) -TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0 - MOVD dst+0(FP), R0 - MOVD a+8(FP), R1 - MOVD b+16(FP), R2 - MOVD n+24(FP), R3 - CMP $64, R3 - BLT tail -loop_64: - VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] - VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16] - VEOR V0.B16, V4.B16, V4.B16 - VEOR V1.B16, V5.B16, V5.B16 - VEOR V2.B16, V6.B16, V6.B16 - VEOR V3.B16, V7.B16, V7.B16 - VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0) - SUBS $64, R3 - CMP $64, R3 - BGE loop_64 -tail: - // quick end - CBZ R3, end - TBZ $5, R3, less_than32 - VLD1.P 32(R1), [V0.B16, V1.B16] - VLD1.P 32(R2), [V2.B16, V3.B16] - VEOR V0.B16, V2.B16, V2.B16 - VEOR V1.B16, V3.B16, V3.B16 - VST1.P [V2.B16, V3.B16], 32(R0) -less_than32: - TBZ $4, R3, less_than16 - LDP.P 16(R1), (R11, R12) - LDP.P 16(R2), (R13, R14) - EOR R11, R13, R13 - EOR R12, R14, R14 - STP.P (R13, R14), 16(R0) -less_than16: - TBZ $3, R3, less_than8 - MOVD.P 8(R1), R11 - MOVD.P 8(R2), R12 - EOR R11, R12, R12 - MOVD.P R12, 8(R0) -less_than8: - TBZ $2, R3, less_than4 - MOVWU.P 4(R1), R13 - MOVWU.P 4(R2), R14 - EORW R13, R14, R14 - MOVWU.P R14, 4(R0) -less_than4: - TBZ $1, R3, less_than2 - MOVHU.P 2(R1), R15 - MOVHU.P 2(R2), R16 - EORW R15, R16, R16 - MOVHU.P R16, 2(R0) -less_than2: - TBZ $0, R3, end - MOVBU (R1), R17 - MOVBU (R2), R19 - EORW R17, R19, R19 - MOVBU R19, (R0) -end: - RET diff --git a/internal/subtle/xor_asm.go b/internal/subtle/xor_asm.go deleted file mode 100644 index 0b781fd..0000000 --- a/internal/subtle/xor_asm.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -//go:build !purego && (amd64 || arm64 || ppc64 || ppc64le || riscv64 || s390x) - -package subtle - -import "github.com/emmansun/gmsm/internal/cpu" - -var useAVX2 = cpu.X86.HasAVX2 - -//go:noescape -func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_generic.go b/internal/subtle/xor_generic.go deleted file mode 100644 index 118244e..0000000 --- a/internal/subtle/xor_generic.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. -// -//go:build purego || !(amd64 || arm64 || s390x || ppc64 || ppc64le || riscv64) - -package subtle - -import ( - "runtime" - "unsafe" -) - -const wordSize = unsafe.Sizeof(uintptr(0)) - -const supportsUnaligned = runtime.GOARCH == "386" || - runtime.GOARCH == "amd64" || - runtime.GOARCH == "ppc64" || - runtime.GOARCH == "ppc64le" || - runtime.GOARCH == "s390x" - -func xorBytes(dstb, xb, yb *byte, n int) { - // xorBytes assembly is written using pointers and n. Back to slices. - dst := unsafe.Slice(dstb, n) - x := unsafe.Slice(xb, n) - y := unsafe.Slice(yb, n) - - if supportsUnaligned || aligned(dstb, xb, yb) { - xorLoop(words(dst), words(x), words(y)) - if uintptr(n)%wordSize == 0 { - return - } - done := n &^ int(wordSize-1) - dst = dst[done:] - x = x[done:] - y = y[done:] - } - xorLoop(dst, x, y) -} - -// aligned reports whether dst, x, and y are all word-aligned pointers. -func aligned(dst, x, y *byte) bool { - return (uintptr(unsafe.Pointer(dst))|uintptr(unsafe.Pointer(x))|uintptr(unsafe.Pointer(y)))&(wordSize-1) == 0 -} - -// words returns a []uintptr pointing at the same data as x, -// with any trailing partial word removed. -func words(x []byte) []uintptr { - n := uintptr(len(x)) / wordSize - if n == 0 { - // Avoid creating a *uintptr that refers to data smaller than a uintptr; - // see issue 59334. - return nil - } - return unsafe.Slice((*uintptr)(unsafe.Pointer(&x[0])), n) -} - -func xorLoop[T byte | uintptr](dst, x, y []T) { - x = x[:len(dst)] // remove bounds check in loop - y = y[:len(dst)] // remove bounds check in loop - for i := range dst { - dst[i] = x[i] ^ y[i] - } -} diff --git a/internal/subtle/xor_ppc64x.s b/internal/subtle/xor_ppc64x.s deleted file mode 100644 index c1f72c5..0000000 --- a/internal/subtle/xor_ppc64x.s +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (ppc64 || ppc64le) && !purego - -#include "textflag.h" - -// func xorBytes(dst, a, b *byte, n int) -TEXT ·xorBytes(SB), NOSPLIT, $0 - MOVD dst+0(FP), R3 // R3 = dst - MOVD a+8(FP), R4 // R4 = a - MOVD b+16(FP), R5 // R5 = b - MOVD n+24(FP), R6 // R6 = n - - CMPU R6, $64, CR7 // Check if n ≥ 64 bytes - MOVD R0, R8 // R8 = index - CMPU R6, $8, CR6 // Check if 8 ≤ n < 64 bytes - BLE CR6, small // <= 8 - BLT CR7, xor32 // Case for 32 ≤ n < 64 bytes - - // Case for n ≥ 64 bytes -preloop64: - SRD $6, R6, R7 // Set up loop counter - MOVD R7, CTR - MOVD $16, R10 - MOVD $32, R14 - MOVD $48, R15 - ANDCC $63, R6, R9 // Check for tailing bytes for later - PCALIGN $16 - // Case for >= 64 bytes - // Process 64 bytes per iteration - // Load 4 vectors of a and b - // XOR the corresponding vectors - // from a and b and store the result -loop64: - LXVD2X (R4)(R8), VS32 - LXVD2X (R4)(R10), VS34 - LXVD2X (R4)(R14), VS36 - LXVD2X (R4)(R15), VS38 - LXVD2X (R5)(R8), VS33 - LXVD2X (R5)(R10), VS35 - LXVD2X (R5)(R14), VS37 - LXVD2X (R5)(R15), VS39 - XXLXOR VS32, VS33, VS32 - XXLXOR VS34, VS35, VS34 - XXLXOR VS36, VS37, VS36 - XXLXOR VS38, VS39, VS38 - STXVD2X VS32, (R3)(R8) - STXVD2X VS34, (R3)(R10) - STXVD2X VS36, (R3)(R14) - STXVD2X VS38, (R3)(R15) - ADD $64, R8 - ADD $64, R10 - ADD $64, R14 - ADD $64, R15 - BDNZ loop64 - BC 12,2,LR // BEQLR - MOVD R9, R6 - CMP R6, $8 - BLE small - // Case for 8 <= n < 64 bytes - // Process 32 bytes if available -xor32: - CMP R6, $32 - BLT xor16 - ADD $16, R8, R9 - LXVD2X (R4)(R8), VS32 - LXVD2X (R4)(R9), VS33 - LXVD2X (R5)(R8), VS34 - LXVD2X (R5)(R9), VS35 - XXLXOR VS32, VS34, VS32 - XXLXOR VS33, VS35, VS33 - STXVD2X VS32, (R3)(R8) - STXVD2X VS33, (R3)(R9) - ADD $32, R8 - ADD $-32, R6 - CMP R6, $8 - BLE small - // Case for 8 <= n < 32 bytes - // Process 16 bytes if available -xor16: - CMP R6, $16 - BLT xor8 - LXVD2X (R4)(R8), VS32 - LXVD2X (R5)(R8), VS33 - XXLXOR VS32, VS33, VS32 - STXVD2X VS32, (R3)(R8) - ADD $16, R8 - ADD $-16, R6 -small: - CMP R6, $0 - BC 12,2,LR // BEQLR -xor8: -#ifdef GOPPC64_power10 - SLD $56,R6,R17 - ADD R4,R8,R18 - ADD R5,R8,R19 - ADD R3,R8,R20 - LXVL R18,R17,V0 - LXVL R19,R17,V1 - VXOR V0,V1,V1 - STXVL V1,R20,R17 - RET -#else - CMP R6, $8 - BLT xor4 - // Case for 8 ≤ n < 16 bytes - MOVD (R4)(R8), R14 // R14 = a[i,...,i+7] - MOVD (R5)(R8), R15 // R15 = b[i,...,i+7] - XOR R14, R15, R16 // R16 = a[] ^ b[] - SUB $8, R6 // n = n - 8 - MOVD R16, (R3)(R8) // Store to dst - ADD $8, R8 -xor4: - CMP R6, $4 - BLT xor2 - MOVWZ (R4)(R8), R14 - MOVWZ (R5)(R8), R15 - XOR R14, R15, R16 - MOVW R16, (R3)(R8) - ADD $4,R8 - ADD $-4,R6 -xor2: - CMP R6, $2 - BLT xor1 - MOVHZ (R4)(R8), R14 - MOVHZ (R5)(R8), R15 - XOR R14, R15, R16 - MOVH R16, (R3)(R8) - ADD $2,R8 - ADD $-2,R6 -xor1: - CMP R6, $0 - BC 12,2,LR // BEQLR - MOVBZ (R4)(R8), R14 // R14 = a[i] - MOVBZ (R5)(R8), R15 // R15 = b[i] - XOR R14, R15, R16 // R16 = a[i] ^ b[i] - MOVB R16, (R3)(R8) // Store to dst -#endif -done: - RET diff --git a/internal/subtle/xor_riscv64.s b/internal/subtle/xor_riscv64.s deleted file mode 100644 index 008f257..0000000 --- a/internal/subtle/xor_riscv64.s +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2024 Sun Yimin. All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -//go:build !purego - -#include "textflag.h" - -// func xorBytes(dst, a, b *byte, n int) -TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0 - MOV dst+0(FP), X10 - MOV a+8(FP), X11 - MOV b+16(FP), X12 - MOV n+24(FP), X13 - - MOV $32, X15 - BLT X13, X15, loop4_check - - // Check alignment - if alignment differs we have to do one byte at a time. - AND $7, X10, X5 - AND $7, X11, X6 - AND $7, X12, X7 - BNE X5, X6, loop4_check - BNE X5, X7, loop4_check - BEQZ X5, loop64_check - - // Check one byte at a time until we reach 8 byte alignment. - MOV $8, X8 - SUB X5, X8 - SUB X8, X13 -align: - MOVBU 0(X11), X16 - MOVBU 0(X12), X17 - XOR X16, X17 - MOVB X17, 0(X10) - ADD $1, X10 - ADD $1, X11 - ADD $1, X12 - SUB $1, X8 - BNEZ X8, align - -loop64_check: - MOV $64, X15 - BLT X13, X15, tail32_check - PCALIGN $16 -loop64: - MOV 0(X11), X16 - MOV 0(X12), X17 - MOV 8(X11), X18 - MOV 8(X12), X19 - XOR X16, X17 - XOR X18, X19 - MOV X17, 0(X10) - MOV X19, 8(X10) - MOV 16(X11), X20 - MOV 16(X12), X21 - MOV 24(X11), X22 - MOV 24(X12), X23 - XOR X20, X21 - XOR X22, X23 - MOV X21, 16(X10) - MOV X23, 24(X10) - MOV 32(X11), X16 - MOV 32(X12), X17 - MOV 40(X11), X18 - MOV 40(X12), X19 - XOR X16, X17 - XOR X18, X19 - MOV X17, 32(X10) - MOV X19, 40(X10) - MOV 48(X11), X20 - MOV 48(X12), X21 - MOV 56(X11), X22 - MOV 56(X12), X23 - XOR X20, X21 - XOR X22, X23 - MOV X21, 48(X10) - MOV X23, 56(X10) - ADD $64, X10 - ADD $64, X11 - ADD $64, X12 - SUB $64, X13 - BGE X13, X15, loop64 - BEQZ X13, done - -tail32_check: - MOV $32, X15 - BLT X13, X15, tail16_check - MOV 0(X11), X16 - MOV 0(X12), X17 - MOV 8(X11), X18 - MOV 8(X12), X19 - XOR X16, X17 - XOR X18, X19 - MOV X17, 0(X10) - MOV X19, 8(X10) - MOV 16(X11), X20 - MOV 16(X12), X21 - MOV 24(X11), X22 - MOV 24(X12), X23 - XOR X20, X21 - XOR X22, X23 - MOV X21, 16(X10) - MOV X23, 24(X10) - ADD $32, X10 - ADD $32, X11 - ADD $32, X12 - SUB $32, X13 - BEQZ X13, done - -tail16_check: - MOV $16, X15 - BLT X13, X15, loop4_check - MOV 0(X11), X16 - MOV 0(X12), X17 - MOV 8(X11), X18 - MOV 8(X12), X19 - XOR X16, X17 - XOR X18, X19 - MOV X17, 0(X10) - MOV X19, 8(X10) - ADD $16, X10 - ADD $16, X11 - ADD $16, X12 - SUB $16, X13 - BEQZ X13, done - -loop4_check: - MOV $4, X15 - BLT X13, X15, loop1 - PCALIGN $16 -loop4: - MOVBU 0(X11), X16 - MOVBU 0(X12), X17 - MOVBU 1(X11), X18 - MOVBU 1(X12), X19 - XOR X16, X17 - XOR X18, X19 - MOVB X17, 0(X10) - MOVB X19, 1(X10) - MOVBU 2(X11), X20 - MOVBU 2(X12), X21 - MOVBU 3(X11), X22 - MOVBU 3(X12), X23 - XOR X20, X21 - XOR X22, X23 - MOVB X21, 2(X10) - MOVB X23, 3(X10) - ADD $4, X10 - ADD $4, X11 - ADD $4, X12 - SUB $4, X13 - BGE X13, X15, loop4 - - PCALIGN $16 -loop1: - BEQZ X13, done - MOVBU 0(X11), X16 - MOVBU 0(X12), X17 - XOR X16, X17 - MOVB X17, 0(X10) - ADD $1, X10 - ADD $1, X11 - ADD $1, X12 - SUB $1, X13 - JMP loop1 - -done: - RET diff --git a/internal/subtle/xor_s390x.s b/internal/subtle/xor_s390x.s deleted file mode 100644 index a5ad498..0000000 --- a/internal/subtle/xor_s390x.s +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2024 Sun Yimin. All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -//go:build !purego - -#include "textflag.h" - -// func xorBytes(dst, a, b *byte, n int) -TEXT ·xorBytes(SB),NOSPLIT,$0-32 - MOVD dst+0(FP), R1 - MOVD a+8(FP), R2 - MOVD b+16(FP), R3 - MOVD n+24(FP), R4 - - MOVD $0, R5 - CMPBLT R4, $64, tail - -loop_64: - VL 0(R2)(R5*1), V0 - VL 16(R2)(R5*1), V1 - VL 32(R2)(R5*1), V2 - VL 48(R2)(R5*1), V3 - VL 0(R3)(R5*1), V4 - VL 16(R3)(R5*1), V5 - VL 32(R3)(R5*1), V6 - VL 48(R3)(R5*1), V7 - VX V0, V4, V4 - VX V1, V5, V5 - VX V2, V6, V6 - VX V3, V7, V7 - VST V4, 0(R1)(R5*1) - VST V5, 16(R1)(R5*1) - VST V6, 32(R1)(R5*1) - VST V7, 48(R1)(R5*1) - LAY 64(R5), R5 - SUB $64, R4 - CMPBGE R4, $64, loop_64 - -tail: - CMPBEQ R4, $0, done - CMPBLT R4, $32, less_than32 - VL 0(R2)(R5*1), V0 - VL 16(R2)(R5*1), V1 - VL 0(R3)(R5*1), V2 - VL 16(R3)(R5*1), V3 - VX V0, V2, V2 - VX V1, V3, V3 - VST V2, 0(R1)(R5*1) - VST V3, 16(R1)(R5*1) - LAY 32(R5), R5 - SUB $32, R4 - -less_than32: - CMPBLT R4, $16, less_than16 - VL 0(R2)(R5*1), V0 - VL 0(R3)(R5*1), V1 - VX V0, V1, V1 - VST V1, 0(R1)(R5*1) - LAY 16(R5), R5 - SUB $16, R4 - -less_than16: - CMPBLT R4, $8, less_than8 - MOVD 0(R2)(R5*1), R7 - MOVD 0(R3)(R5*1), R8 - XOR R7, R8 - MOVD R8, 0(R1)(R5*1) - LAY 8(R5), R5 - SUB $8, R4 - -less_than8: - CMPBLT R4, $4, less_than4 - MOVWZ 0(R2)(R5*1), R7 - MOVWZ 0(R3)(R5*1), R8 - XOR R7, R8 - MOVW R8, 0(R1)(R5*1) - LAY 4(R5), R5 - SUB $4, R4 - -less_than4: - CMPBLT R4, $2, less_than2 - MOVHZ 0(R2)(R5*1), R7 - MOVHZ 0(R3)(R5*1), R8 - XOR R7, R8 - MOVH R8, 0(R1)(R5*1) - LAY 2(R5), R5 - SUB $2, R4 - -less_than2: - CMPBEQ R4, $0, done - MOVB 0(R2)(R5*1), R7 - MOVB 0(R3)(R5*1), R8 - XOR R7, R8 - MOVB R8, 0(R1)(R5*1) - -done: - RET diff --git a/internal/subtle/xor_test.go b/internal/subtle/xor_test.go deleted file mode 100644 index dfca2c9..0000000 --- a/internal/subtle/xor_test.go +++ /dev/null @@ -1,123 +0,0 @@ -package subtle_test - -import ( - "bytes" - "crypto/rand" - "fmt" - "io" - "testing" - - "github.com/emmansun/gmsm/internal/subtle" -) - -func TestXORBytes(t *testing.T) { - for n := 1; n <= 1024; n++ { - if n > 16 && testing.Short() { - n += n >> 3 - } - for alignP := 0; alignP < 8; alignP++ { - for alignQ := 0; alignQ < 8; alignQ++ { - for alignD := 0; alignD < 8; alignD++ { - p := make([]byte, alignP+n, alignP+n+10)[alignP:] - q := make([]byte, alignQ+n, alignQ+n+10)[alignQ:] - if n&1 != 0 { - p = p[:n] - } else { - q = q[:n] - } - if _, err := io.ReadFull(rand.Reader, p); err != nil { - t.Fatal(err) - } - if _, err := io.ReadFull(rand.Reader, q); err != nil { - t.Fatal(err) - } - - d := make([]byte, alignD+n, alignD+n+10) - for i := range d { - d[i] = 0xdd - } - want := make([]byte, len(d), cap(d)) - copy(want[:cap(want)], d[:cap(d)]) - for i := 0; i < n; i++ { - want[alignD+i] = p[i] ^ q[i] - } - - if subtle.XORBytes(d[alignD:], p, q); !bytes.Equal(d, want) { - t.Fatalf("n=%d alignP=%d alignQ=%d alignD=%d:\n\tp = %x\n\tq = %x\n\td = %x\n\twant %x\n", n, alignP, alignQ, alignD, p, q, d, want) - } - } - } - } - } -} - -func TestXorBytesPanic(t *testing.T) { - mustPanic(t, "subtle.XORBytes: dst too short", func() { - subtle.XORBytes(nil, make([]byte, 1), make([]byte, 1)) - }) - mustPanic(t, "subtle.XORBytes: dst too short", func() { - subtle.XORBytes(make([]byte, 1), make([]byte, 2), make([]byte, 3)) - }) - mustPanic(t, "subtle.XORBytes: invalid overlap", func() { - x := make([]byte, 3) - subtle.XORBytes(x, x[1:], make([]byte, 2)) - }) - mustPanic(t, "subtle.XORBytes: invalid overlap", func() { - x := make([]byte, 3) - subtle.XORBytes(x, make([]byte, 2), x[1:]) - }) -} - -func BenchmarkXORBytes(b *testing.B) { - dst := make([]byte, 1<<15) - data0 := make([]byte, 1<<15) - data1 := make([]byte, 1<<15) - sizes := []int64{1 << 3, 1 << 4, 1 << 5, 1 << 7, 1 << 11, 1 << 13, 1 << 15} - for _, size := range sizes { - b.Run(fmt.Sprintf("%dBytes", size), func(b *testing.B) { - s0 := data0[:size] - s1 := data1[:size] - b.SetBytes(int64(size)) - for i := 0; i < b.N; i++ { - subtle.XORBytes(dst, s0, s1) - } - }) - } -} - -func BenchmarkXORBytesAlignment(b *testing.B) { - dst := make([]byte, 8+1<<11) - data0 := make([]byte, 8+1<<11) - data1 := make([]byte, 8+1<<11) - sizes := []int64{1 << 3, 1 << 7, 1 << 11} - for _, size := range sizes { - for offset := int64(0); offset < 8; offset++ { - b.Run(fmt.Sprintf("%dBytes%dOffset", size, offset), func(b *testing.B) { - d := dst[offset : offset+size] - s0 := data0[offset : offset+size] - s1 := data1[offset : offset+size] - b.SetBytes(int64(size)) - for i := 0; i < b.N; i++ { - subtle.XORBytes(d, s0, s1) - } - }) - } - } -} - -func mustPanic(t *testing.T, expected string, f func()) { - t.Helper() - defer func() { - switch msg := recover().(type) { - case nil: - t.Errorf("expected panic(%q), but did not panic", expected) - case string: - if msg != expected { - t.Errorf("expected panic(%q), but got panic(%q)", expected, msg) - } - default: - t.Errorf("expected panic(%q), but got panic(%T%v)", expected, msg, msg) - } - }() - f() -}