mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 04:36:19 +08:00
MAGIC - optimize cbc decryption
This commit is contained in:
parent
b0889c3432
commit
dddebb8c1d
@ -212,3 +212,52 @@ loop:
|
||||
done_sm4:
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
||||
// func xorBytesSSE2(dst, a, b *byte, n int)
//
// xorBytesSSE2 stores a[i] XOR b[i] into dst[i] for the n bytes addressed by
// the three pointers.
//
// Register use: BX = dst, SI = a, CX = b, DX = n (remaining length),
// AX = forward offset in the 16-byte loop (and a scratch byte/qword in the
// tail code), DI = scratch.
//
// Strategy: if n is a multiple of 16 the whole range is handled by the
// forward 16-byte loop. Otherwise the tail is trimmed backwards first --
// one byte at a time until the remaining length is a multiple of 8, then a
// single 8-byte step if it is still not a multiple of 16 -- and whatever
// remains (a multiple of 16, possibly zero) goes through the 16-byte loop.
//
// NOTE: n == 0 is not handled here; it would fall into loop16b and touch
// 16 bytes before the exit compare. The Go caller has to filter it out.
|
||||
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), BX
|
||||
MOVQ a+8(FP), SI
|
||||
MOVQ b+16(FP), CX
|
||||
MOVQ n+24(FP), DX
|
||||
TESTQ $15, DX // n & 15: a non-zero result means n is not a multiple of 16.
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, AX // AX = byte offset into the slices, counted up from 0 to DX.
|
||||
|
||||
loop16b:
|
||||
MOVOU (SI)(AX*1), X0 // Load 16 bytes of a (unaligned load), moving forwards.
|
||||
MOVOU (CX)(AX*1), X1 // Load the matching 16 bytes of b.
|
||||
PXOR X1, X0
|
||||
MOVOU X0, (BX)(AX*1) // Store the 16 XORed bytes into dst.
|
||||
ADDQ $16, AX
|
||||
CMPQ DX, AX // Stop once the offset reaches the remaining length.
|
||||
JNE loop16b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
SUBQ $1, DX // Shrink the remaining length by one; DX now indexes the last unprocessed byte.
|
||||
MOVB (SI)(DX*1), DI
|
||||
MOVB (CX)(DX*1), AX
|
||||
XORB AX, DI
|
||||
MOVB DI, (BX)(DX*1)
|
||||
TESTQ $7, DX // Keep going byte by byte until the remaining length is a multiple of 8.
|
||||
JNZ loop_1b
|
||||
CMPQ DX, $0 // Nothing left: done.
|
||||
JE ret
|
||||
TESTQ $15, DX // Remaining length is a multiple of 16: finish with the 16-byte loop.
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, DX // Remaining length not a multiple of 8: trim single bytes first.
|
||||
JNE loop_1b
|
||||
SUBQ $8, DX // Remaining length is 8 mod 16: XOR the last 8 bytes in one step.
|
||||
MOVQ (SI)(DX*1), DI
|
||||
MOVQ (CX)(DX*1), AX
|
||||
XORQ AX, DI
|
||||
MOVQ DI, (BX)(DX*1)
|
||||
CMPQ DX, $16 // DX is now a multiple of 16; if it is at least 16 the 16-byte loop handles the rest.
|
||||
JGE aligned
|
||||
|
||||
ret:
|
||||
|
||||
RET
|
||||
|
62
sm4/cbc_amd64.go
Normal file
62
sm4/cbc_amd64.go
Normal file
@ -0,0 +1,62 @@
|
||||
package sm4
|
||||
|
||||
import "crypto/cipher"
|
||||
|
||||
type cbc struct {
|
||||
b *sm4CipherAsm
|
||||
iv [BlockSize]byte
|
||||
}
|
||||
|
||||
func (b *sm4CipherAsm) NewCBCDecrypter(iv []byte) cipher.BlockMode {
|
||||
var c cbc
|
||||
c.b = b
|
||||
copy(c.iv[:], iv)
|
||||
return &c
|
||||
}
|
||||
|
||||
// BlockSize reports the block size of the mode in bytes, which is the
// package-level BlockSize constant.
func (x *cbc) BlockSize() int {
	return BlockSize
}
|
||||
|
||||
func (x *cbc) CryptBlocks(dst, src []byte) {
|
||||
if len(src)%BlockSize != 0 {
|
||||
panic("crypto/cipher: input not full blocks")
|
||||
}
|
||||
if len(dst) < len(src) {
|
||||
panic("crypto/cipher: output smaller than input")
|
||||
}
|
||||
if InexactOverlap(dst[:len(src)], src) {
|
||||
panic("crypto/cipher: invalid buffer overlap")
|
||||
}
|
||||
if len(src) == 0 {
|
||||
return
|
||||
}
|
||||
end := len(src)
|
||||
start := end - FourBlocksSize
|
||||
var temp []byte = make([]byte, FourBlocksSize)
|
||||
var src64 []byte = make([]byte, FourBlocksSize)
|
||||
for start > 0 {
|
||||
encryptBlocksAsm(&x.b.dec[0], &temp[0], &src[start:end][0])
|
||||
xorBytes(dst[end-BlockSize:end], temp[FourBlocksSize-BlockSize:FourBlocksSize], src[end-2*BlockSize:end-BlockSize])
|
||||
xorBytes(dst[end-2*BlockSize:end-BlockSize], temp[FourBlocksSize-2*BlockSize:FourBlocksSize-BlockSize], src[end-3*BlockSize:end-2*BlockSize])
|
||||
xorBytes(dst[end-3*BlockSize:end-2*BlockSize], temp[FourBlocksSize-3*BlockSize:FourBlocksSize-2*BlockSize], src[end-4*BlockSize:end-3*BlockSize])
|
||||
xorBytes(dst[end-4*BlockSize:end-3*BlockSize], temp[:BlockSize], src[end-5*BlockSize:end-4*BlockSize])
|
||||
|
||||
end = start
|
||||
start -= FourBlocksSize
|
||||
}
|
||||
|
||||
copy(src64, src[:end])
|
||||
encryptBlocksAsm(&x.b.dec[0], &temp[0], &src[:end][0])
|
||||
count := end / BlockSize
|
||||
for i := count; i > 1; i-- {
|
||||
xorBytes(dst[end-BlockSize:end], temp[end-BlockSize:end], src[end-2*BlockSize:end-BlockSize])
|
||||
end -= BlockSize
|
||||
}
|
||||
xorBytes(dst[0:end], temp[0:end], x.iv[:])
|
||||
}
|
||||
|
||||
func (x *cbc) SetIV(iv []byte) {
|
||||
if len(iv) != BlockSize {
|
||||
panic("cipher: incorrect length IV")
|
||||
}
|
||||
copy(x.iv[:], iv)
|
||||
}
|
10
sm4/modes.go
Normal file
10
sm4/modes.go
Normal file
@ -0,0 +1,10 @@
|
||||
package sm4
|
||||
|
||||
import "crypto/cipher"
|
||||
|
||||
// cbcDecAble is satisfied by a cipher.Block that supplies its own optimized
// CBC decrypter, handed back as a cipher.BlockMode.
// See crypto/cipher/cbc.go.
type cbcDecAble interface {
	NewCBCDecrypter(iv []byte) cipher.BlockMode
}
|
23
sm4/xor_amd64.go
Normal file
23
sm4/xor_amd64.go
Normal file
@ -0,0 +1,23 @@
|
||||
package sm4
|
||||
|
||||
// xorBytes xors the bytes in a and b. The destination should have enough
|
||||
// space, otherwise xorBytes will panic. Returns the number of bytes xor'd.
|
||||
func xorBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
_ = dst[n-1]
|
||||
xorBytesSSE2(&dst[0], &a[0], &b[0], n) // amd64 must have SSE2
|
||||
return n
|
||||
}
|
||||
|
||||
func xorWords(dst, a, b []byte) {
|
||||
xorBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// xorBytesSSE2 is the Go declaration of a routine with no Go body. It takes
// raw pointers to the first bytes of dst, a and b plus a byte count n;
// xorBytes only calls it with n > 0 and after bounds-checking dst.
//
//go:noescape
|
||||
func xorBytesSSE2(dst, a, b *byte, n int)
|
@ -50,6 +50,31 @@ var cbcSM4Tests = []struct {
|
||||
[]byte("Hello World"),
|
||||
[]byte{0x0a, 0x67, 0x06, 0x2f, 0x0c, 0xd2, 0xdc, 0xe2, 0x6a, 0x7b, 0x97, 0x8e, 0xbf, 0x21, 0x34, 0xf9},
|
||||
},
|
||||
{
|
||||
"Three blocks",
|
||||
[]byte("0123456789ABCDEF"),
|
||||
[]byte("0123456789ABCDEF"),
|
||||
[]byte("Hello World Hello World Hello World Hello Worldd"),
|
||||
[]byte{
|
||||
0xd3, 0x1e, 0x36, 0x83, 0xe4, 0xfc, 0x9b, 0x51, 0x6a, 0x2c, 0x0f, 0x98, 0x36, 0x76, 0xa9, 0xeb,
|
||||
0x1f, 0xdc, 0xc3, 0x2a, 0xf3, 0x84, 0x08, 0x97, 0x81, 0x57, 0xa2, 0x06, 0x5d, 0xe3, 0x4c, 0x6a,
|
||||
0x06, 0x8d, 0x0f, 0xef, 0x4e, 0x2b, 0xfa, 0xb4, 0xbc, 0xab, 0xa6, 0x64, 0x41, 0xfd, 0xe0, 0xfe,
|
||||
0x92, 0xc1, 0x64, 0xec, 0xa1, 0x70, 0x24, 0x75, 0x72, 0xde, 0x12, 0x02, 0x95, 0x2e, 0xc7, 0x27,
|
||||
},
|
||||
},
|
||||
{
|
||||
"Four blocks",
|
||||
[]byte("0123456789ABCDEF"),
|
||||
[]byte("0123456789ABCDEF"),
|
||||
[]byte("Hello World Hello World Hello World Hello World Hello World Hell"),
|
||||
[]byte{
|
||||
0xd3, 0x1e, 0x36, 0x83, 0xe4, 0xfc, 0x9b, 0x51, 0x6a, 0x2c, 0x0f, 0x98, 0x36, 0x76, 0xa9, 0xeb,
|
||||
0x1f, 0xdc, 0xc3, 0x2a, 0xf3, 0x84, 0x08, 0x97, 0x81, 0x57, 0xa2, 0x06, 0x5d, 0xe3, 0x4c, 0x6a,
|
||||
0xe0, 0x02, 0xd6, 0xe4, 0xf5, 0x66, 0x87, 0xc4, 0xcc, 0x54, 0x1d, 0x1f, 0x1c, 0xc4, 0x2f, 0xe6,
|
||||
0xe5, 0x1d, 0xea, 0x52, 0xb8, 0x0c, 0xc8, 0xbe, 0xae, 0xcc, 0x44, 0xa8, 0x51, 0x81, 0x08, 0x60,
|
||||
0x34, 0x6e, 0x9d, 0xad, 0xe1, 0x8a, 0xf4, 0xa1, 0x83, 0x69, 0x57, 0xb9, 0x37, 0x26, 0x7e, 0x03,
|
||||
},
|
||||
},
|
||||
{
|
||||
"A.1",
|
||||
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10},
|
||||
|
Loading…
x
Reference in New Issue
Block a user