diff --git a/sm4/asm_amd64.s b/sm4/asm_amd64.s index 22afc1d..799b21d 100644 --- a/sm4/asm_amd64.s +++ b/sm4/asm_amd64.s @@ -210,7 +210,6 @@ loop: MOVL R8, 56(BX) done_sm4: - VZEROUPPER RET // func xorBytesSSE2(dst, a, b *byte, n int) diff --git a/sm4/cbc_amd64.go b/sm4/cbc_amd64.go index 006d9bf..6dbf124 100644 --- a/sm4/cbc_amd64.go +++ b/sm4/cbc_amd64.go @@ -2,15 +2,21 @@ package sm4 import "crypto/cipher" +// Assert that sm4CipherAsm implements the cbcDecAble interface. +var _ cbcDecAble = (*sm4CipherAsm)(nil) + type cbc struct { - b *sm4CipherAsm - iv [BlockSize]byte + b *sm4CipherAsm + iv []byte + tmp []byte } func (b *sm4CipherAsm) NewCBCDecrypter(iv []byte) cipher.BlockMode { var c cbc c.b = b - copy(c.iv[:], iv) + c.iv = make([]byte, BlockSize) + c.tmp = make([]byte, BlockSize) + copy(c.iv, iv) return &c } @@ -30,6 +36,7 @@ func (x *cbc) CryptBlocks(dst, src []byte) { return } end := len(src) + copy(x.tmp, src[end-BlockSize:end]) start := end - FourBlocksSize var temp []byte = make([]byte, FourBlocksSize) var src64 []byte = make([]byte, FourBlocksSize) @@ -52,6 +59,8 @@ func (x *cbc) CryptBlocks(dst, src []byte) { end -= BlockSize } xorBytes(dst[0:end], temp[0:end], x.iv[:]) + // Set the new iv to the last ciphertext block we copied earlier. + x.iv, x.tmp = x.tmp, x.iv } func (x *cbc) SetIV(iv []byte) { diff --git a/sm4/ctr_amd64.go b/sm4/ctr_amd64.go new file mode 100644 index 0000000..ae30413 --- /dev/null +++ b/sm4/ctr_amd64.go @@ -0,0 +1,91 @@ +package sm4 + +import "crypto/cipher" + +// Assert that sm4CipherAsm implements the ctrAble interface. +var _ ctrAble = (*sm4CipherAsm)(nil) + +type ctr struct { + b *sm4CipherAsm + ctr []byte + out []byte + outUsed int +} + +const streamBufferSize = 512 + +// NewCTR returns a Stream which encrypts/decrypts using the SM4 block +// cipher in counter mode. The length of iv must be the same as BlockSize.
+func (c *sm4CipherAsm) NewCTR(iv []byte) cipher.Stream { + if len(iv) != BlockSize { + panic("cipher.NewCTR: IV length must equal block size") + } + bufSize := streamBufferSize + if bufSize < BlockSize { + bufSize = BlockSize + } + s := &ctr{ + b: c, + ctr: make([]byte, 4*len(iv)), + out: make([]byte, 0, bufSize), + outUsed: 0, + } + copy(s.ctr, iv) + s.genCtr(BlockSize) + s.genCtr(2 * BlockSize) + s.genCtr(3 * BlockSize) + return s + +} + +func (x *ctr) genCtr(start int) { + if start > 0 { + copy(x.ctr[start:], x.ctr[start-BlockSize:start]) + } else { + copy(x.ctr[start:], x.ctr[len(x.ctr)-BlockSize:]) + } + // Increment counter + end := start + BlockSize + for i := end - 1; i >= 0; i-- { + x.ctr[i]++ + if x.ctr[i] != 0 { + break + } + } +} + +func (x *ctr) refill() { + remain := len(x.out) - x.outUsed + copy(x.out, x.out[x.outUsed:]) + x.out = x.out[:cap(x.out)] + for remain <= len(x.out)-FourBlocksSize { + encryptBlocksAsm(&x.b.enc[0], &x.out[remain:][0], &x.ctr[0]) + remain += FourBlocksSize + + // Increment counter + x.genCtr(0) + x.genCtr(BlockSize) + x.genCtr(2 * BlockSize) + x.genCtr(3 * BlockSize) + } + x.out = x.out[:remain] + x.outUsed = 0 +} + +func (x *ctr) XORKeyStream(dst, src []byte) { + if len(dst) < len(src) { + panic("crypto/cipher: output smaller than input") + } + if InexactOverlap(dst[:len(src)], src) { + panic("crypto/cipher: invalid buffer overlap") + } + for len(src) > 0 { + if x.outUsed >= len(x.out)-BlockSize { + x.refill() + } + n := xorBytes(dst, src, x.out[x.outUsed:]) + dst = dst[n:] + src = src[n:] + x.outUsed += n + } +} diff --git a/sm4/modes.go b/sm4/modes.go index 77c4b85..7ea3d65 100644 --- a/sm4/modes.go +++ b/sm4/modes.go @@ -8,3 +8,17 @@ import "crypto/cipher" type cbcDecAble interface { NewCBCDecrypter(iv []byte) cipher.BlockMode } + +// ctrAble is implemented by cipher.Blocks that can provide an optimized +// implementation of CTR through the cipher.Stream interface. +// See crypto/cipher/ctr.go. 
type (
	// ctrAble is satisfied by a cipher.Block that ships its own optimized
	// CTR implementation, handed out as a cipher.Stream.
	// See crypto/cipher/ctr.go.
	ctrAble interface {
		NewCTR(iv []byte) cipher.Stream
	}

	// gcmAble is satisfied by a cipher.Block that ships its own optimized
	// GCM implementation, handed out as a cipher.AEAD.
	// See crypto/cipher/gcm.go.
	gcmAble interface {
		NewGCM(nonceSize, tagSize int) (cipher.AEAD, error)
	}
)