From 85b3ecb1296d09c86a8676a81774bce7696f8fad Mon Sep 17 00:00:00 2001 From: Emman Date: Wed, 31 Mar 2021 11:55:52 +0800 Subject: [PATCH] MAGIC - support ccm mode --- cipher/ccm.go | 237 +++++++++++++++++++++++++++++++ cipher/ccm_test.go | 263 +++++++++++++++++++++++++++++++++++ cipher/utils.go | 39 ++++++ {sm4 => cipher}/xor_amd64.go | 12 +- cipher/xor_amd64.s | 50 +++++++ cipher/xor_generic.go | 87 ++++++++++++ sm4/asm_amd64.s | 49 ------- sm4/cbc_amd64.go | 20 +-- sm4/cipher.go | 28 +--- sm4/cipher_asm.go | 5 +- sm4/ctr_amd64.go | 10 +- sm4/gcm_amd64.go | 31 ++--- sm4/sm4_gcm.go | 10 +- sm4_test/benchmark_test.go | 91 ++++++++++++ 14 files changed, 814 insertions(+), 118 deletions(-) create mode 100644 cipher/ccm.go create mode 100644 cipher/ccm_test.go create mode 100644 cipher/utils.go rename {sm4 => cipher}/xor_amd64.go (53%) create mode 100644 cipher/xor_amd64.s create mode 100644 cipher/xor_generic.go diff --git a/cipher/ccm.go b/cipher/ccm.go new file mode 100644 index 0000000..bd39acb --- /dev/null +++ b/cipher/ccm.go @@ -0,0 +1,237 @@ +package cipher + +import ( + goCipher "crypto/cipher" + "crypto/subtle" + "encoding/binary" + "math" + + "errors" +) + +const ( + ccmBlockSize = 16 + ccmTagSize = 16 + ccmMinimumTagSize = 4 + ccmStandardNonceSize = 12 +) + +// ccmAble is an interface implemented by ciphers that have a specific optimized +// implementation of CCM. +type ccmAble interface { + NewCCM(nonceSize, tagSize int) (goCipher.AEAD, error) +} + +type ccm struct { + cipher goCipher.Block + nonceSize int + tagSize int +} + +func (c *ccm) NonceSize() int { + return c.nonceSize +} + +func (c *ccm) Overhead() int { + return c.tagSize +} + +func (c *ccm) MaxLength() int { + return maxlen(15-c.NonceSize(), c.Overhead()) +} + +func maxlen(L, tagsize int) int { + max := (uint64(1) << (8 * L)) - 1 + if m64 := uint64(math.MaxInt64) - uint64(tagsize); L > 8 || max > m64 { + max = m64 // The maximum lentgh on a 64bit arch + } + if max != uint64(int(max)) { + return math.MaxInt32 - tagsize // We have only 32bit int's + } + return int(max) +} + +// NewCCM returns the given 128-bit, block cipher wrapped in CCM +// with the standard nonce length. +func NewCCM(cipher goCipher.Block) (goCipher.AEAD, error) { + return NewCCMWithNonceAndTagSize(cipher, ccmStandardNonceSize, ccmTagSize) +} + +// NewCCMWithNonceSize returns the given 128-bit, block cipher wrapped in CCM, +// which accepts nonces of the given length. The length must not +// be zero. +func NewCCMWithNonceSize(cipher goCipher.Block, size int) (goCipher.AEAD, error) { + return NewCCMWithNonceAndTagSize(cipher, size, ccmTagSize) +} + +// NewCCMWithTagSize returns the given 128-bit, block cipher wrapped in CCM, +// which generates tags with the given length. +// +// Tag sizes between 8 and 16 bytes are allowed. 
+// +func NewCCMWithTagSize(cipher goCipher.Block, tagSize int) (goCipher.AEAD, error) { + return NewCCMWithNonceAndTagSize(cipher, ccmStandardNonceSize, tagSize) +} + +// https://tools.ietf.org/html/rfc3610 +func NewCCMWithNonceAndTagSize(cipher goCipher.Block, nonceSize, tagSize int) (goCipher.AEAD, error) { + if tagSize < ccmMinimumTagSize || tagSize > ccmBlockSize || tagSize&1 != 0 { + return nil, errors.New("cipher: incorrect tag size given to CCM") + } + + if nonceSize <= 0 { + return nil, errors.New("cipher: the nonce can't have zero length, or the security of the key will be immediately compromised") + } + + lenSize := 15 - nonceSize + if lenSize < 2 || lenSize > 8 { + return nil, errors.New("cipher: invalid ccm nounce size, should be in [7,13]") + } + + if cipher, ok := cipher.(ccmAble); ok { + return cipher.NewCCM(nonceSize, tagSize) + } + + if cipher.BlockSize() != ccmBlockSize { + return nil, errors.New("cipher: NewCCM requires 128-bit block cipher") + } + + c := &ccm{cipher: cipher, nonceSize: nonceSize, tagSize: tagSize} + + return c, nil +} + +// https://tools.ietf.org/html/rfc3610 +func (c *ccm) deriveCounter(counter *[ccmBlockSize]byte, nonce []byte) { + counter[0] = byte(14 - c.nonceSize) + copy(counter[1:], nonce) +} + +func (c *ccm) cmac(out, data []byte) { + for len(data) >= ccmBlockSize { + XorBytes(out, out, data) + c.cipher.Encrypt(out, out) + data = data[ccmBlockSize:] + } + if len(data) > 0 { + var block [ccmBlockSize]byte + copy(block[:], data) + XorBytes(out, out, data) + c.cipher.Encrypt(out, out) + } +} + +// https://tools.ietf.org/html/rfc3610 2.2. Authentication +func (c *ccm) auth(nonce, plaintext, additionalData []byte, tagMask *[ccmBlockSize]byte) []byte { + var out [ccmTagSize]byte + if len(additionalData) > 0 { + out[0] = 1 << 6 // 64*Adata + } + out[0] |= byte(c.tagSize-2) << 2 + out[0] |= byte(14 - c.nonceSize) + binary.BigEndian.PutUint64(out[ccmBlockSize-8:], uint64(len(plaintext))) + copy(out[1:], nonce) + c.cipher.Encrypt(out[:], out[:]) + + var block [ccmBlockSize]byte + if n := uint64(len(additionalData)); n > 0 { + // First adata block includes adata length + i := 2 + if n <= 0xfeff { + binary.BigEndian.PutUint16(block[:i], uint16(n)) + } else { + block[0] = 0xfe + block[1] = 0xff + if n < uint64(1<<32) { + i = 2 + 4 + binary.BigEndian.PutUint32(block[2:i], uint32(n)) + } else { + i = 2 + 8 + binary.BigEndian.PutUint64(block[2:i], uint64(n)) + } + } + i = copy(block[i:], additionalData) + c.cmac(out[:], block[:]) + c.cmac(out[:], additionalData[i:]) + } + if len(plaintext) > 0 { + c.cmac(out[:], plaintext) + } + XorWords(out[:], out[:], tagMask[:]) + return out[:c.tagSize] +} + +func (c *ccm) Seal(dst, nonce, plaintext, data []byte) []byte { + if len(nonce) != c.nonceSize { + panic("cipher: incorrect nonce length given to CCM") + } + if uint64(len(plaintext)) > uint64(c.MaxLength()) { + panic("cipher: message too large for CCM") + } + ret, out := SliceForAppend(dst, len(plaintext)+c.tagSize) + if InexactOverlap(out, plaintext) { + panic("cipher: invalid buffer overlap") + } + + var counter, tagMask [ccmBlockSize]byte + c.deriveCounter(&counter, nonce) + c.cipher.Encrypt(tagMask[:], counter[:]) + + counter[len(counter)-1] |= 1 + ctr := goCipher.NewCTR(c.cipher, counter[:]) + ctr.XORKeyStream(out, plaintext) + + tag := c.auth(nonce, plaintext, data, &tagMask) + copy(out[len(plaintext):], tag) + + return ret +} + +var errOpen = errors.New("cipher: message authentication failed") + +func (c *ccm) Open(dst, nonce, ciphertext, data []byte) ([]byte, 
error) { + if len(nonce) != c.nonceSize { + panic("cipher: incorrect nonce length given to CCM") + } + // Sanity check to prevent the authentication from always succeeding if an implementation + // leaves tagSize uninitialized, for example. + if c.tagSize < ccmMinimumTagSize { + panic("cipher: incorrect CCM tag size") + } + + if len(ciphertext) < c.tagSize { + return nil, errOpen + } + + if len(ciphertext) > c.MaxLength()+c.Overhead() { + return nil, errOpen + } + + tag := ciphertext[len(ciphertext)-c.tagSize:] + ciphertext = ciphertext[:len(ciphertext)-c.tagSize] + + var counter, tagMask [ccmBlockSize]byte + c.deriveCounter(&counter, nonce) + c.cipher.Encrypt(tagMask[:], counter[:]) + + ret, out := SliceForAppend(dst, len(ciphertext)) + if InexactOverlap(out, ciphertext) { + panic("cipher: invalid buffer overlap") + } + + counter[len(counter)-1] |= 1 + ctr := goCipher.NewCTR(c.cipher, counter[:]) + ctr.XORKeyStream(out, ciphertext) + expectedTag := c.auth(nonce, out, data, &tagMask) + if subtle.ConstantTimeCompare(expectedTag, tag) != 1 { + // The AESNI code decrypts and authenticates concurrently, and + // so overwrites dst in the event of a tag mismatch. That + // behavior is mimicked here in order to be consistent across + // platforms. + for i := range out { + out[i] = 0 + } + return nil, errOpen + } + return ret, nil +} diff --git a/cipher/ccm_test.go b/cipher/ccm_test.go new file mode 100644 index 0000000..b0febb2 --- /dev/null +++ b/cipher/ccm_test.go @@ -0,0 +1,263 @@ +package cipher + +import ( + "crypto/aes" + "encoding/hex" + "testing" +) + +var aesCCMTests = []struct { + key, nonce, plaintext, ad, result string + tagSize int +}{ + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000003020100a0a1a2a3a4a5", + "08090a0b0c0d0e0f101112131415161718191a1b1c1d1e", + "0001020304050607", + "588c979a61c663d2f066d0c2c0f989806d5f6b61dac38417e8d12cfdf926e0", + 8, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000004030201a0a1a2a3a4a5", + "08090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "0001020304050607", + "72c91a36e135f8cf291ca894085c87e3cc15c439c9e43a3ba091d56e10400916", + 8, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000005040302a0a1a2a3a4a5", + "08090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20", + "0001020304050607", + "51b1e5f44a197d1da46b0f8e2d282ae871e838bb64da8596574adaa76fbd9fb0c5", + 8, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000006050403a0a1a2a3a4a5", + "0c0d0e0f101112131415161718191a1b1c1d1e", + "000102030405060708090a0b", + "a28c6865939a9a79faaa5c4c2a9d4a91cdac8c96c861b9c9e61ef1", + 8, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000007060504a0a1a2a3a4a5", + "0c0d0e0f101112131415161718191a1b1c1d1e1f", + "000102030405060708090a0b", + "dcf1fb7b5d9e23fb9d4e131253658ad86ebdca3e51e83f077d9c2d93", + 8, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000008070605a0a1a2a3a4a5", + "0c0d0e0f101112131415161718191a1b1c1d1e1f20", + "000102030405060708090a0b", + "6fc1b011f006568b5171a42d953d469b2570a4bd87405a0443ac91cb94", + 8, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "00000009080706a0a1a2a3a4a5", + "08090a0b0c0d0e0f101112131415161718191a1b1c1d1e", + "0001020304050607", + "0135d1b2c95f41d5d1d4fec185d166b8094e999dfed96c048c56602c97acbb7490", + 10, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "0000000a090807a0a1a2a3a4a5", + "08090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "0001020304050607", + "7b75399ac0831dd2f0bbd75879a2fd8f6cae6b6cd9b7db24c17b4433f434963f34b4", + 10, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + 
"0000000b0a0908a0a1a2a3a4a5", + "08090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20", + "0001020304050607", + "82531a60cc24945a4b8279181ab5c84df21ce7f9b73f42e197ea9c07e56b5eb17e5f4e", + 10, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "0000000c0b0a09a0a1a2a3a4a5", + "0c0d0e0f101112131415161718191a1b1c1d1e", + "000102030405060708090a0b", + "07342594157785152b074098330abb141b947b566aa9406b4d999988dd", + 10, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "0000000d0c0b0aa0a1a2a3a4a5", + "0c0d0e0f101112131415161718191a1b1c1d1e1f", + "000102030405060708090a0b", + "676bb20380b0e301e8ab79590a396da78b834934f53aa2e9107a8b6c022c", + 10, + }, + { + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf", + "0000000e0d0c0ba0a1a2a3a4a5", + "0c0d0e0f101112131415161718191a1b1c1d1e1f20", + "000102030405060708090a0b", + "c0ffa0d6f05bdb67f24d43a4338d2aa4bed7b20e43cd1aa31662e7ad65d6db", + 10, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "00412b4ea9cdbe3c9696766cfa", + "08e8cf97d820ea258460e96ad9cf5289054d895ceac47c", + "0be1a88bace018b1", + "4cb97f86a2a4689a877947ab8091ef5386a6ffbdd080f8e78cf7cb0cddd7b3", + 8, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "0033568ef7b2633c9696766cfa", + "9020ea6f91bdd85afa0039ba4baff9bfb79c7028949cd0ec", + "63018f76dc8a1bcb", + "4ccb1e7ca981befaa0726c55d378061298c85c92814abc33c52ee81d7d77c08a", + 8, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "00f8b678094e3b3c9696766cfa", + "e88b6a46c78d63e52eb8c546efb5de6f75e9cc0d", + "77b60f011c03e1525899bcae", + "5545ff1a085ee2efbf52b2e04bee1e2336c73e3f762c0c7744fe7e3c", + 8, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "00d560912d3f703c9696766cfa", + "6435acbafb11a82e2f071d7ca4a5ebd93a803ba87f", + "cd9044d2b71fdb8120ea60c0", + "009769ecabdf48625594c59251e6035722675e04c847099e5ae0704551", + 8, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "0042fff8f1951c3c9696766cfa", + "8a19b950bcf71a018e5e6701c91787659809d67dbedd18", + "d85bc7e69f944fb8", + "bc218daa947427b6db386a99ac1aef23ade0b52939cb6a637cf9bec2408897c6ba", + 10, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "00920f40e56cdc3c9696766cfa", + "1761433c37c5a35fc1f39f406302eb907c6163be38c98437", + "74a0ebc9069f5b37", + "5810e6fd25874022e80361a478e3e9cf484ab04f447efff6f0a477cc2fc9bf548944", + 10, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "0027ca0c7120bc3c9696766cfa", + "a434a8e58500c6e41530538862d686ea9e81301b5ae4226bfa", + "44a3aa3aae6475ca", + "f2beed7bc5098e83feb5b31608f8e29c38819a89c8e776f1544d4151a4ed3a8b87b9ce", + 10, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "005b8ccbcd9af83c9696766cfa", + "b96b49e21d621741632875db7f6c9243d2d7c2", + "ec46bb63b02520c33c49fd70", + "31d750a09da3ed7fddd49a2032aabf17ec8ebf7d22c8088c666be5c197", + 10, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "003ebe94044b9a3c9696766cfa", + "e2fcfbb880442c731bf95167c8ffd7895e337076", + "47a65ac78b3d594227e85e71", + "e882f1dbd38ce3eda7c23f04dd65071eb41342acdf7e00dccec7ae52987d", + 10, + }, + { + "d7828d13b2b0bdc325a76236df93cc6b", + "008d493b30ae8b3c9696766cfa", + "abf21c0b02feb88f856df4a37381bce3cc128517d4", + "6e37a6ef546d955d34ab6059", + "f32905b88a641b04b9c9ffb58cc390900f3da12ab16dce9e82efa16da62059", + 10, + }, +} + +func TestCCM(t *testing.T) { + for i, tt := range aesCCMTests { + nonce, _ := hex.DecodeString(tt.nonce) + plaintext, _ := hex.DecodeString(tt.plaintext) + ad, _ := hex.DecodeString(tt.ad) + key, _ := hex.DecodeString(tt.key) + c, err := aes.NewCipher(key) + if err != nil { + t.Fatal(err) + } + aesccm, err := NewCCMWithNonceAndTagSize(c, len(nonce), tt.tagSize) + if err != nil { 
+ t.Fatal(err) + } + ct := aesccm.Seal(nil, nonce, plaintext, ad) + if ctHex := hex.EncodeToString(ct); ctHex != tt.result { + t.Errorf("#%d: got %s, want %s", i, ctHex, tt.result) + continue + } + + //func (c *ccm) Open(dst, nonce, ciphertext, data []byte) ([]byte, error) + pt, err := aesccm.Open(nil, nonce, ct, ad) + if err != nil { + t.Fatal(err) + } + if ptHex := hex.EncodeToString(pt); ptHex != tt.plaintext { + t.Errorf("#%d: got %s, want %s", i, ptHex, tt.plaintext) + continue + } + } +} + +func TestCCMInvalidTagSize(t *testing.T) { + key, _ := hex.DecodeString("ab72c77b97cb5fe9a382d9fe81ffdbed") + + c, _ := aes.NewCipher(key) + + for _, tagSize := range []int{0, 1, c.BlockSize() + 1} { + aesccm, err := NewCCMWithTagSize(c, tagSize) + if aesccm != nil || err == nil { + t.Fatalf("NewCCMWithNonceAndTagSize was successful with an invalid %d-byte tag size", tagSize) + } + } +} + +func TestTagFailureOverwrite(t *testing.T) { + key, _ := hex.DecodeString("ab72c77b97cb5fe9a382d9fe81ffdbed") + nonce, _ := hex.DecodeString("54cc7dc2c37ec006bcc6d1db") + ciphertext, _ := hex.DecodeString("0e1bde206a07a9c2c1b65300f8c649972b4401346697138c7a4891ee59867d0c") + + c, _ := aes.NewCipher(key) + aesccm, _ := NewCCM(c) + + dst := make([]byte, len(ciphertext)-16) + for i := range dst { + dst[i] = 42 + } + + result, err := aesccm.Open(dst[:0], nonce, ciphertext, nil) + if err == nil { + t.Fatal("Bad Open still resulted in nil error.") + } + + if result != nil { + t.Fatal("Failed Open returned non-nil result.") + } + + for i := range dst { + if dst[i] != 0 { + t.Fatal("Failed Open didn't zero dst buffer") + } + } +} diff --git a/cipher/utils.go b/cipher/utils.go new file mode 100644 index 0000000..eee6a26 --- /dev/null +++ b/cipher/utils.go @@ -0,0 +1,39 @@ +package cipher + +import "unsafe" + +// AnyOverlap reports whether x and y share memory at any (not necessarily +// corresponding) index. The memory beyond the slice length is ignored. +func AnyOverlap(x, y []byte) bool { + return len(x) > 0 && len(y) > 0 && + uintptr(unsafe.Pointer(&x[0])) <= uintptr(unsafe.Pointer(&y[len(y)-1])) && + uintptr(unsafe.Pointer(&y[0])) <= uintptr(unsafe.Pointer(&x[len(x)-1])) +} + +// InexactOverlap reports whether x and y share memory at any non-corresponding +// index. The memory beyond the slice length is ignored. Note that x and y can +// have different lengths and still not have any inexact overlap. +// +// InexactOverlap can be used to implement the requirements of the crypto/cipher +// AEAD, Block, BlockMode and Stream interfaces. +func InexactOverlap(x, y []byte) bool { + if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] { + return false + } + return AnyOverlap(x, y) +} + +// SliceForAppend takes a slice and a requested number of bytes. It returns a +// slice with the contents of the given slice followed by that many bytes and a +// second slice that aliases into it and contains only the extra bytes. If the +// original slice has sufficient capacity then no allocation is performed. +func SliceForAppend(in []byte, n int) (head, tail []byte) { + if total := len(in) + n; cap(in) >= total { + head = in[:total] + } else { + head = make([]byte, total) + copy(head, in) + } + tail = head[len(in):] + return +} diff --git a/sm4/xor_amd64.go b/cipher/xor_amd64.go similarity index 53% rename from sm4/xor_amd64.go rename to cipher/xor_amd64.go index ca0ec52..4db7b3f 100644 --- a/sm4/xor_amd64.go +++ b/cipher/xor_amd64.go @@ -1,8 +1,8 @@ -package sm4 +package cipher -// xorBytes xors the bytes in a and b. 
The destination should have enough +// XorBytes xors the bytes in a and b. The destination should have enough // space, otherwise xorBytes will panic. Returns the number of bytes xor'd. -func xorBytes(dst, a, b []byte) int { +func XorBytes(dst, a, b []byte) int { n := len(a) if len(b) < n { n = len(b) @@ -15,9 +15,9 @@ func xorBytes(dst, a, b []byte) int { return n } -func xorWords(dst, a, b []byte) { - xorBytes(dst, a, b) +func XorWords(dst, a, b []byte) { + XorBytes(dst, a, b) } //go:noescape -func xorBytesSSE2(dst, a, b *byte, n int) \ No newline at end of file +func xorBytesSSE2(dst, a, b *byte, n int) diff --git a/cipher/xor_amd64.s b/cipher/xor_amd64.s new file mode 100644 index 0000000..dfd40b4 --- /dev/null +++ b/cipher/xor_amd64.s @@ -0,0 +1,50 @@ +#include "textflag.h" + +// func xorBytesSSE2(dst, a, b *byte, n int) +TEXT ·xorBytesSSE2(SB), NOSPLIT, $0 + MOVQ dst+0(FP), BX + MOVQ a+8(FP), SI + MOVQ b+16(FP), CX + MOVQ n+24(FP), DX + TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned. + JNZ not_aligned + +aligned: + MOVQ $0, AX // position in slices + +loop16b: + MOVOU (SI)(AX*1), X0 // XOR 16byte forwards. + MOVOU (CX)(AX*1), X1 + PXOR X1, X0 + MOVOU X0, (BX)(AX*1) + ADDQ $16, AX + CMPQ DX, AX + JNE loop16b + RET + +loop_1b: + SUBQ $1, DX // XOR 1byte backwards. + MOVB (SI)(DX*1), DI + MOVB (CX)(DX*1), AX + XORB AX, DI + MOVB DI, (BX)(DX*1) + TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b. + JNZ loop_1b + CMPQ DX, $0 // if len is 0, ret. + JE ret + TESTQ $15, DX // AND 15 & len, if zero jump to aligned. + JZ aligned + +not_aligned: + TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b. + JNE loop_1b + SUBQ $8, DX // XOR 8bytes backwards. + MOVQ (SI)(DX*1), DI + MOVQ (CX)(DX*1), AX + XORQ AX, DI + MOVQ DI, (BX)(DX*1) + CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned. + JGE aligned + +ret: + RET diff --git a/cipher/xor_generic.go b/cipher/xor_generic.go new file mode 100644 index 0000000..cac5843 --- /dev/null +++ b/cipher/xor_generic.go @@ -0,0 +1,87 @@ +// +build !amd64 + +package cipher + +import ( + "runtime" + "unsafe" +) + +// xorBytes xors the bytes in a and b. The destination should have enough +// space, otherwise xorBytes will panic. Returns the number of bytes xor'd. +func XorBytes(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + if n == 0 { + return 0 + } + + switch { + case supportsUnaligned: + fastXORBytes(dst, a, b, n) + default: + // TODO(hanwen): if (dst, a, b) have common alignment + // we could still try fastXORBytes. It is not clear + // how often this happens, and it's only worth it if + // the block encryption itself is hardware + // accelerated. + safeXORBytes(dst, a, b, n) + } + return n +} + +const wordSize = int(unsafe.Sizeof(uintptr(0))) +const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" + +// fastXORBytes xors in bulk. It only works on architectures that +// support unaligned read/writes. +// n needs to be smaller or equal than the length of a and b. 
+func fastXORBytes(dst, a, b []byte, n int) { + // Assert dst has enough space + _ = dst[n-1] + + w := n / wordSize + if w > 0 { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + for i := 0; i < w; i++ { + dw[i] = aw[i] ^ bw[i] + } + } + + for i := (n - n%wordSize); i < n; i++ { + dst[i] = a[i] ^ b[i] + } +} + +// n needs to be smaller or equal than the length of a and b. +func safeXORBytes(dst, a, b []byte, n int) { + for i := 0; i < n; i++ { + dst[i] = a[i] ^ b[i] + } +} + +// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.) +// The arguments are assumed to be of equal length. +func fastXORWords(dst, a, b []byte) { + dw := *(*[]uintptr)(unsafe.Pointer(&dst)) + aw := *(*[]uintptr)(unsafe.Pointer(&a)) + bw := *(*[]uintptr)(unsafe.Pointer(&b)) + n := len(b) / wordSize + for i := 0; i < n; i++ { + dw[i] = aw[i] ^ bw[i] + } +} + +// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.) +// The slice arguments a and b are assumed to be of equal length. +func XorWords(dst, a, b []byte) { + if supportsUnaligned { + fastXORWords(dst, a, b) + } else { + safeXORBytes(dst, a, b, len(b)) + } +} \ No newline at end of file diff --git a/sm4/asm_amd64.s b/sm4/asm_amd64.s index 5e20765..bf52d92 100644 --- a/sm4/asm_amd64.s +++ b/sm4/asm_amd64.s @@ -358,52 +358,3 @@ loop: MOVL R8, 12(BX) done_sm4: RET - -// func xorBytesSSE2(dst, a, b *byte, n int) -TEXT ·xorBytesSSE2(SB), NOSPLIT, $0 - MOVQ dst+0(FP), BX - MOVQ a+8(FP), SI - MOVQ b+16(FP), CX - MOVQ n+24(FP), DX - TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned. - JNZ not_aligned - -aligned: - MOVQ $0, AX // position in slices - -loop16b: - MOVOU (SI)(AX*1), X0 // XOR 16byte forwards. - MOVOU (CX)(AX*1), X1 - PXOR X1, X0 - MOVOU X0, (BX)(AX*1) - ADDQ $16, AX - CMPQ DX, AX - JNE loop16b - RET - -loop_1b: - SUBQ $1, DX // XOR 1byte backwards. - MOVB (SI)(DX*1), DI - MOVB (CX)(DX*1), AX - XORB AX, DI - MOVB DI, (BX)(DX*1) - TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b. - JNZ loop_1b - CMPQ DX, $0 // if len is 0, ret. - JE ret - TESTQ $15, DX // AND 15 & len, if zero jump to aligned. - JZ aligned - -not_aligned: - TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b. - JNE loop_1b - SUBQ $8, DX // XOR 8bytes backwards. - MOVQ (SI)(DX*1), DI - MOVQ (CX)(DX*1), AX - XORQ AX, DI - MOVQ DI, (BX)(DX*1) - CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned. - JGE aligned - -ret: - RET diff --git a/sm4/cbc_amd64.go b/sm4/cbc_amd64.go index 6dbf124..eeeb2ad 100644 --- a/sm4/cbc_amd64.go +++ b/sm4/cbc_amd64.go @@ -1,6 +1,10 @@ package sm4 -import "crypto/cipher" +import ( + "crypto/cipher" + + smcipher "github.com/emmansun/gmsm/cipher" +) // Assert that sm4CipherAsm implements the cbcDecAble interfaces. 
var _ cbcDecAble = (*sm4CipherAsm)(nil) @@ -29,7 +33,7 @@ func (x *cbc) CryptBlocks(dst, src []byte) { if len(dst) < len(src) { panic("crypto/cipher: output smaller than input") } - if InexactOverlap(dst[:len(src)], src) { + if smcipher.InexactOverlap(dst[:len(src)], src) { panic("crypto/cipher: invalid buffer overlap") } if len(src) == 0 { @@ -42,10 +46,10 @@ func (x *cbc) CryptBlocks(dst, src []byte) { var src64 []byte = make([]byte, FourBlocksSize) for start > 0 { encryptBlocksAsm(&x.b.dec[0], &temp[0], &src[start:end][0]) - xorBytes(dst[end-BlockSize:end], temp[FourBlocksSize-BlockSize:FourBlocksSize], src[end-2*BlockSize:end-BlockSize]) - xorBytes(dst[end-2*BlockSize:end-BlockSize], temp[FourBlocksSize-2*BlockSize:FourBlocksSize-BlockSize], src[end-3*BlockSize:end-2*BlockSize]) - xorBytes(dst[end-3*BlockSize:end-2*BlockSize], temp[FourBlocksSize-3*BlockSize:FourBlocksSize-2*BlockSize], src[end-4*BlockSize:end-3*BlockSize]) - xorBytes(dst[end-4*BlockSize:end-3*BlockSize], temp[:BlockSize], src[end-5*BlockSize:end-4*BlockSize]) + smcipher.XorBytes(dst[end-BlockSize:end], temp[FourBlocksSize-BlockSize:FourBlocksSize], src[end-2*BlockSize:end-BlockSize]) + smcipher.XorBytes(dst[end-2*BlockSize:end-BlockSize], temp[FourBlocksSize-2*BlockSize:FourBlocksSize-BlockSize], src[end-3*BlockSize:end-2*BlockSize]) + smcipher.XorBytes(dst[end-3*BlockSize:end-2*BlockSize], temp[FourBlocksSize-3*BlockSize:FourBlocksSize-2*BlockSize], src[end-4*BlockSize:end-3*BlockSize]) + smcipher.XorBytes(dst[end-4*BlockSize:end-3*BlockSize], temp[:BlockSize], src[end-5*BlockSize:end-4*BlockSize]) end = start start -= FourBlocksSize @@ -55,10 +59,10 @@ func (x *cbc) CryptBlocks(dst, src []byte) { encryptBlocksAsm(&x.b.dec[0], &temp[0], &src[:end][0]) count := end / BlockSize for i := count; i > 1; i-- { - xorBytes(dst[end-BlockSize:end], temp[end-BlockSize:end], src[end-2*BlockSize:end-BlockSize]) + smcipher.XorBytes(dst[end-BlockSize:end], temp[end-BlockSize:end], src[end-2*BlockSize:end-BlockSize]) end -= BlockSize } - xorBytes(dst[0:end], temp[0:end], x.iv[:]) + smcipher.XorBytes(dst[0:end], temp[0:end], x.iv[:]) // Set the new iv to the first block we copied earlier. x.iv, x.tmp = x.tmp, x.iv } diff --git a/sm4/cipher.go b/sm4/cipher.go index 9f62120..3e3a175 100644 --- a/sm4/cipher.go +++ b/sm4/cipher.go @@ -3,7 +3,8 @@ package sm4 import ( "crypto/cipher" "fmt" - "unsafe" + + smcipher "github.com/emmansun/gmsm/cipher" ) // BlockSize the sm4 block size in bytes. @@ -47,7 +48,7 @@ func (c *sm4Cipher) Encrypt(dst, src []byte) { if len(dst) < BlockSize { panic("sm4: output not full block") } - if InexactOverlap(dst[:BlockSize], src[:BlockSize]) { + if smcipher.InexactOverlap(dst[:BlockSize], src[:BlockSize]) { panic("sm4: invalid buffer overlap") } encryptBlockGo(c.enc, dst, src) @@ -60,29 +61,8 @@ func (c *sm4Cipher) Decrypt(dst, src []byte) { if len(dst) < BlockSize { panic("sm4: output not full block") } - if InexactOverlap(dst[:BlockSize], src[:BlockSize]) { + if smcipher.InexactOverlap(dst[:BlockSize], src[:BlockSize]) { panic("sm4: invalid buffer overlap") } decryptBlockGo(c.dec, dst, src) } - -// AnyOverlap reports whether x and y share memory at any (not necessarily -// corresponding) index. The memory beyond the slice length is ignored. 
-func AnyOverlap(x, y []byte) bool { - return len(x) > 0 && len(y) > 0 && - uintptr(unsafe.Pointer(&x[0])) <= uintptr(unsafe.Pointer(&y[len(y)-1])) && - uintptr(unsafe.Pointer(&y[0])) <= uintptr(unsafe.Pointer(&x[len(x)-1])) -} - -// InexactOverlap reports whether x and y share memory at any non-corresponding -// index. The memory beyond the slice length is ignored. Note that x and y can -// have different lengths and still not have any inexact overlap. -// -// InexactOverlap can be used to implement the requirements of the crypto/cipher -// AEAD, Block, BlockMode and Stream interfaces. -func InexactOverlap(x, y []byte) bool { - if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] { - return false - } - return AnyOverlap(x, y) -} diff --git a/sm4/cipher_asm.go b/sm4/cipher_asm.go index 9cfc967..7817144 100644 --- a/sm4/cipher_asm.go +++ b/sm4/cipher_asm.go @@ -5,6 +5,7 @@ package sm4 import ( "crypto/cipher" + smcipher "github.com/emmansun/gmsm/cipher" "golang.org/x/sys/cpu" ) @@ -47,7 +48,7 @@ func (c *sm4CipherAsm) Encrypt(dst, src []byte) { if len(dst) < BlockSize { panic("sm4: output not full block") } - if InexactOverlap(dst[:BlockSize], src[:BlockSize]) { + if smcipher.InexactOverlap(dst[:BlockSize], src[:BlockSize]) { panic("sm4: invalid buffer overlap") } encryptBlockAsm(&c.enc[0], &dst[0], &src[0]) @@ -60,7 +61,7 @@ func (c *sm4CipherAsm) Decrypt(dst, src []byte) { if len(dst) < BlockSize { panic("sm4: output not full block") } - if InexactOverlap(dst[:BlockSize], src[:BlockSize]) { + if smcipher.InexactOverlap(dst[:BlockSize], src[:BlockSize]) { panic("sm4: invalid buffer overlap") } encryptBlockAsm(&c.dec[0], &dst[0], &src[0]) diff --git a/sm4/ctr_amd64.go b/sm4/ctr_amd64.go index e0dad97..6a1f2c8 100644 --- a/sm4/ctr_amd64.go +++ b/sm4/ctr_amd64.go @@ -1,6 +1,10 @@ package sm4 -import "crypto/cipher" +import ( + "crypto/cipher" + + smcipher "github.com/emmansun/gmsm/cipher" +) // Assert that sm4CipherAsm implements the ctrAble interface. var _ ctrAble = (*sm4CipherAsm)(nil) @@ -76,14 +80,14 @@ func (x *ctr) XORKeyStream(dst, src []byte) { if len(dst) < len(src) { panic("crypto/cipher: output smaller than input") } - if InexactOverlap(dst[:len(src)], src) { + if smcipher.InexactOverlap(dst[:len(src)], src) { panic("crypto/cipher: invalid buffer overlap") } for len(src) > 0 { if x.outUsed >= len(x.out)-BlockSize { x.refill() } - n := xorBytes(dst, src, x.out[x.outUsed:]) + n := smcipher.XorBytes(dst, src, x.out[x.outUsed:]) dst = dst[n:] src = src[n:] x.outUsed += n diff --git a/sm4/gcm_amd64.go b/sm4/gcm_amd64.go index 3b216c5..79e3c90 100644 --- a/sm4/gcm_amd64.go +++ b/sm4/gcm_amd64.go @@ -5,6 +5,8 @@ import ( "crypto/subtle" "encoding/binary" "errors" + + smcipher "github.com/emmansun/gmsm/cipher" ) // Assert that sm4CipherAsm implements the gcmAble interface. 
@@ -80,8 +82,8 @@ func (g *gcm) Seal(dst, nonce, plaintext, data []byte) []byte { panic("crypto/cipher: message too large for GCM") } - ret, out := sliceForAppend(dst, len(plaintext)+g.tagSize) - if InexactOverlap(out, plaintext) { + ret, out := smcipher.SliceForAppend(dst, len(plaintext)+g.tagSize) + if smcipher.InexactOverlap(out, plaintext) { panic("crypto/cipher: invalid buffer overlap") } @@ -131,8 +133,8 @@ func (g *gcm) Open(dst, nonce, ciphertext, data []byte) ([]byte, error) { var expectedTag [gcmTagSize]byte g.auth(expectedTag[:], ciphertext, data, &tagMask) - ret, out := sliceForAppend(dst, len(ciphertext)) - if InexactOverlap(out, ciphertext) { + ret, out := smcipher.SliceForAppend(dst, len(ciphertext)) + if smcipher.InexactOverlap(out, ciphertext) { panic("crypto/cipher: invalid buffer overlap") } @@ -257,21 +259,6 @@ func gcmInc32(counterBlock *[16]byte) { binary.BigEndian.PutUint32(ctr, binary.BigEndian.Uint32(ctr)+1) } -// sliceForAppend takes a slice and a requested number of bytes. It returns a -// slice with the contents of the given slice followed by that many bytes and a -// second slice that aliases into it and contains only the extra bytes. If the -// original slice has sufficient capacity then no allocation is performed. -func sliceForAppend(in []byte, n int) (head, tail []byte) { - if total := len(in) + n; cap(in) >= total { - head = in[:total] - } else { - head = make([]byte, total) - copy(head, in) - } - tail = head[len(in):] - return -} - // counterCrypt crypts in to out using g.cipher in counter mode. func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) { var mask [FourBlocksSize]byte @@ -288,7 +275,7 @@ func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) { encryptBlocksAsm(&g.cipher.enc[0], &mask[0], &couters[0]) gcmInc32(counter) - xorWords(out, in, mask[:]) + smcipher.XorWords(out, in, mask[:]) out = out[FourBlocksSize:] in = in[FourBlocksSize:] } @@ -300,7 +287,7 @@ func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) { gcmInc32(counter) } encryptBlocksAsm(&g.cipher.enc[0], &mask[0], &couters[0]) - xorBytes(out, in, mask[:blocks*gcmBlockSize]) + smcipher.XorBytes(out, in, mask[:blocks*gcmBlockSize]) } } @@ -342,5 +329,5 @@ func (g *gcm) auth(out, ciphertext, additionalData []byte, tagMask *[gcmTagSize] binary.BigEndian.PutUint64(out, y.low) binary.BigEndian.PutUint64(out[8:], y.high) - xorWords(out, out, tagMask[:]) + smcipher.XorWords(out, out, tagMask[:]) } diff --git a/sm4/sm4_gcm.go b/sm4/sm4_gcm.go index 85761dc..bf19f44 100644 --- a/sm4/sm4_gcm.go +++ b/sm4/sm4_gcm.go @@ -4,6 +4,8 @@ package sm4 import ( "crypto/cipher" "crypto/subtle" + + smcipher "github.com/emmansun/gmsm/cipher" ) // sm4CipherGCM implements crypto/cipher.gcmAble so that crypto/cipher.NewGCM @@ -80,8 +82,8 @@ func (g *gcmAsm) Seal(dst, nonce, plaintext, data []byte) []byte { gcmSm4Data(&g.bytesProductTable, data, &tagOut) - ret, out := sliceForAppend(dst, len(plaintext)+g.tagSize) - if InexactOverlap(out[:len(plaintext)], plaintext) { + ret, out := smcipher.SliceForAppend(dst, len(plaintext)+g.tagSize) + if smcipher.InexactOverlap(out[:len(plaintext)], plaintext) { panic("crypto/cipher: invalid buffer overlap") } @@ -136,8 +138,8 @@ func (g *gcmAsm) Open(dst, nonce, ciphertext, data []byte) ([]byte, error) { var expectedTag [gcmTagSize]byte gcmSm4Data(&g.bytesProductTable, data, &expectedTag) - ret, out := sliceForAppend(dst, len(ciphertext)) - if InexactOverlap(out, ciphertext) { + ret, out := smcipher.SliceForAppend(dst, 
len(ciphertext)) + if smcipher.InexactOverlap(out, ciphertext) { panic("crypto/cipher: invalid buffer overlap") } if len(ciphertext) > 0 { diff --git a/sm4_test/benchmark_test.go b/sm4_test/benchmark_test.go index 686aee0..a7a9523 100644 --- a/sm4_test/benchmark_test.go +++ b/sm4_test/benchmark_test.go @@ -5,6 +5,7 @@ import ( "crypto/cipher" "testing" + smcipher "github.com/emmansun/gmsm/cipher" "github.com/emmansun/gmsm/sm4" ) @@ -270,3 +271,93 @@ func BenchmarkAESGCMOpen8K(b *testing.B) { func BenchmarkSM4GCMOpen8K(b *testing.B) { benchmarkSM4GCMOpen(b, make([]byte, 8*1024)) } + +func benchmarkAESCCMSign(b *testing.B, buf []byte) { + var key [16]byte + c, _ := aes.NewCipher(key[:]) + aesccm, _ := smcipher.NewCCM(c) + benchmarkGCMSign(b, aesccm, buf) +} + +func benchmarkSM4CCMSign(b *testing.B, buf []byte) { + var key [16]byte + c, _ := sm4.NewCipher(key[:]) + sm4ccm, _ := smcipher.NewCCM(c) + benchmarkGCMSign(b, sm4ccm, buf) +} + +func BenchmarkAESCCMSign1K(b *testing.B) { + benchmarkAESCCMSign(b, make([]byte, 1024)) +} + +func BenchmarkSM4CCMSign1K(b *testing.B) { + benchmarkSM4CCMSign(b, make([]byte, 1024)) +} + +func BenchmarkAESCCMSeal1K(b *testing.B) { + benchmarkAESCCMSeal(b, make([]byte, 1024)) +} + +func BenchmarkSM4CCMSeal1K(b *testing.B) { + benchmarkSM4CCMSeal(b, make([]byte, 1024)) +} + +func BenchmarkAESCCMOpen1K(b *testing.B) { + benchmarkAESCCMOpen(b, make([]byte, 1024)) +} + +func BenchmarkSM4CCMOpen1K(b *testing.B) { + benchmarkSM4CCMOpen(b, make([]byte, 1024)) +} + +func BenchmarkAESCCMSign8K(b *testing.B) { + benchmarkAESCCMSign(b, make([]byte, 8*1024)) +} + +func BenchmarkSM4CCMSign8K(b *testing.B) { + benchmarkSM4CCMSign(b, make([]byte, 8*1024)) +} + +func BenchmarkAESCCMSeal8K(b *testing.B) { + benchmarkAESCCMSeal(b, make([]byte, 8*1024)) +} + +func BenchmarkSM4CCMSeal8K(b *testing.B) { + benchmarkSM4CCMSeal(b, make([]byte, 8*1024)) +} + +func BenchmarkAESCCMOpen8K(b *testing.B) { + benchmarkAESCCMOpen(b, make([]byte, 8*1024)) +} + +func BenchmarkSM4CCMOpen8K(b *testing.B) { + benchmarkSM4CCMOpen(b, make([]byte, 8*1024)) +} + +func benchmarkAESCCMSeal(b *testing.B, buf []byte) { + var key [16]byte + c, _ := aes.NewCipher(key[:]) + sm4gcm, _ := smcipher.NewCCM(c) + benchmarkGCMSeal(b, sm4gcm, buf) +} + +func benchmarkSM4CCMSeal(b *testing.B, buf []byte) { + var key [16]byte + c, _ := sm4.NewCipher(key[:]) + sm4gcm, _ := smcipher.NewCCM(c) + benchmarkGCMSeal(b, sm4gcm, buf) +} + +func benchmarkAESCCMOpen(b *testing.B, buf []byte) { + var key [16]byte + c, _ := aes.NewCipher(key[:]) + sm4gcm, _ := smcipher.NewCCM(c) + benchmarkGCMOpen(b, sm4gcm, buf) +} + +func benchmarkSM4CCMOpen(b *testing.B, buf []byte) { + var key [16]byte + c, _ := sm4.NewCipher(key[:]) + sm4gcm, _ := smcipher.NewCCM(c) + benchmarkGCMOpen(b, sm4gcm, buf) +}
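
Usage note (not part of the patch): below is a minimal sketch showing how the CCM mode added here can be wired up with the existing SM4 block cipher. It assumes the import paths github.com/emmansun/gmsm/cipher and github.com/emmansun/gmsm/sm4 already used elsewhere in this patch; the program structure, random key/nonce generation, and message contents are illustrative only.

package main

import (
	"crypto/rand"
	"fmt"

	smcipher "github.com/emmansun/gmsm/cipher"
	"github.com/emmansun/gmsm/sm4"
)

func main() {
	// 128-bit SM4 key; a random key is used here purely for illustration.
	key := make([]byte, 16)
	if _, err := rand.Read(key); err != nil {
		panic(err)
	}

	block, err := sm4.NewCipher(key)
	if err != nil {
		panic(err)
	}

	// NewCCM wraps the block cipher with the standard 12-byte nonce
	// and 16-byte tag defined in cipher/ccm.go.
	aead, err := smcipher.NewCCM(block)
	if err != nil {
		panic(err)
	}

	nonce := make([]byte, aead.NonceSize())
	if _, err := rand.Read(nonce); err != nil {
		panic(err)
	}

	plaintext := []byte("hello, sm4-ccm")
	aad := []byte("header")

	// Seal appends the ciphertext and authentication tag; Open verifies
	// the tag and returns the plaintext, or an error on a tag mismatch.
	ciphertext := aead.Seal(nil, nonce, plaintext, aad)
	recovered, err := aead.Open(nil, nonce, ciphertext, aad)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", recovered)
}

Nonce and tag sizes other than the 12-byte/16-byte defaults can be requested with NewCCMWithNonceAndTagSize; per the validation in cipher/ccm.go, nonce lengths of 7 to 13 bytes and even tag sizes from 4 to 16 bytes are accepted.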