diff --git a/doc/crypto.dox b/doc/crypto.dox
index f02c8979..bfcee113 100644
--- a/doc/crypto.dox
+++ b/doc/crypto.dox
@@ -86,12 +86,15 @@ Ardunino Mega 2560 running at 16 MHz are similar:
|
AEAD Algorithm | Encryption (per byte) | Decryption (per byte) | Key Setup | State Size (bytes) |
ChaChaPoly | 41.20us | 41.19us | 902.36us | 221 |
-GCM<AES128> | 183.25us | 182.80us | 1272.73us | 284 |
-GCM<AES192> | 189.92us | 189.47us | 1492.60us | 316 |
-GCM<AES256> | 196.59us | 196.13us | 1767.33us | 348 |
-EAX<AES128> | 71.14us | 71.14us | 1329.44us | 268 |
-EAX<Speck> (128-bit key) | 26.01us | 26.01us | 735.46us | 362 |
-EAX<SpeckLowMemory> (128-bit key) | 75.08us | 75.07us | 1243.66us | 122 |
+GCM<AES128> | 109.71us | 109.26us | 1265.69us | 284 |
+GCM<AES192> | 116.38us | 115.92us | 1485.56us | 316 |
+GCM<AES256> | 123.04us | 122.59us | 1760.28us | 348 |
+GCM<Speck> (256-bit key) | 87.78us | 87.32us | 714.41us | 378 |
+GCM<SpeckLowMemory> (256-bit key) | 114.30us | 113.84us | 1270.32us | 138 |
+EAX<AES128> | 71.14us | 71.14us | 1311.97us | 268 |
+EAX<AES256> | 97.80us | 97.80us | 1806.57us | 332 |
+EAX<Speck> (256-bit key) | 27.27us | 27.26us | 760.74us | 362 |
+EAX<SpeckLowMemory> (256-bit key) | 80.31us | 80.31us | 1316.60us | 122 |
|
Hash Algorithm | Hashing (per byte) | Finalization | | State Size (bytes) |
SHA256 | 43.85us | 2841.04us | | 107 |
@@ -105,7 +108,7 @@ Ardunino Mega 2560 running at 16 MHz are similar:
SHA256 (HMAC mode) | 43.85us | 8552.61us | 2836.49us | 107 |
BLAKE2s (HMAC mode) | 20.65us | 4055.56us | 1350.00us | 107 |
Poly1305 | 26.26us | 489.11us | 17.06us | 53 |
-GHASH | 148.14us | 17.09us | 21.87us | 33 |
+GHASH | 74.59us | 15.91us | 14.79us | 33 |
|
Public Key Operation | Time (per operation) | Comment |
Curve25519::eval() | 3119ms | Raw curve evaluation |
@@ -141,12 +144,15 @@ All figures are for the Arduino Due running at 84 MHz:
|
AEAD Algorithm | Encryption (per byte) | Decryption (per byte) | Key Setup | State Size (bytes) |
ChaChaPoly | 1.71us | 1.71us | 45.08us | 240 |
-GCM<AES128> | 10.29us | 10.29us | 223.82us | 312 |
-GCM<AES192> | 11.50us | 11.51us | 265.62us | 344 |
-GCM<AES256> | 12.67us | 12.67us | 313.06us | 376 |
-EAX<AES128> | 12.29us | 12.29us | 236.47us | 280 |
-EAX<Speck> (128-bit key) | 2.65us | 2.65us | 79.46us | 384 |
-EAX<SpeckLowMemory> (128-bit key) | 6.29us | 6.29us | 106.60us | 144 |
+GCM<AES128> | 10.90us | 10.90us | 248.83us | 312 |
+GCM<AES192> | 12.30us | 12.31us | 296.83us | 344 |
+GCM<AES256> | 13.66us | 13.67us | 350.25us | 376 |
+GCM<Speck> (256-bit key) | 5.27us | 5.28us | 75.31us | 408 |
+GCM<SpeckLowMemory> (256-bit key) | 7.06us | 7.07us | 94.20us | 168 |
+EAX<AES128> | 12.33us | 12.33us | 234.91us | 280 |
+EAX<AES256> | 16.99us | 16.99us | 322.92us | 344 |
+EAX<Speck> (256-bit key) | 2.80us | 2.80us | 81.63us | 384 |
+EAX<SpeckLowMemory> (256-bit key) | 6.69us | 6.69us | 110.91us | 144 |
|
Hash Algorithm | Hashing (per byte) | Finalization | | State Size (bytes) |
SHA256 | 1.15us | 76.60us | | 120 |
@@ -160,7 +166,7 @@ All figures are for the Arduino Due running at 84 MHz:
SHA256 (HMAC mode) | 1.15us | 238.98us | 80.44us | 120 |
BLAKE2s (HMAC mode) | 0.72us | 157.75us | 57.18us | 120 |
Poly1305 | 0.81us | 19.01us | 2.57us | 60 |
-GHASH | 4.37us | 1.50us | 4.37us | 36 |
+GHASH | 4.47us | 1.52us | 2.60us | 36 |
|
Public Key Operation | Time (per operation) | Comment |
Curve25519::eval() | 103ms | Raw curve evaluation |
diff --git a/libraries/Crypto/EAX.cpp b/libraries/Crypto/EAX.cpp
index 42db3b37..dc14b2ae 100644
--- a/libraries/Crypto/EAX.cpp
+++ b/libraries/Crypto/EAX.cpp
@@ -21,6 +21,7 @@
*/
#include "EAX.h"
+#include "GF128.h"
#include "Crypto.h"
#include
@@ -145,19 +146,6 @@ void EAXCommon::clear()
clean(state);
}
-// Doubles a 128-bit value in the GF(2^128) field.
-static void gfDouble(uint8_t value[16])
-{
- uint16_t temp = 0;
- for (uint8_t index = 16; index > 0; ) {
- --index;
- temp |= (((uint16_t)(value[index])) << 1);
- value[index] = (uint8_t)temp;
- temp >>= 8;
- }
- value[15] ^= (uint8_t)((-temp) & 0x87);
-}
-
/**
* \brief Initialises the first OMAC hashing context and creates the B value.
*
@@ -175,7 +163,7 @@ void EAXCommon::omacInitFirst(uint8_t omac[16])
// Generate the B value from the encrypted block of zeroes.
// We will need this later when finalising the OMAC hashes.
memcpy(state.b, omac, 16);
- gfDouble(state.b);
+ GF128::dblEAX(state.b);
}
/**
@@ -230,17 +218,17 @@ void EAXCommon::omacFinal(uint8_t omac[16])
// Apply padding if necessary.
if (state.authPosn != 16) {
// Need padding: XOR with P = 2 * B.
- uint8_t p[16];
+ uint32_t p[4];
memcpy(p, state.b, 16);
- gfDouble(p);
+ GF128::dblEAX(p);
omac[state.authPosn] ^= 0x80;
for (uint8_t index = 0; index < 16; ++index)
- omac[index] ^= p[index];
+ omac[index] ^= ((const uint8_t *)p)[index];
clean(p);
} else {
// No padding necessary: XOR with B.
for (uint8_t index = 0; index < 16; ++index)
- omac[index] ^= state.b[index];
+ omac[index] ^= ((const uint8_t *)(state.b))[index];
}
// Encrypt the hash to get the final OMAC value.
diff --git a/libraries/Crypto/EAX.h b/libraries/Crypto/EAX.h
index 1499b744..32e2a83c 100644
--- a/libraries/Crypto/EAX.h
+++ b/libraries/Crypto/EAX.h
@@ -59,7 +59,7 @@ private:
uint8_t stream[16];
uint8_t tag[16];
uint8_t hash[16];
- uint8_t b[16];
+ uint32_t b[4];
uint8_t encPosn;
uint8_t authPosn;
uint8_t authMode;
diff --git a/libraries/Crypto/GF128.cpp b/libraries/Crypto/GF128.cpp
new file mode 100644
index 00000000..3924392d
--- /dev/null
+++ b/libraries/Crypto/GF128.cpp
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "GF128.h"
+#include "utility/EndianUtil.h"
+#include <string.h>
+
+/**
+ * \class GF128 GF128.h
+ * \brief Operations in the Galois field GF(2^128).
+ *
+ * This class contains helper functions for performing operations in
+ * the Galois field GF(2^128) which is used as the basis of GCM and GHASH.
+ * These functions are provided for use by other cryptographic protocols
+ * that make use of GF(2^128).
+ *
+ * Most of the functions in this class use the field, polynomial, and
+ * byte ordering conventions described in NIST SP 800-38D (GCM). The one
+ * exception is dblEAX() which uses the conventions of EAX mode instead.
+ *
+ * References: NIST SP 800-38D
+ *
+ * \sa GCM, GHASH
+ */
+
+/**
+ * \brief Prepares a key for multiplication in the GF(2^128) field.
+ *
+ * \param H Receives the prepared form of the key for later use by mul().
+ * All 16 bytes of this array are overwritten.
+ * \param key Points to the 16 byte authentication key, which is expected
+ * to be in big-endian byte order.
+ *
+ * Together with mul(), this function gives other classes access to the
+ * raw GF(2^128) field multiplication of GHASH without the overhead of
+ * GHASH itself.
+ *
+ * \sa mul(), dbl()
+ */
+void GF128::mulInit(uint32_t H[4], const void *key)
+{
+    // Every platform starts from a verbatim copy of the 16 key bytes.
+    memcpy(H, key, 16);
+
+    // AVR builds stop here: H stays in big endian order because mul()
+    // corrects for the byte order itself.  Other builds convert each
+    // word from big endian to host order when CRYPTO_LITTLE_ENDIAN is
+    // defined.
+#if !defined(__AVR__) && defined(CRYPTO_LITTLE_ENDIAN)
+    for (uint8_t word = 0; word < 4; ++word) {
+        uint32_t bigEndian = H[word];
+        H[word] = be32toh(bigEndian);
+    }
+#endif
+}
+
+/**
+ * \brief Perform a multiplication in the GF(2^128) field.
+ *
+ * \param Y The first value to multiply, and the result. This array is
+ * assumed to be in big-endian order on entry and exit.
+ * \param H The second value to multiply, which must have been initialized
+ * by the mulInit() function.
+ *
+ * This function and the companion mulInit() are intended for use by other
+ * classes that need access to the raw GF(2^128) field multiplication of
+ * GHASH without the overhead of GHASH itself.
+ *
+ * \sa mulInit(), dbl()
+ */
+void GF128::mul(uint32_t Y[4], const uint32_t H[4])
+{
+#if defined(__AVR__)
+    uint32_t Z[4] = {0, 0, 0, 0};   // Z = 0
+    uint32_t V0 = H[0];             // V = H
+    uint32_t V1 = H[1];
+    uint32_t V2 = H[2];
+    uint32_t V3 = H[3];
+
+    // Multiply Z by V for the set bits in Y, starting at the top.
+    // This is a very simple bit by bit version that may not be very
+    // fast but it should be resistant to cache timing attacks.
+    for (uint8_t posn = 0; posn < 16; ++posn) {
+        uint8_t value = ((const uint8_t *)Y)[posn];
+        for (uint8_t bit = 0; bit < 8; ++bit) {
+            __asm__ __volatile__ (
+                // Extract the high bit of "value" and turn it into a mask.
+                "ldd r24,%8\n"
+                "lsl r24\n"
+                "std %8,r24\n"
+                "mov __tmp_reg__,__zero_reg__\n"
+                "sbc __tmp_reg__,__zero_reg__\n"
+
+                // XOR V with Z if the bit is 1.
+                "mov r24,%D0\n"         // Z0 ^= (V0 & mask)
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%D4\n"
+                "eor r25,r24\n"
+                "std %D4,r25\n"
+                "mov r24,%C0\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%C4\n"
+                "eor r25,r24\n"
+                "std %C4,r25\n"
+                "mov r24,%B0\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%B4\n"
+                "eor r25,r24\n"
+                "std %B4,r25\n"
+                "mov r24,%A0\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%A4\n"
+                "eor r25,r24\n"
+                "std %A4,r25\n"
+                "mov r24,%D1\n"         // Z1 ^= (V1 & mask)
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%D5\n"
+                "eor r25,r24\n"
+                "std %D5,r25\n"
+                "mov r24,%C1\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%C5\n"
+                "eor r25,r24\n"
+                "std %C5,r25\n"
+                "mov r24,%B1\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%B5\n"
+                "eor r25,r24\n"
+                "std %B5,r25\n"
+                "mov r24,%A1\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%A5\n"
+                "eor r25,r24\n"
+                "std %A5,r25\n"
+                "mov r24,%D2\n"         // Z2 ^= (V2 & mask)
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%D6\n"
+                "eor r25,r24\n"
+                "std %D6,r25\n"
+                "mov r24,%C2\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%C6\n"
+                "eor r25,r24\n"
+                "std %C6,r25\n"
+                "mov r24,%B2\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%B6\n"
+                "eor r25,r24\n"
+                "std %B6,r25\n"
+                "mov r24,%A2\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%A6\n"
+                "eor r25,r24\n"
+                "std %A6,r25\n"
+                "mov r24,%D3\n"         // Z3 ^= (V3 & mask)
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%D7\n"
+                "eor r25,r24\n"
+                "std %D7,r25\n"
+                "mov r24,%C3\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%C7\n"
+                "eor r25,r24\n"
+                "std %C7,r25\n"
+                "mov r24,%B3\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%B7\n"
+                "eor r25,r24\n"
+                "std %B7,r25\n"
+                "mov r24,%A3\n"
+                "and r24,__tmp_reg__\n"
+                "ldd r25,%A7\n"
+                "eor r25,r24\n"
+                "std %A7,r25\n"
+
+                // Shift V right by 1 bit; XOR 0xE1 into the first byte if a 1 bit was shifted out.
+                "lsr %A0\n"
+                "ror %B0\n"
+                "ror %C0\n"
+                "ror %D0\n"
+                "ror %A1\n"
+                "ror %B1\n"
+                "ror %C1\n"
+                "ror %D1\n"
+                "ror %A2\n"
+                "ror %B2\n"
+                "ror %C2\n"
+                "ror %D2\n"
+                "ror %A3\n"
+                "ror %B3\n"
+                "ror %C3\n"
+                "ror %D3\n"
+                "mov r24,__zero_reg__\n"
+                "sbc r24,__zero_reg__\n"
+                "andi r24,0xE1\n"
+                "eor %A0,r24\n"
+                : "+r"(V0), "+r"(V1), "+r"(V2), "+r"(V3)
+                : "Q"(Z[0]), "Q"(Z[1]), "Q"(Z[2]), "Q"(Z[3]), "Q"(value)
+                : "r24", "r25", "memory"    // the asm stores into Z[] and value
+            );
+        }
+    }
+
+    // We have finished the block so copy Z into Y in memory order (no swap: Z was built in H's big-endian layout).
+    __asm__ __volatile__ (
+        "ldd __tmp_reg__,%A0\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%B0\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%C0\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%D0\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%A1\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%B1\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%C1\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%D1\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%A2\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%B2\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%C2\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%D2\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%A3\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%B3\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%C3\n"
+        "st X+,__tmp_reg__\n"
+        "ldd __tmp_reg__,%D3\n"
+        "st X,__tmp_reg__\n"
+        : : "Q"(Z[0]), "Q"(Z[1]), "Q"(Z[2]), "Q"(Z[3]), "x"(Y) : "memory"
+    ); // NOTE(review): "st X+" advances X although "x"(Y) is input-only; confirm this is acceptable.
+#else // !__AVR__
+    uint32_t Z0 = 0;                // Z = 0
+    uint32_t Z1 = 0;
+    uint32_t Z2 = 0;
+    uint32_t Z3 = 0;
+    uint32_t V0 = H[0];             // V = H
+    uint32_t V1 = H[1];
+    uint32_t V2 = H[2];
+    uint32_t V3 = H[3];
+
+    // Multiply Z by V for the set bits in Y, starting at the top.
+    // This is a very simple bit by bit version that may not be very
+    // fast but it should be resistant to cache timing attacks.
+    for (uint8_t posn = 0; posn < 16; ++posn) {
+        uint8_t value = ((const uint8_t *)Y)[posn];
+        for (uint8_t bit = 0; bit < 8; ++bit, value <<= 1) {
+            // Extract the high bit of "value" and turn it into a mask.
+            uint32_t mask = (~((uint32_t)(value >> 7))) + 1;
+
+            // XOR V with Z if the bit is 1.
+            Z0 ^= (V0 & mask);
+            Z1 ^= (V1 & mask);
+            Z2 ^= (V2 & mask);
+            Z3 ^= (V3 & mask);
+
+            // Shift V right by 1 bit; XOR 0xE1000000 into V0 if a 1 bit was shifted out.
+            mask = ((~(V3 & 0x01)) + 1) & 0xE1000000;
+            V3 = (V3 >> 1) | (V2 << 31);
+            V2 = (V2 >> 1) | (V1 << 31);
+            V1 = (V1 >> 1) | (V0 << 31);
+            V0 = (V0 >> 1) ^ mask;
+        }
+    }
+
+    // We have finished the block so copy Z into Y and byte-swap.
+    Y[0] = htobe32(Z0);
+    Y[1] = htobe32(Z1);
+    Y[2] = htobe32(Z2);
+    Y[3] = htobe32(Z3);
+#endif // !__AVR__
+}
+
+/**
+ * \brief Doubles a value in the GF(2^128) field.
+ *
+ * \param V The value to double, and the result. This array is
+ * assumed to be in big-endian order on entry and exit.
+ *
+ * Block cipher modes such as XEX
+ * are similar to CTR mode but instead of incrementing the nonce every
+ * block, the modes multiply the nonce by 2 in the GF(2^128) field every
+ * block. This function is provided to help with implementing such modes.
+ *
+ * \sa dblEAX(), mul()
+ */
+void GF128::dbl(uint32_t V[4])
+{
+#if defined(__AVR__)
+    __asm__ __volatile__ (
+        "ld r16,Z\n"
+        "ldd r17,Z+1\n"
+        "ldd r18,Z+2\n"
+        "ldd r19,Z+3\n"
+        "lsr r16\n"
+        "ror r17\n"
+        "ror r18\n"
+        "ror r19\n"
+        "std Z+1,r17\n"
+        "std Z+2,r18\n"
+        "std Z+3,r19\n"
+        "ldd r17,Z+4\n"
+        "ldd r18,Z+5\n"
+        "ldd r19,Z+6\n"
+        "ldd r20,Z+7\n"
+        "ror r17\n"
+        "ror r18\n"
+        "ror r19\n"
+        "ror r20\n"
+        "std Z+4,r17\n"
+        "std Z+5,r18\n"
+        "std Z+6,r19\n"
+        "std Z+7,r20\n"
+        "ldd r17,Z+8\n"
+        "ldd r18,Z+9\n"
+        "ldd r19,Z+10\n"
+        "ldd r20,Z+11\n"
+        "ror r17\n"
+        "ror r18\n"
+        "ror r19\n"
+        "ror r20\n"
+        "std Z+8,r17\n"
+        "std Z+9,r18\n"
+        "std Z+10,r19\n"
+        "std Z+11,r20\n"
+        "ldd r17,Z+12\n"
+        "ldd r18,Z+13\n"
+        "ldd r19,Z+14\n"
+        "ldd r20,Z+15\n"
+        "ror r17\n"
+        "ror r18\n"
+        "ror r19\n"
+        "ror r20\n"
+        "std Z+12,r17\n"
+        "std Z+13,r18\n"
+        "std Z+14,r19\n"
+        "std Z+15,r20\n"
+        "mov r17,__zero_reg__\n"
+        "sbc r17,__zero_reg__\n"
+        "andi r17,0xE1\n"
+        "eor r16,r17\n"
+        "st Z,r16\n"
+        : : "z"(V)
+        : "r16", "r17", "r18", "r19", "r20", "memory"   // the asm stores into V[]
+    );
+#else
+    uint32_t V0 = be32toh(V[0]);
+    uint32_t V1 = be32toh(V[1]);
+    uint32_t V2 = be32toh(V[2]);
+    uint32_t V3 = be32toh(V[3]);
+    uint32_t mask = ((~(V3 & 0x01)) + 1) & 0xE1000000;
+    V3 = (V3 >> 1) | (V2 << 31);
+    V2 = (V2 >> 1) | (V1 << 31);
+    V1 = (V1 >> 1) | (V0 << 31);
+    V0 = (V0 >> 1) ^ mask;
+    V[0] = htobe32(V0);
+    V[1] = htobe32(V1);
+    V[2] = htobe32(V2);
+    V[3] = htobe32(V3);
+#endif
+}
+
+/**
+ * \brief Doubles a value in the GF(2^128) field using EAX conventions.
+ *
+ * \param V The value to double, and the result. This array is
+ * assumed to be in big-endian order on entry and exit.
+ *
+ * This function differs from dbl() in that it uses the conventions of EAX
+ * mode instead of those of NIST SP 800-38D (GCM). The two operations have
+ * equivalent security but the bits are ordered differently with the
+ * value shifted left instead of right.
+ *
+ * References: https://en.wikipedia.org/wiki/EAX_mode,
+ * http://web.cs.ucdavis.edu/~rogaway/papers/eax.html
+ *
+ * \sa dbl(), mul()
+ */
+void GF128::dblEAX(uint32_t V[4])
+{
+#if defined(__AVR__)
+    __asm__ __volatile__ (
+        "ldd r16,Z+15\n"
+        "ldd r17,Z+14\n"
+        "ldd r18,Z+13\n"
+        "ldd r19,Z+12\n"
+        "lsl r16\n"
+        "rol r17\n"
+        "rol r18\n"
+        "rol r19\n"
+        "std Z+14,r17\n"
+        "std Z+13,r18\n"
+        "std Z+12,r19\n"
+        "ldd r17,Z+11\n"
+        "ldd r18,Z+10\n"
+        "ldd r19,Z+9\n"
+        "ldd r20,Z+8\n"
+        "rol r17\n"
+        "rol r18\n"
+        "rol r19\n"
+        "rol r20\n"
+        "std Z+11,r17\n"
+        "std Z+10,r18\n"
+        "std Z+9,r19\n"
+        "std Z+8,r20\n"
+        "ldd r17,Z+7\n"
+        "ldd r18,Z+6\n"
+        "ldd r19,Z+5\n"
+        "ldd r20,Z+4\n"
+        "rol r17\n"
+        "rol r18\n"
+        "rol r19\n"
+        "rol r20\n"
+        "std Z+7,r17\n"
+        "std Z+6,r18\n"
+        "std Z+5,r19\n"
+        "std Z+4,r20\n"
+        "ldd r17,Z+3\n"
+        "ldd r18,Z+2\n"
+        "ldd r19,Z+1\n"
+        "ld r20,Z\n"
+        "rol r17\n"
+        "rol r18\n"
+        "rol r19\n"
+        "rol r20\n"
+        "std Z+3,r17\n"
+        "std Z+2,r18\n"
+        "std Z+1,r19\n"
+        "st Z,r20\n"
+        "mov r17,__zero_reg__\n"
+        "sbc r17,__zero_reg__\n"
+        "andi r17,0x87\n"
+        "eor r16,r17\n"
+        "std Z+15,r16\n"
+        : : "z"(V)
+        : "r16", "r17", "r18", "r19", "r20", "memory"   // the asm stores into V[]
+    );
+#else
+    uint32_t V0 = be32toh(V[0]);
+    uint32_t V1 = be32toh(V[1]);
+    uint32_t V2 = be32toh(V[2]);
+    uint32_t V3 = be32toh(V[3]);
+    uint32_t mask = ((~(V0 >> 31)) + 1) & 0x00000087;
+    V0 = (V0 << 1) | (V1 >> 31);
+    V1 = (V1 << 1) | (V2 >> 31);
+    V2 = (V2 << 1) | (V3 >> 31);
+    V3 = (V3 << 1) ^ mask;
+    V[0] = htobe32(V0);
+    V[1] = htobe32(V1);
+    V[2] = htobe32(V2);
+    V[3] = htobe32(V3);
+#endif
+}
diff --git a/libraries/Crypto/GF128.h b/libraries/Crypto/GF128.h
new file mode 100644
index 00000000..715daa98
--- /dev/null
+++ b/libraries/Crypto/GF128.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CRYPTO_GF128_h
+#define CRYPTO_GF128_h
+
+#include <inttypes.h>
+
+class GF128
+{
+private:    // every member function is static, so instances are never created
+    GF128() {}
+    ~GF128() {}
+
+public:
+    static void mulInit(uint32_t H[4], const void *key);
+    static void mul(uint32_t Y[4], const uint32_t H[4]);
+    static void dbl(uint32_t V[4]);
+    static void dblEAX(uint32_t V[4]);
+};
+
+#endif
diff --git a/libraries/Crypto/GHASH.cpp b/libraries/Crypto/GHASH.cpp
index f8fd6797..6853f081 100644
--- a/libraries/Crypto/GHASH.cpp
+++ b/libraries/Crypto/GHASH.cpp
@@ -21,8 +21,8 @@
*/
#include "GHASH.h"
+#include "GF128.h"
#include "Crypto.h"
-#include "utility/EndianUtil.h"
#include
/**
@@ -66,16 +66,7 @@ GHASH::~GHASH()
*/
void GHASH::reset(const void *key)
{
- // Copy the key into H and convert from big endian to host order.
- memcpy(state.H, key, 16);
-#if defined(CRYPTO_LITTLE_ENDIAN)
- state.H[0] = be32toh(state.H[0]);
- state.H[1] = be32toh(state.H[1]);
- state.H[2] = be32toh(state.H[2]);
- state.H[3] = be32toh(state.H[3]);
-#endif
-
- // Reset the hash.
+ GF128::mulInit(state.H, key);
memset(state.Y, 0, sizeof(state.Y));
state.posn = 0;
}
@@ -106,7 +97,7 @@ void GHASH::update(const void *data, size_t len)
len -= size;
d += size;
if (state.posn == 16) {
- processChunk();
+ GF128::mul(state.Y, state.H);
state.posn = 0;
}
}
@@ -148,7 +139,7 @@ void GHASH::pad()
if (state.posn != 0) {
// Padding involves XOR'ing the rest of state.Y with zeroes,
// which does nothing. Immediately process the next chunk.
- processChunk();
+ GF128::mul(state.Y, state.H);
state.posn = 0;
}
}
@@ -160,45 +151,3 @@ void GHASH::clear()
{
clean(state);
}
-
-void GHASH::processChunk()
-{
- uint32_t Z0 = 0; // Z = 0
- uint32_t Z1 = 0;
- uint32_t Z2 = 0;
- uint32_t Z3 = 0;
- uint32_t V0 = state.H[0]; // V = H
- uint32_t V1 = state.H[1];
- uint32_t V2 = state.H[2];
- uint32_t V3 = state.H[3];
-
- // Multiply Z by V for the set bits in Y, starting at the top.
- // This is a very simple bit by bit version that may not be very
- // fast but it should be resistant to cache timing attacks.
- for (uint8_t posn = 0; posn < 16; ++posn) {
- uint8_t value = ((const uint8_t *)state.Y)[posn];
- for (uint8_t bit = 0; bit < 8; ++bit, value <<= 1) {
- // Extract the high bit of "value" and turn it into a mask.
- uint32_t mask = (~((uint32_t)(value >> 7))) + 1;
-
- // XOR V with Z if the bit is 1.
- Z0 ^= (V0 & mask);
- Z1 ^= (V1 & mask);
- Z2 ^= (V2 & mask);
- Z3 ^= (V3 & mask);
-
- // Rotate V right by 1 bit.
- mask = ((~(V3 & 0x01)) + 1) & 0xE1000000;
- V3 = (V3 >> 1) | (V2 << 31);
- V2 = (V2 >> 1) | (V1 << 31);
- V1 = (V1 >> 1) | (V0 << 31);
- V0 = (V0 >> 1) ^ mask;
- }
- }
-
- // We have finished the block so copy Z into Y and byte-swap.
- state.Y[0] = htobe32(Z0);
- state.Y[1] = htobe32(Z1);
- state.Y[2] = htobe32(Z2);
- state.Y[3] = htobe32(Z3);
-}
diff --git a/libraries/Crypto/GHASH.h b/libraries/Crypto/GHASH.h
index 07a84bfa..20677685 100644
--- a/libraries/Crypto/GHASH.h
+++ b/libraries/Crypto/GHASH.h
@@ -46,8 +46,6 @@ private:
uint32_t Y[4];
uint8_t posn;
} state;
-
- void processChunk();
};
#endif
diff --git a/libraries/Crypto/examples/TestEAX/TestEAX.ino b/libraries/Crypto/examples/TestEAX/TestEAX.ino
index 89e6f5f4..e6ac415f 100644
--- a/libraries/Crypto/examples/TestEAX/TestEAX.ino
+++ b/libraries/Crypto/examples/TestEAX/TestEAX.ino
@@ -233,6 +233,7 @@ static TestVector const testVectorEAX10 PROGMEM = {
TestVector testVector;
EAX *eax;
+EAX *eax256;
EAX *eaxSpeck;
EAX *eaxSpeckLowMemory;
@@ -353,7 +354,7 @@ void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test
start = micros();
for (count = 0; count < 1000; ++count) {
- cipher->setKey(test->key, 16);
+ cipher->setKey(test->key, cipher->keySize());
cipher->setIV(test->iv, test->ivsize);
}
elapsed = micros() - start;
@@ -378,7 +379,7 @@ void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *tes
Serial.print(test->name);
Serial.print(" Encrypt ... ");
- cipher->setKey(test->key, 16);
+ cipher->setKey(test->key, cipher->keySize());
cipher->setIV(test->iv, test->ivsize);
start = micros();
for (count = 0; count < 500; ++count) {
@@ -406,7 +407,7 @@ void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *tes
Serial.print(test->name);
Serial.print(" Decrypt ... ");
- cipher->setKey(test->key, 16);
+ cipher->setKey(test->key, cipher->keySize());
cipher->setIV(test->iv, test->ivsize);
start = micros();
for (count = 0; count < 500; ++count) {
@@ -434,7 +435,7 @@ void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector
Serial.print(test->name);
Serial.print(" AddAuthData ... ");
- cipher->setKey(test->key, 16);
+ cipher->setKey(test->key, cipher->keySize());
cipher->setIV(test->iv, test->ivsize);
start = micros();
memset(buffer, 0xBA, 128);
@@ -463,7 +464,7 @@ void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *
Serial.print(test->name);
Serial.print(" ComputeTag ... ");
- cipher->setKey(test->key, 16);
+ cipher->setKey(test->key, cipher->keySize());
cipher->setIV(test->iv, test->ivsize);
start = micros();
for (count = 0; count < 1000; ++count) {
@@ -495,6 +496,8 @@ void setup()
Serial.println("State Sizes:");
Serial.print("EAX ... ");
Serial.println(sizeof(*eax));
+ Serial.print("EAX ... ");
+ Serial.println(sizeof(*eax256));
Serial.print("EAX ... ");
Serial.println(sizeof(*eaxSpeck));
Serial.print("EAX ... ");
@@ -520,6 +523,10 @@ void setup()
perfCipher(eax, &testVectorEAX1, "AES-128");
Serial.println();
delete eax;
+ eax256 = new EAX();
+ perfCipher(eax256, &testVectorEAX1, "AES-256"); // eax was deleted above; benchmark the new instance
+ Serial.println();
+ delete eax256;
eaxSpeck = new EAX();
perfCipher(eaxSpeck, &testVectorEAX1, "Speck");
Serial.println();
diff --git a/libraries/Crypto/examples/TestGCM/TestGCM.ino b/libraries/Crypto/examples/TestGCM/TestGCM.ino
index dd015615..7084c6f8 100644
--- a/libraries/Crypto/examples/TestGCM/TestGCM.ino
+++ b/libraries/Crypto/examples/TestGCM/TestGCM.ino
@@ -26,10 +26,19 @@ This example runs tests on the GCM implementation to verify correct behaviour.
#include
#include
+#include
+#include
#include
#include
#include
+// There isn't enough memory to test both AES and Speck on the Uno,
+// so disable Speck testing on AVR platforms unless explicitly enabled.
+// When enabled, some of the AES tests are disabled to reclaim memory.
+#if defined(__AVR__)
+//#define TEST_SPECK 1
+#endif
+
#define MAX_PLAINTEXT_LEN 64
struct TestVector
@@ -65,6 +74,7 @@ static TestVector const testVectorGCM1 PROGMEM = {
.tagsize = 16,
.ivsize = 12
};
+#ifndef TEST_SPECK
static TestVector const testVectorGCM2 PROGMEM = {
.name = "AES-128 GCM #2",
.key = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -176,6 +186,7 @@ static TestVector const testVectorGCM5 PROGMEM = {
.tagsize = 16,
.ivsize = 8
};
+#endif // !TEST_SPECK
static TestVector const testVectorGCM10 PROGMEM = {
.name = "AES-192 GCM #10",
.key = {0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
@@ -249,6 +260,8 @@ TestVector testVector;
GCM *gcmaes128 = 0;
GCM *gcmaes192 = 0;
GCM *gcmaes256 = 0;
+GCM *gcmspeck = 0;
+GCM *gcmspecklm = 0;
byte buffer[128];
@@ -348,7 +361,7 @@ void testCipher(AuthenticatedCipher *cipher, const struct TestVector *test)
Serial.println("Failed");
}
-void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test)
+void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name)
{
unsigned long start;
unsigned long elapsed;
@@ -357,7 +370,7 @@ void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test
memcpy_P(&testVector, test, sizeof(TestVector));
test = &testVector;
- Serial.print(test->name);
+ Serial.print(name);
Serial.print(" SetKey ... ");
start = micros();
@@ -373,7 +386,7 @@ void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test
Serial.println(" per second");
}
-void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *test)
+void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name)
{
unsigned long start;
unsigned long elapsed;
@@ -382,7 +395,7 @@ void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *tes
memcpy_P(&testVector, test, sizeof(TestVector));
test = &testVector;
- Serial.print(test->name);
+ Serial.print(name);
Serial.print(" Encrypt ... ");
cipher->setKey(test->key, cipher->keySize());
@@ -399,7 +412,7 @@ void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *tes
Serial.println(" bytes per second");
}
-void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *test)
+void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name)
{
unsigned long start;
unsigned long elapsed;
@@ -408,7 +421,7 @@ void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *tes
memcpy_P(&testVector, test, sizeof(TestVector));
test = &testVector;
- Serial.print(test->name);
+ Serial.print(name);
Serial.print(" Decrypt ... ");
cipher->setKey(test->key, cipher->keySize());
@@ -425,7 +438,7 @@ void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *tes
Serial.println(" bytes per second");
}
-void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector *test)
+void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name)
{
unsigned long start;
unsigned long elapsed;
@@ -434,7 +447,7 @@ void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector
memcpy_P(&testVector, test, sizeof(TestVector));
test = &testVector;
- Serial.print(test->name);
+ Serial.print(name);
Serial.print(" AddAuthData ... ");
cipher->setKey(test->key, cipher->keySize());
@@ -452,7 +465,7 @@ void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector
Serial.println(" bytes per second");
}
-void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *test)
+void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name)
{
unsigned long start;
unsigned long elapsed;
@@ -461,7 +474,7 @@ void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *
memcpy_P(&testVector, test, sizeof(TestVector));
test = &testVector;
- Serial.print(test->name);
+ Serial.print(name);
Serial.print(" ComputeTag ... ");
cipher->setKey(test->key, cipher->keySize());
@@ -478,13 +491,13 @@ void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *
Serial.println(" per second");
}
-void perfCipher(AuthenticatedCipher *cipher, const struct TestVector *test)
+void perfCipher(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name)
{
- perfCipherSetKey(cipher, test);
- perfCipherEncrypt(cipher, test);
- perfCipherDecrypt(cipher, test);
- perfCipherAddAuthData(cipher, test);
- perfCipherComputeTag(cipher, test);
+ perfCipherSetKey(cipher, test, name);
+ perfCipherEncrypt(cipher, test, name);
+ perfCipherDecrypt(cipher, test, name);
+ perfCipherAddAuthData(cipher, test, name);
+ perfCipherComputeTag(cipher, test, name);
}
void setup()
@@ -493,6 +506,7 @@ void setup()
Serial.println();
+#ifndef TEST_SPECK
Serial.println("State Sizes:");
Serial.print("GCM ... ");
Serial.println(sizeof(*gcmaes128));
@@ -500,15 +514,22 @@ void setup()
Serial.println(sizeof(*gcmaes192));
Serial.print("GCM ... ");
Serial.println(sizeof(*gcmaes256));
+ Serial.print("GCM ... ");
+ Serial.println(sizeof(*gcmspeck));
+ Serial.print("GCM ... ");
+ Serial.println(sizeof(*gcmspecklm));
Serial.println();
+#endif
Serial.println("Test Vectors:");
gcmaes128 = new GCM();
testCipher(gcmaes128, &testVectorGCM1);
+#ifndef TEST_SPECK
testCipher(gcmaes128, &testVectorGCM2);
testCipher(gcmaes128, &testVectorGCM3);
testCipher(gcmaes128, &testVectorGCM4);
testCipher(gcmaes128, &testVectorGCM5);
+#endif
delete gcmaes128;
gcmaes192 = new GCM();
testCipher(gcmaes192, &testVectorGCM10);
@@ -520,15 +541,25 @@ void setup()
Serial.println();
Serial.println("Performance Tests:");
+#ifndef TEST_SPECK
gcmaes128 = new GCM();
- perfCipher(gcmaes128, &testVectorGCM1);
+ perfCipher(gcmaes128, &testVectorGCM1, testVectorGCM1.name);
delete gcmaes128;
gcmaes192 = new GCM();
- perfCipher(gcmaes192, &testVectorGCM10);
+ perfCipher(gcmaes192, &testVectorGCM10, testVectorGCM10.name);
delete gcmaes192;
gcmaes256 = new GCM();
- perfCipher(gcmaes256, &testVectorGCM16);
+ perfCipher(gcmaes256, &testVectorGCM16, testVectorGCM16.name);
delete gcmaes256;
+#endif
+#if defined(TEST_SPECK) || !defined(__AVR__)
+ gcmspeck = new GCM();
+ perfCipher(gcmspeck, &testVectorGCM16, "GCM-Speck-256");
+ delete gcmspeck;
+ gcmspecklm = new GCM();
+ perfCipher(gcmspecklm, &testVectorGCM16, "GCM-SpeckLowMemory-256");
+ delete gcmspecklm;
+#endif
}
void loop()