diff --git a/doc/crypto.dox b/doc/crypto.dox index f02c8979..bfcee113 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -86,12 +86,15 @@ Ardunino Mega 2560 running at 16 MHz are similar: AEAD AlgorithmEncryption (per byte)Decryption (per byte)Key SetupState Size (bytes) ChaChaPoly41.20us41.19us902.36us221 -GCM<AES128>183.25us182.80us1272.73us284 -GCM<AES192>189.92us189.47us1492.60us316 -GCM<AES256>196.59us196.13us1767.33us348 -EAX<AES128>71.14us71.14us1329.44us268 -EAX<Speck> (128-bit key)26.01us26.01us735.46us362 -EAX<SpeckLowMemory> (128-bit key)75.08us75.07us1243.66us122 +GCM<AES128>109.71us109.26us1265.69us284 +GCM<AES192>116.38us115.92us1485.56us316 +GCM<AES256>123.04us122.59us1760.28us348 +GCM<Speck> (256-bit key)87.78us87.32us714.41us378 +GCM<SpeckLowMemory> (256-bit key)114.30us113.84us1270.32us138 +EAX<AES128>71.14us71.14us1311.97us268 +EAX<AES256>97.80us97.80us1806.57us332 +EAX<Speck> (256-bit key)27.27us27.26us760.74us362 +EAX<SpeckLowMemory> (256-bit key)80.31us80.31us1316.60us122 Hash AlgorithmHashing (per byte)Finalization State Size (bytes) SHA25643.85us2841.04us 107 @@ -105,7 +108,7 @@ Ardunino Mega 2560 running at 16 MHz are similar: SHA256 (HMAC mode)43.85us8552.61us2836.49us107 BLAKE2s (HMAC mode)20.65us4055.56us1350.00us107 Poly130526.26us489.11us17.06us53 -GHASH148.14us17.09us21.87us33 +GHASH74.59us15.91us14.79us33 Public Key OperationTime (per operation)Comment Curve25519::eval()3119msRaw curve evaluation @@ -141,12 +144,15 @@ All figures are for the Arduino Due running at 84 MHz: AEAD AlgorithmEncryption (per byte)Decryption (per byte)Key SetupState Size (bytes) ChaChaPoly1.71us1.71us45.08us240 -GCM<AES128>10.29us10.29us223.82us312 -GCM<AES192>11.50us11.51us265.62us344 -GCM<AES256>12.67us12.67us313.06us376 -EAX<AES128>12.29us12.29us236.47us280 -EAX<Speck> (128-bit key)2.65us2.65us79.46us384 -EAX<SpeckLowMemory> (128-bit key)6.29us6.29us106.60us144 +GCM<AES128>10.90us10.90us248.83us312 +GCM<AES192>12.30us12.31us296.83us344 
+GCM<AES256>13.66us13.67us350.25us376 +GCM<Speck> (256-bit key)5.27us5.28us75.31us408 +GCM<SpeckLowMemory> (256-bit key)7.06us7.07us94.20us168 +EAX<AES128>12.33us12.33us234.91us280 +EAX<AES256>16.99us16.99us322.92us344 +EAX<Speck> (256-bit key)2.80us2.80us81.63us384 +EAX<SpeckLowMemory> (256-bit key)6.69us6.69us110.91us144 Hash AlgorithmHashing (per byte)Finalization State Size (bytes) SHA2561.15us76.60us 120 @@ -160,7 +166,7 @@ All figures are for the Arduino Due running at 84 MHz: SHA256 (HMAC mode)1.15us238.98us80.44us120 BLAKE2s (HMAC mode)0.72us157.75us57.18us120 Poly13050.81us19.01us2.57us60 -GHASH4.37us1.50us4.37us36 +GHASH4.47us1.52us2.60us36 Public Key OperationTime (per operation)Comment Curve25519::eval()103msRaw curve evaluation diff --git a/libraries/Crypto/EAX.cpp b/libraries/Crypto/EAX.cpp index 42db3b37..dc14b2ae 100644 --- a/libraries/Crypto/EAX.cpp +++ b/libraries/Crypto/EAX.cpp @@ -21,6 +21,7 @@ */ #include "EAX.h" +#include "GF128.h" #include "Crypto.h" #include @@ -145,19 +146,6 @@ void EAXCommon::clear() clean(state); } -// Doubles a 128-bit value in the GF(2^128) field. -static void gfDouble(uint8_t value[16]) -{ - uint16_t temp = 0; - for (uint8_t index = 16; index > 0; ) { - --index; - temp |= (((uint16_t)(value[index])) << 1); - value[index] = (uint8_t)temp; - temp >>= 8; - } - value[15] ^= (uint8_t)((-temp) & 0x87); -} - /** * \brief Initialises the first OMAC hashing context and creates the B value. * @@ -175,7 +163,7 @@ void EAXCommon::omacInitFirst(uint8_t omac[16]) // Generate the B value from the encrypted block of zeroes. // We will need this later when finalising the OMAC hashes. memcpy(state.b, omac, 16); - gfDouble(state.b); + GF128::dblEAX(state.b); } /** @@ -230,17 +218,17 @@ void EAXCommon::omacFinal(uint8_t omac[16]) // Apply padding if necessary. if (state.authPosn != 16) { // Need padding: XOR with P = 2 * B. 
- uint8_t p[16]; + uint32_t p[4]; memcpy(p, state.b, 16); - gfDouble(p); + GF128::dblEAX(p); omac[state.authPosn] ^= 0x80; for (uint8_t index = 0; index < 16; ++index) - omac[index] ^= p[index]; + omac[index] ^= ((const uint8_t *)p)[index]; clean(p); } else { // No padding necessary: XOR with B. for (uint8_t index = 0; index < 16; ++index) - omac[index] ^= state.b[index]; + omac[index] ^= ((const uint8_t *)(state.b))[index]; } // Encrypt the hash to get the final OMAC value. diff --git a/libraries/Crypto/EAX.h b/libraries/Crypto/EAX.h index 1499b744..32e2a83c 100644 --- a/libraries/Crypto/EAX.h +++ b/libraries/Crypto/EAX.h @@ -59,7 +59,7 @@ private: uint8_t stream[16]; uint8_t tag[16]; uint8_t hash[16]; - uint8_t b[16]; + uint32_t b[4]; uint8_t encPosn; uint8_t authPosn; uint8_t authMode; diff --git a/libraries/Crypto/GF128.cpp b/libraries/Crypto/GF128.cpp new file mode 100644 index 00000000..3924392d --- /dev/null +++ b/libraries/Crypto/GF128.cpp @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "GF128.h" +#include "utility/EndianUtil.h" +#include + +/** + * \class GF128 GF128.h + * \brief Operations in the Galois field GF(2^128). + * + * This class contains helper functions for performing operations in + * the Galois field GF(2^128) which is used as the basis of GCM and GHASH. + * These functions are provided for use by other cryptographic protocols + * that make use of GF(2^128). + * + * Most of the functions in this class use the field, polynomial, and + * byte ordering conventions described in NIST SP 800-38D (GCM). The one + * exception is dblEAX() which uses the conventions of EAX mode instead. + * + * References: NIST SP 800-38D + * + * \sa GCM, GHASH + */ + +/** + * \brief Initialize multiplication in the GF(2^128) field. + * + * \param H The hash state to be initialized. + * \param key Points to the 16 byte authentication key which is assumed + * to be in big-endian byte order. + * + * This function and the companion mul() are intended for use by other + * classes that need access to the raw GF(2^128) field multiplication of + * GHASH without the overhead of GHASH itself. + * + * \sa mul(), dbl() + */ +void GF128::mulInit(uint32_t H[4], const void *key) +{ +#if defined(__AVR__) + // Copy the key into H but leave it in big endian order because + // we can correct for the byte order in mul() below. + memcpy(H, key, 16); +#else + // Copy the key into H and convert from big endian to host order. + memcpy(H, key, 16); +#if defined(CRYPTO_LITTLE_ENDIAN) + H[0] = be32toh(H[0]); + H[1] = be32toh(H[1]); + H[2] = be32toh(H[2]); + H[3] = be32toh(H[3]); +#endif +#endif +} + +/** + * \brief Perform a multiplication in the GF(2^128) field. 
+ * + * \param Y The first value to multiply, and the result. This array is + * assumed to be in big-endian order on entry and exit. + * \param H The second value to multiply, which must have been initialized + * by the mulInit() function. + * + * This function and the companion mulInit() are intended for use by other + * classes that need access to the raw GF(2^128) field multiplication of + * GHASH without the overhead of GHASH itself. + * + * \sa mulInit(), dbl() + */ +void GF128::mul(uint32_t Y[4], const uint32_t H[4]) +{ +#if defined(__AVR__) + uint32_t Z[4] = {0, 0, 0, 0}; // Z = 0 + uint32_t V0 = H[0]; // V = H + uint32_t V1 = H[1]; + uint32_t V2 = H[2]; + uint32_t V3 = H[3]; + + // Multiply Z by V for the set bits in Y, starting at the top. + // This is a very simple bit by bit version that may not be very + // fast but it should be resistant to cache timing attacks. + for (uint8_t posn = 0; posn < 16; ++posn) { + uint8_t value = ((const uint8_t *)Y)[posn]; + for (uint8_t bit = 0; bit < 8; ++bit) { + __asm__ __volatile__ ( + // Extract the high bit of "value" and turn it into a mask. + "ldd r24,%8\n" + "lsl r24\n" + "std %8,r24\n" + "mov __tmp_reg__,__zero_reg__\n" + "sbc __tmp_reg__,__zero_reg__\n" + + // XOR V with Z if the bit is 1. 
+ "mov r24,%D0\n" // Z0 ^= (V0 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D4\n" + "eor r25,r24\n" + "std %D4,r25\n" + "mov r24,%C0\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C4\n" + "eor r25,r24\n" + "std %C4,r25\n" + "mov r24,%B0\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B4\n" + "eor r25,r24\n" + "std %B4,r25\n" + "mov r24,%A0\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A4\n" + "eor r25,r24\n" + "std %A4,r25\n" + "mov r24,%D1\n" // Z1 ^= (V1 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D5\n" + "eor r25,r24\n" + "std %D5,r25\n" + "mov r24,%C1\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C5\n" + "eor r25,r24\n" + "std %C5,r25\n" + "mov r24,%B1\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B5\n" + "eor r25,r24\n" + "std %B5,r25\n" + "mov r24,%A1\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A5\n" + "eor r25,r24\n" + "std %A5,r25\n" + "mov r24,%D2\n" // Z2 ^= (V2 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D6\n" + "eor r25,r24\n" + "std %D6,r25\n" + "mov r24,%C2\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C6\n" + "eor r25,r24\n" + "std %C6,r25\n" + "mov r24,%B2\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B6\n" + "eor r25,r24\n" + "std %B6,r25\n" + "mov r24,%A2\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A6\n" + "eor r25,r24\n" + "std %A6,r25\n" + "mov r24,%D3\n" // Z3 ^= (V3 & mask) + "and r24,__tmp_reg__\n" + "ldd r25,%D7\n" + "eor r25,r24\n" + "std %D7,r25\n" + "mov r24,%C3\n" + "and r24,__tmp_reg__\n" + "ldd r25,%C7\n" + "eor r25,r24\n" + "std %C7,r25\n" + "mov r24,%B3\n" + "and r24,__tmp_reg__\n" + "ldd r25,%B7\n" + "eor r25,r24\n" + "std %B7,r25\n" + "mov r24,%A3\n" + "and r24,__tmp_reg__\n" + "ldd r25,%A7\n" + "eor r25,r24\n" + "std %A7,r25\n" + + // Rotate V right by 1 bit. 
+ "lsr %A0\n" + "ror %B0\n" + "ror %C0\n" + "ror %D0\n" + "ror %A1\n" + "ror %B1\n" + "ror %C1\n" + "ror %D1\n" + "ror %A2\n" + "ror %B2\n" + "ror %C2\n" + "ror %D2\n" + "ror %A3\n" + "ror %B3\n" + "ror %C3\n" + "ror %D3\n" + "mov r24,__zero_reg__\n" + "sbc r24,__zero_reg__\n" + "andi r24,0xE1\n" + "eor %A0,r24\n" + : "+r"(V0), "+r"(V1), "+r"(V2), "+r"(V3) + : "Q"(Z[0]), "Q"(Z[1]), "Q"(Z[2]), "Q"(Z[3]), "Q"(value) + : "r24", "r25" + ); + } + } + + // We have finished the block so copy Z into Y and byte-swap. + __asm__ __volatile__ ( + "ldd __tmp_reg__,%A0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D0\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%A1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D1\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%A2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D2\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%A3\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%B3\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%C3\n" + "st X+,__tmp_reg__\n" + "ldd __tmp_reg__,%D3\n" + "st X,__tmp_reg__\n" + : : "Q"(Z[0]), "Q"(Z[1]), "Q"(Z[2]), "Q"(Z[3]), "x"(Y) + ); +#else // !__AVR__ + uint32_t Z0 = 0; // Z = 0 + uint32_t Z1 = 0; + uint32_t Z2 = 0; + uint32_t Z3 = 0; + uint32_t V0 = H[0]; // V = H + uint32_t V1 = H[1]; + uint32_t V2 = H[2]; + uint32_t V3 = H[3]; + + // Multiply Z by V for the set bits in Y, starting at the top. + // This is a very simple bit by bit version that may not be very + // fast but it should be resistant to cache timing attacks. 
+ for (uint8_t posn = 0; posn < 16; ++posn) { + uint8_t value = ((const uint8_t *)Y)[posn]; + for (uint8_t bit = 0; bit < 8; ++bit, value <<= 1) { + // Extract the high bit of "value" and turn it into a mask. + uint32_t mask = (~((uint32_t)(value >> 7))) + 1; + + // XOR V with Z if the bit is 1. + Z0 ^= (V0 & mask); + Z1 ^= (V1 & mask); + Z2 ^= (V2 & mask); + Z3 ^= (V3 & mask); + + // Rotate V right by 1 bit. + mask = ((~(V3 & 0x01)) + 1) & 0xE1000000; + V3 = (V3 >> 1) | (V2 << 31); + V2 = (V2 >> 1) | (V1 << 31); + V1 = (V1 >> 1) | (V0 << 31); + V0 = (V0 >> 1) ^ mask; + } + } + + // We have finished the block so copy Z into Y and byte-swap. + Y[0] = htobe32(Z0); + Y[1] = htobe32(Z1); + Y[2] = htobe32(Z2); + Y[3] = htobe32(Z3); +#endif // !__AVR__ +} + +/** + * \brief Doubles a value in the GF(2^128) field. + * + * \param V The value to double, and the result. This array is + * assumed to be in big-endian order on entry and exit. + * + * Block cipher modes such as XEX + * are similar to CTR mode but instead of incrementing the nonce every + * block, the modes multiply the nonce by 2 in the GF(2^128) field every + * block. This function is provided to help with implementing such modes. 
+ * + * \sa dblEAX(), mul() + */ +void GF128::dbl(uint32_t V[4]) +{ +#if defined(__AVR__) + __asm__ __volatile__ ( + "ld r16,Z\n" + "ldd r17,Z+1\n" + "ldd r18,Z+2\n" + "ldd r19,Z+3\n" + "lsr r16\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "std Z+1,r17\n" + "std Z+2,r18\n" + "std Z+3,r19\n" + "ldd r17,Z+4\n" + "ldd r18,Z+5\n" + "ldd r19,Z+6\n" + "ldd r20,Z+7\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "ror r20\n" + "std Z+4,r17\n" + "std Z+5,r18\n" + "std Z+6,r19\n" + "std Z+7,r20\n" + "ldd r17,Z+8\n" + "ldd r18,Z+9\n" + "ldd r19,Z+10\n" + "ldd r20,Z+11\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "ror r20\n" + "std Z+8,r17\n" + "std Z+9,r18\n" + "std Z+10,r19\n" + "std Z+11,r20\n" + "ldd r17,Z+12\n" + "ldd r18,Z+13\n" + "ldd r19,Z+14\n" + "ldd r20,Z+15\n" + "ror r17\n" + "ror r18\n" + "ror r19\n" + "ror r20\n" + "std Z+12,r17\n" + "std Z+13,r18\n" + "std Z+14,r19\n" + "std Z+15,r20\n" + "mov r17,__zero_reg__\n" + "sbc r17,__zero_reg__\n" + "andi r17,0xE1\n" + "eor r16,r17\n" + "st Z,r16\n" + : : "z"(V) + : "r16", "r17", "r18", "r19", "r20", "memory" /* asm writes V[] through Z */ + ); +#else + uint32_t V0 = be32toh(V[0]); + uint32_t V1 = be32toh(V[1]); + uint32_t V2 = be32toh(V[2]); + uint32_t V3 = be32toh(V[3]); + uint32_t mask = ((~(V3 & 0x01)) + 1) & 0xE1000000; + V3 = (V3 >> 1) | (V2 << 31); + V2 = (V2 >> 1) | (V1 << 31); + V1 = (V1 >> 1) | (V0 << 31); + V0 = (V0 >> 1) ^ mask; + V[0] = htobe32(V0); + V[1] = htobe32(V1); + V[2] = htobe32(V2); + V[3] = htobe32(V3); +#endif +} + +/** + * \brief Doubles a value in the GF(2^128) field using EAX conventions. + * + * \param V The value to double, and the result. This array is + * assumed to be in big-endian order on entry and exit. + * + * This function differs from dbl() in that it uses the conventions of EAX mode + * instead of those of NIST SP 800-38D (GCM). The two operations have + * equivalent security but the bits are ordered differently with the + * value shifted left instead of right. 
+ * + * References: https://en.wikipedia.org/wiki/EAX_mode, + * http://web.cs.ucdavis.edu/~rogaway/papers/eax.html + * + * \sa dbl(), mul() + */ +void GF128::dblEAX(uint32_t V[4]) +{ +#if defined(__AVR__) + __asm__ __volatile__ ( + "ldd r16,Z+15\n" + "ldd r17,Z+14\n" + "ldd r18,Z+13\n" + "ldd r19,Z+12\n" + "lsl r16\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "std Z+14,r17\n" + "std Z+13,r18\n" + "std Z+12,r19\n" + "ldd r17,Z+11\n" + "ldd r18,Z+10\n" + "ldd r19,Z+9\n" + "ldd r20,Z+8\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+11,r17\n" + "std Z+10,r18\n" + "std Z+9,r19\n" + "std Z+8,r20\n" + "ldd r17,Z+7\n" + "ldd r18,Z+6\n" + "ldd r19,Z+5\n" + "ldd r20,Z+4\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+7,r17\n" + "std Z+6,r18\n" + "std Z+5,r19\n" + "std Z+4,r20\n" + "ldd r17,Z+3\n" + "ldd r18,Z+2\n" + "ldd r19,Z+1\n" + "ld r20,Z\n" + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "std Z+3,r17\n" + "std Z+2,r18\n" + "std Z+1,r19\n" + "st Z,r20\n" + "mov r17,__zero_reg__\n" + "sbc r17,__zero_reg__\n" + "andi r17,0x87\n" + "eor r16,r17\n" + "std Z+15,r16\n" + : : "z"(V) + : "r16", "r17", "r18", "r19", "r20", "memory" /* asm writes V[] through Z */ + ); +#else + uint32_t V0 = be32toh(V[0]); + uint32_t V1 = be32toh(V[1]); + uint32_t V2 = be32toh(V[2]); + uint32_t V3 = be32toh(V[3]); + uint32_t mask = ((~(V0 >> 31)) + 1) & 0x00000087; + V0 = (V0 << 1) | (V1 >> 31); + V1 = (V1 << 1) | (V2 >> 31); + V2 = (V2 << 1) | (V3 >> 31); + V3 = (V3 << 1) ^ mask; + V[0] = htobe32(V0); + V[1] = htobe32(V1); + V[2] = htobe32(V2); + V[3] = htobe32(V3); +#endif +} diff --git a/libraries/Crypto/GF128.h b/libraries/Crypto/GF128.h new file mode 100644 index 00000000..715daa98 --- /dev/null +++ b/libraries/Crypto/GF128.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_GF128_h +#define CRYPTO_GF128_h + +#include + +class GF128 +{ +private: + GF128() {} + ~GF128() {} + +public: + static void mulInit(uint32_t H[4], const void *key); + static void mul(uint32_t Y[4], const uint32_t H[4]); + static void dbl(uint32_t V[4]); + static void dblEAX(uint32_t V[4]); +}; + +#endif diff --git a/libraries/Crypto/GHASH.cpp b/libraries/Crypto/GHASH.cpp index f8fd6797..6853f081 100644 --- a/libraries/Crypto/GHASH.cpp +++ b/libraries/Crypto/GHASH.cpp @@ -21,8 +21,8 @@ */ #include "GHASH.h" +#include "GF128.h" #include "Crypto.h" -#include "utility/EndianUtil.h" #include /** @@ -66,16 +66,7 @@ GHASH::~GHASH() */ void GHASH::reset(const void *key) { - // Copy the key into H and convert from big endian to host order. 
- memcpy(state.H, key, 16); -#if defined(CRYPTO_LITTLE_ENDIAN) - state.H[0] = be32toh(state.H[0]); - state.H[1] = be32toh(state.H[1]); - state.H[2] = be32toh(state.H[2]); - state.H[3] = be32toh(state.H[3]); -#endif - - // Reset the hash. + GF128::mulInit(state.H, key); memset(state.Y, 0, sizeof(state.Y)); state.posn = 0; } @@ -106,7 +97,7 @@ void GHASH::update(const void *data, size_t len) len -= size; d += size; if (state.posn == 16) { - processChunk(); + GF128::mul(state.Y, state.H); state.posn = 0; } } @@ -148,7 +139,7 @@ void GHASH::pad() if (state.posn != 0) { // Padding involves XOR'ing the rest of state.Y with zeroes, // which does nothing. Immediately process the next chunk. - processChunk(); + GF128::mul(state.Y, state.H); state.posn = 0; } } @@ -160,45 +151,3 @@ void GHASH::clear() { clean(state); } - -void GHASH::processChunk() -{ - uint32_t Z0 = 0; // Z = 0 - uint32_t Z1 = 0; - uint32_t Z2 = 0; - uint32_t Z3 = 0; - uint32_t V0 = state.H[0]; // V = H - uint32_t V1 = state.H[1]; - uint32_t V2 = state.H[2]; - uint32_t V3 = state.H[3]; - - // Multiply Z by V for the set bits in Y, starting at the top. - // This is a very simple bit by bit version that may not be very - // fast but it should be resistant to cache timing attacks. - for (uint8_t posn = 0; posn < 16; ++posn) { - uint8_t value = ((const uint8_t *)state.Y)[posn]; - for (uint8_t bit = 0; bit < 8; ++bit, value <<= 1) { - // Extract the high bit of "value" and turn it into a mask. - uint32_t mask = (~((uint32_t)(value >> 7))) + 1; - - // XOR V with Z if the bit is 1. - Z0 ^= (V0 & mask); - Z1 ^= (V1 & mask); - Z2 ^= (V2 & mask); - Z3 ^= (V3 & mask); - - // Rotate V right by 1 bit. - mask = ((~(V3 & 0x01)) + 1) & 0xE1000000; - V3 = (V3 >> 1) | (V2 << 31); - V2 = (V2 >> 1) | (V1 << 31); - V1 = (V1 >> 1) | (V0 << 31); - V0 = (V0 >> 1) ^ mask; - } - } - - // We have finished the block so copy Z into Y and byte-swap. 
- state.Y[0] = htobe32(Z0); - state.Y[1] = htobe32(Z1); - state.Y[2] = htobe32(Z2); - state.Y[3] = htobe32(Z3); -} diff --git a/libraries/Crypto/GHASH.h b/libraries/Crypto/GHASH.h index 07a84bfa..20677685 100644 --- a/libraries/Crypto/GHASH.h +++ b/libraries/Crypto/GHASH.h @@ -46,8 +46,6 @@ private: uint32_t Y[4]; uint8_t posn; } state; - - void processChunk(); }; #endif diff --git a/libraries/Crypto/examples/TestEAX/TestEAX.ino b/libraries/Crypto/examples/TestEAX/TestEAX.ino index 89e6f5f4..e6ac415f 100644 --- a/libraries/Crypto/examples/TestEAX/TestEAX.ino +++ b/libraries/Crypto/examples/TestEAX/TestEAX.ino @@ -233,6 +233,7 @@ static TestVector const testVectorEAX10 PROGMEM = { TestVector testVector; EAX *eax; +EAX *eax256; EAX *eaxSpeck; EAX *eaxSpeckLowMemory; @@ -353,7 +354,7 @@ void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test start = micros(); for (count = 0; count < 1000; ++count) { - cipher->setKey(test->key, 16); + cipher->setKey(test->key, cipher->keySize()); cipher->setIV(test->iv, test->ivsize); } elapsed = micros() - start; @@ -378,7 +379,7 @@ void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *tes Serial.print(test->name); Serial.print(" Encrypt ... "); - cipher->setKey(test->key, 16); + cipher->setKey(test->key, cipher->keySize()); cipher->setIV(test->iv, test->ivsize); start = micros(); for (count = 0; count < 500; ++count) { @@ -406,7 +407,7 @@ void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *tes Serial.print(test->name); Serial.print(" Decrypt ... "); - cipher->setKey(test->key, 16); + cipher->setKey(test->key, cipher->keySize()); cipher->setIV(test->iv, test->ivsize); start = micros(); for (count = 0; count < 500; ++count) { @@ -434,7 +435,7 @@ void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector Serial.print(test->name); Serial.print(" AddAuthData ... 
"); - cipher->setKey(test->key, 16); + cipher->setKey(test->key, cipher->keySize()); cipher->setIV(test->iv, test->ivsize); start = micros(); memset(buffer, 0xBA, 128); @@ -463,7 +464,7 @@ void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector * Serial.print(test->name); Serial.print(" ComputeTag ... "); - cipher->setKey(test->key, 16); + cipher->setKey(test->key, cipher->keySize()); cipher->setIV(test->iv, test->ivsize); start = micros(); for (count = 0; count < 1000; ++count) { @@ -495,6 +496,8 @@ void setup() Serial.println("State Sizes:"); Serial.print("EAX ... "); Serial.println(sizeof(*eax)); + Serial.print("EAX ... "); + Serial.println(sizeof(*eax256)); Serial.print("EAX ... "); Serial.println(sizeof(*eaxSpeck)); Serial.print("EAX ... "); @@ -520,6 +523,10 @@ void setup() perfCipher(eax, &testVectorEAX1, "AES-128"); Serial.println(); delete eax; + eax256 = new EAX(); + perfCipher(eax256, &testVectorEAX1, "AES-256"); + Serial.println(); + delete eax256; eaxSpeck = new EAX(); perfCipher(eaxSpeck, &testVectorEAX1, "Speck"); Serial.println(); diff --git a/libraries/Crypto/examples/TestGCM/TestGCM.ino b/libraries/Crypto/examples/TestGCM/TestGCM.ino index dd015615..7084c6f8 100644 --- a/libraries/Crypto/examples/TestGCM/TestGCM.ino +++ b/libraries/Crypto/examples/TestGCM/TestGCM.ino @@ -26,10 +26,19 @@ This example runs tests on the GCM implementation to verify correct behaviour. #include #include +#include +#include #include #include #include +// There isn't enough memory to test both AES and Speck on the Uno, +// so disable Speck testing on AVR platforms unless explicitly enabled. +// When enabled, some of the AES tests are disabled to reclaim memory. 
+#if defined(__AVR__) +//#define TEST_SPECK 1 +#endif + #define MAX_PLAINTEXT_LEN 64 struct TestVector @@ -65,6 +74,7 @@ static TestVector const testVectorGCM1 PROGMEM = { .tagsize = 16, .ivsize = 12 }; +#ifndef TEST_SPECK static TestVector const testVectorGCM2 PROGMEM = { .name = "AES-128 GCM #2", .key = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -176,6 +186,7 @@ static TestVector const testVectorGCM5 PROGMEM = { .tagsize = 16, .ivsize = 8 }; +#endif // !TEST_SPECK static TestVector const testVectorGCM10 PROGMEM = { .name = "AES-192 GCM #10", .key = {0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c, @@ -249,6 +260,8 @@ TestVector testVector; GCM *gcmaes128 = 0; GCM *gcmaes192 = 0; GCM *gcmaes256 = 0; +GCM *gcmspeck = 0; +GCM *gcmspecklm = 0; byte buffer[128]; @@ -348,7 +361,7 @@ void testCipher(AuthenticatedCipher *cipher, const struct TestVector *test) Serial.println("Failed"); } -void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test) +void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name) { unsigned long start; unsigned long elapsed; @@ -357,7 +370,7 @@ void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test memcpy_P(&testVector, test, sizeof(TestVector)); test = &testVector; - Serial.print(test->name); + Serial.print(name); Serial.print(" SetKey ... 
"); start = micros(); @@ -373,7 +386,7 @@ void perfCipherSetKey(AuthenticatedCipher *cipher, const struct TestVector *test Serial.println(" per second"); } -void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *test) +void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name) { unsigned long start; unsigned long elapsed; @@ -382,7 +395,7 @@ void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *tes memcpy_P(&testVector, test, sizeof(TestVector)); test = &testVector; - Serial.print(test->name); + Serial.print(name); Serial.print(" Encrypt ... "); cipher->setKey(test->key, cipher->keySize()); @@ -399,7 +412,7 @@ void perfCipherEncrypt(AuthenticatedCipher *cipher, const struct TestVector *tes Serial.println(" bytes per second"); } -void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *test) +void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name) { unsigned long start; unsigned long elapsed; @@ -408,7 +421,7 @@ void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *tes memcpy_P(&testVector, test, sizeof(TestVector)); test = &testVector; - Serial.print(test->name); + Serial.print(name); Serial.print(" Decrypt ... "); cipher->setKey(test->key, cipher->keySize()); @@ -425,7 +438,7 @@ void perfCipherDecrypt(AuthenticatedCipher *cipher, const struct TestVector *tes Serial.println(" bytes per second"); } -void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector *test) +void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name) { unsigned long start; unsigned long elapsed; @@ -434,7 +447,7 @@ void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector memcpy_P(&testVector, test, sizeof(TestVector)); test = &testVector; - Serial.print(test->name); + Serial.print(name); Serial.print(" AddAuthData ... 
"); cipher->setKey(test->key, cipher->keySize()); @@ -452,7 +465,7 @@ void perfCipherAddAuthData(AuthenticatedCipher *cipher, const struct TestVector Serial.println(" bytes per second"); } -void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *test) +void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name) { unsigned long start; unsigned long elapsed; @@ -461,7 +474,7 @@ void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector * memcpy_P(&testVector, test, sizeof(TestVector)); test = &testVector; - Serial.print(test->name); + Serial.print(name); Serial.print(" ComputeTag ... "); cipher->setKey(test->key, cipher->keySize()); @@ -478,13 +491,13 @@ void perfCipherComputeTag(AuthenticatedCipher *cipher, const struct TestVector * Serial.println(" per second"); } -void perfCipher(AuthenticatedCipher *cipher, const struct TestVector *test) +void perfCipher(AuthenticatedCipher *cipher, const struct TestVector *test, const char *name) { - perfCipherSetKey(cipher, test); - perfCipherEncrypt(cipher, test); - perfCipherDecrypt(cipher, test); - perfCipherAddAuthData(cipher, test); - perfCipherComputeTag(cipher, test); + perfCipherSetKey(cipher, test, name); + perfCipherEncrypt(cipher, test, name); + perfCipherDecrypt(cipher, test, name); + perfCipherAddAuthData(cipher, test, name); + perfCipherComputeTag(cipher, test, name); } void setup() @@ -493,6 +506,7 @@ void setup() Serial.println(); +#ifndef TEST_SPECK Serial.println("State Sizes:"); Serial.print("GCM ... "); Serial.println(sizeof(*gcmaes128)); @@ -500,15 +514,22 @@ void setup() Serial.println(sizeof(*gcmaes192)); Serial.print("GCM ... "); Serial.println(sizeof(*gcmaes256)); + Serial.print("GCM ... "); + Serial.println(sizeof(*gcmspeck)); + Serial.print("GCM ... 
"); + Serial.println(sizeof(*gcmspecklm)); Serial.println(); +#endif Serial.println("Test Vectors:"); gcmaes128 = new GCM(); testCipher(gcmaes128, &testVectorGCM1); +#ifndef TEST_SPECK testCipher(gcmaes128, &testVectorGCM2); testCipher(gcmaes128, &testVectorGCM3); testCipher(gcmaes128, &testVectorGCM4); testCipher(gcmaes128, &testVectorGCM5); +#endif delete gcmaes128; gcmaes192 = new GCM(); testCipher(gcmaes192, &testVectorGCM10); @@ -520,15 +541,25 @@ void setup() Serial.println(); Serial.println("Performance Tests:"); +#ifndef TEST_SPECK gcmaes128 = new GCM(); - perfCipher(gcmaes128, &testVectorGCM1); + perfCipher(gcmaes128, &testVectorGCM1, testVectorGCM1.name); delete gcmaes128; gcmaes192 = new GCM(); - perfCipher(gcmaes192, &testVectorGCM10); + perfCipher(gcmaes192, &testVectorGCM10, testVectorGCM10.name); delete gcmaes192; gcmaes256 = new GCM(); - perfCipher(gcmaes256, &testVectorGCM16); + perfCipher(gcmaes256, &testVectorGCM16, testVectorGCM16.name); delete gcmaes256; +#endif +#if defined(TEST_SPECK) || !defined(__AVR__) + gcmspeck = new GCM(); + perfCipher(gcmspeck, &testVectorGCM16, "GCM-Speck-256"); + delete gcmspeck; + gcmspecklm = new GCM(); + perfCipher(gcmspecklm, &testVectorGCM16, "GCM-SpeckLowMemory-256"); + delete gcmspecklm; +#endif } void loop()