diff --git a/doc/crypto.dox b/doc/crypto.dox index d33c82b3..0df61d00 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -50,8 +50,8 @@ is desirable. If code size is an issue for your application (for example on very low end Arduino variants), then Speck on AVR is less than half the code size of ChaCha, at the cost of more data memory for the state and longer key -setup times. The SpeckLowMemory class is even smaller at the cost of -some performance when encrypting. +setup times. The SpeckTiny and SpeckSmall classes are even smaller at +the cost of some performance when encrypting. BLAKE2s and BLAKE2b are variations on the ChaCha stream cipher, designed for hashing, with 256-bit and 512-bit hash outputs respectively. They are @@ -80,9 +80,12 @@ Ardunino Mega 2560 running at 16 MHz are similar: Speck (128-bit key, ECB mode)10.72us11.09us287.02us275 Speck (192-bit key, ECB mode)11.03us11.42us298.21us275 Speck (256-bit key, ECB mode)11.35us11.74us309.66us275 -SpeckLowMemory (128-bit key, ECB mode)35.25us 10.22us35 -SpeckLowMemory (192-bit key, ECB mode)36.56us 13.62us35 -SpeckLowMemory (256-bit key, ECB mode)37.87us 16.89us35 +SpeckSmall (128-bit key, ECB mode)35.25us36.46us207.66us67 +SpeckSmall (192-bit key, ECB mode)36.56us37.56us220.55us67 +SpeckSmall (256-bit key, ECB mode)37.87us38.67us233.32us67 +SpeckTiny (128-bit key, ECB mode)35.25us 10.22us35 +SpeckTiny (192-bit key, ECB mode)36.56us 13.62us35 +SpeckTiny (256-bit key, ECB mode)37.87us 16.89us35 AEAD AlgorithmEncryption (per byte)Decryption (per byte)Key SetupState Size (bytes) ChaChaPoly41.20us41.19us902.36us221 @@ -90,11 +93,11 @@ Ardunino Mega 2560 running at 16 MHz are similar: GCM<AES192>116.38us115.92us1485.56us316 GCM<AES256>123.04us122.59us1760.28us348 GCM<Speck> (256-bit key)87.78us87.32us714.41us378 -GCM<SpeckLowMemory> (256-bit key)114.30us113.84us1270.32us138 +GCM<SpeckTiny> (256-bit key)114.30us113.84us1270.32us138 EAX<AES128>71.14us71.14us1311.97us268 EAX<AES256>97.80us97.80us1806.57us332 EAX<Speck> (256-bit key)27.27us27.26us760.74us362 -EAX<SpeckLowMemory> (256-bit key)80.31us80.31us1316.60us122 +EAX<SpeckTiny> (256-bit key)80.31us80.31us1316.60us122 Hash AlgorithmHashing (per byte)Finalization State Size (bytes) SHA25643.85us2841.04us 107 @@ -138,9 +141,12 @@ All figures are for the Arduino Due running at 84 MHz: Speck (128-bit key, ECB mode)0.97us0.96us36.80us288 Speck (192-bit key, ECB mode)1.00us0.98us38.14us288 Speck (256-bit key, ECB mode)1.03us1.01us39.31us288 -SpeckLowMemory (128-bit key, ECB mode)2.72us 1.47us48 -SpeckLowMemory (192-bit key, ECB mode)2.81us 1.54us48 -SpeckLowMemory (256-bit key, ECB mode)2.90us 1.83us48 +SpeckSmall (128-bit key, ECB mode)2.72us2.30us26.89us80 +SpeckSmall (192-bit key, ECB mode)2.80us2.39us27.80us80 +SpeckSmall (256-bit key, ECB mode)2.90us2.48us29.08us80 +SpeckTiny (128-bit key, ECB mode)2.72us 1.47us48 +SpeckTiny (192-bit key, ECB mode)2.81us 1.54us48 +SpeckTiny (256-bit key, ECB mode)2.90us 1.83us48 AEAD AlgorithmEncryption (per byte)Decryption (per byte)Key SetupState Size (bytes) ChaChaPoly1.71us1.71us45.08us240 @@ -148,11 +154,11 @@ All figures are for the Arduino Due running at 84 MHz: GCM<AES192>12.30us12.31us296.83us344 GCM<AES256>13.66us13.67us350.25us376 GCM<Speck> (256-bit key)5.27us5.28us75.31us408 -GCM<SpeckLowMemory> (256-bit key)7.06us7.07us94.20us168 +GCM<SpeckTiny> (256-bit key)7.06us7.07us94.20us168 EAX<AES128>12.33us12.33us234.91us280 EAX<AES256>16.99us16.99us322.92us344 EAX<Speck> (256-bit key)2.80us2.80us81.63us384 -EAX<SpeckLowMemory> (256-bit key)6.69us6.69us110.91us144 +EAX<SpeckTiny> (256-bit key)6.69us6.69us110.91us144 Hash AlgorithmHashing (per byte)Finalization State Size (bytes) SHA2561.15us76.60us 120 diff --git a/libraries/Crypto/Speck.cpp b/libraries/Crypto/Speck.cpp index 5834f70d..8c50b976 100644 --- a/libraries/Crypto/Speck.cpp +++ b/libraries/Crypto/Speck.cpp @@ -43,14 +43,14 @@ * weaknesses in the full-round version of Speck. But if you are wary of * ciphers designed by the NSA, then use ChaCha or AES instead. * - * The SpeckLowMemory class provides an alternative implementation that - * has reduced RAM and flash size requirements at the cost of some encryption - * performance. + * The SpeckTiny and SpeckSmall classes provide alternative implementations + * that have reduced RAM and flash size requirements at the cost of some + * features and performance. * * References: https://en.wikipedia.org/wiki/Speck_%28cipher%29, * http://eprint.iacr.org/2013/404 * - * \sa SpeckLowMemory + * \sa SpeckTiny, SpeckSmall */ // The "avr-gcc" compiler doesn't do a very good job of compiling @@ -390,8 +390,8 @@ void Speck::encryptBlock(uint8_t *output, const uint8_t *input) "rol %B3\n" "rol %C3\n" "rol %D3\n" - "adc %A2,__zero_reg__\n" + "lsl %A2\n" // y = leftRotate1_64(y) "rol %B2\n" "rol %C2\n" diff --git a/libraries/Crypto/SpeckSmall.cpp b/libraries/Crypto/SpeckSmall.cpp new file mode 100644 index 00000000..c7ca6ddd --- /dev/null +++ b/libraries/Crypto/SpeckSmall.cpp @@ -0,0 +1,652 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "SpeckSmall.h" +#include "Crypto.h" +#include "utility/RotateUtil.h" +#include "utility/EndianUtil.h" +#include + +/** + * \class SpeckSmall SpeckSmall.h + * \brief Speck block cipher with a 128-bit block size (small-memory version). + * + * This class differs from the Speck class in that the RAM requirements are + * vastly reduced. The key schedule is expanded round by round instead of + * being generated and stored by setKey(). The performance of encryption + * and decryption is slightly less because of this. + * + * This class is useful when RAM is at a premium and reduced encryption + * performance is not a hindrance to the application. Even though the + * performance is reduced, this class is still faster than AES with + * equivalent key sizes. + * + * The companion SpeckTiny class uses even less RAM but only supports the + * encryptBlock() operation. Block cipher modes like CTR, EAX, and GCM + * do not need the decryptBlock() operation, so SpeckTiny may be a better + * option than SpeckSmall for many applications. + * + * See the documentation for the Speck class for more information on the + * Speck family of block ciphers. + * + * References: https://en.wikipedia.org/wiki/Speck_%28cipher%29, + * http://eprint.iacr.org/2013/404 + * + * \sa Speck, SpeckTiny + */ + +// The "avr-gcc" compiler doesn't do a very good job of compiling +// code involving 64-bit values. So we have to use inline assembly. +// It also helps to break the state up into 32-bit quantities +// because "asm" supports register names like %A0, %B0, %C0, %D0 +// for the bytes in a 32-bit quantity, but it does not support +// %E0, %F0, %G0, %H0 for the high bytes of a 64-bit quantity. +#if defined(__AVR__) +#define USE_AVR_INLINE_ASM 1 +#endif + +// Pack/unpack byte-aligned big-endian 64-bit quantities. +#define pack64(data, value) \ + do { \ + uint64_t v = htobe64((value)); \ + memcpy((data), &v, sizeof(uint64_t)); \ + } while (0) +#define unpack64(value, data) \ + do { \ + memcpy(&(value), (data), sizeof(uint64_t)); \ + (value) = be64toh((value)); \ + } while (0) + +/** + * \brief Constructs a small-memory Speck block cipher with no initial key. + * + * This constructor must be followed by a call to setKey() before the + * block cipher can be used for encryption or decryption. + */ +SpeckSmall::SpeckSmall() +{ +} + +SpeckSmall::~SpeckSmall() +{ + clean(l); +} + +bool SpeckSmall::setKey(const uint8_t *key, size_t len) +{ + // Try setting the key for the forward encryption direction. + if (!SpeckTiny::setKey(key, len)) + return false; + +#if USE_AVR_INLINE_ASM + // Expand the key schedule to get the l and s values at the end + // of the schedule, which will allow us to reverse it later. + uint8_t mb = (rounds - 31) * 8; + __asm__ __volatile__ ( + "ld r16,Z+\n" // s = k[0] + "ld r17,Z+\n" + "ld r18,Z+\n" + "ld r19,Z+\n" + "ld r20,Z+\n" + "ld r21,Z+\n" + "ld r22,Z+\n" + "ld r23,Z+\n" + + "mov r24,%3\n" // memcpy(l, k + 1, mb) + "3:\n" + "ld __tmp_reg__,Z+\n" + "st X+,__tmp_reg__\n" + "dec r24\n" + "brne 3b\n" + "sub %A1,%3\n" // return X to its initial value + "sbc %B1,__zero_reg__\n" + + "1:\n" + + // l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i; + "add %A1,%2\n" // X = &(l[li_in]) + "adc %B1,__zero_reg__\n" + "ld r15,X+\n" // x = rightRotate8_64(l[li_in]) + "ld r8,X+\n" + "ld r9,X+\n" + "ld r10,X+\n" + "ld r11,X+\n" + "ld r12,X+\n" + "ld r13,X+\n" + "ld r14,X+\n" + + "add r8,r16\n" // x += s + "adc r9,r17\n" + "adc r10,r18\n" + "adc r11,r19\n" + "adc r12,r20\n" + "adc r13,r21\n" + "adc r14,r22\n" + "adc r15,r23\n" + + "eor r8,%4\n" // x ^= i + + // X = X - li_in + li_out + "ldi r24,8\n" // li_in = li_in + 1 + "add %2,r24\n" + "sub %A1,%2\n" // return X to its initial value + "sbc %B1,__zero_reg__\n" + "ldi r25,0x1f\n" + "and %2,r25\n" // li_in = li_in % 4 + "add %A1,%3\n" // X = &(l[li_out]) + "adc %B1,__zero_reg__\n" + + "st X+,r8\n" // l[li_out] = x + "st X+,r9\n" + "st X+,r10\n" + "st X+,r11\n" + "st X+,r12\n" + "st X+,r13\n" + "st X+,r14\n" + "st X+,r15\n" + + "add %3,r24\n" // li_out = li_out + 1 + "sub %A1,%3\n" // return X to its initial value + "sbc %B1,__zero_reg__\n" + "and %3,r25\n" // li_out = li_out % 4 + + // s = leftRotate3_64(s) ^ l[li_out]; + "lsl r16\n" // s = leftRotate1_64(s) + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "rol r21\n" + "rol r22\n" + "rol r23\n" + "adc r16,__zero_reg__\n" + + "lsl r16\n" // s = leftRotate1_64(s) + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "rol r21\n" + "rol r22\n" + "rol r23\n" + "adc r16,__zero_reg__\n" + + "lsl r16\n" // s = leftRotate1_64(s) + "rol r17\n" + "rol r18\n" + "rol r19\n" + "rol r20\n" + "rol r21\n" + "rol r22\n" + "rol r23\n" + "adc r16,__zero_reg__\n" + + "eor r16,r8\n" // s ^= x + "eor r17,r9\n" + "eor r18,r10\n" + "eor r19,r11\n" + "eor r20,r12\n" + "eor r21,r13\n" + "eor r22,r14\n" + "eor r23,r15\n" + + // Loop + "inc %4\n" // ++i + "dec %5\n" // --rounds + "breq 2f\n" + "rjmp 1b\n" + "2:\n" + + "add %A1,%3\n" // X = &(l[li_out]) + "adc %B1,__zero_reg__\n" + "st X+,r16\n" // l[li_out] = s + "st X+,r17\n" + "st X+,r18\n" + "st X+,r19\n" + "st X+,r20\n" + "st X+,r21\n" + "st X+,r22\n" + "st X+,r23\n" + + : : "z"(k), "x"(l), + "r"((uint8_t)0), // initial value of li_in + "r"((uint8_t)mb), // initial value of li_out + "r"(0), // initial value of i + "r"(rounds - 1) + : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25" + ); + return true; +#else + // Expand the key schedule to get the l and s values at the end + // of the schedule, which will allow us to reverse it later. + uint8_t m = rounds - 30; + uint8_t li_in = 0; + uint8_t li_out = m - 1; + uint64_t s = k[0]; + memcpy(l, k + 1, (m - 1) * sizeof(uint64_t)); + for (uint8_t i = 0; i < (rounds - 1); ++i) { + l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i; + s = leftRotate3_64(s) ^ l[li_out]; + li_in = (li_in + 1) & 0x03; + li_out = (li_out + 1) & 0x03; + } + + // Save the final s value in the l array so that we can recover it later. + l[li_out] = s; + return true; +#endif +} + +void SpeckSmall::decryptBlock(uint8_t *output, const uint8_t *input) +{ +#if USE_AVR_INLINE_ASM + uint64_t l[4]; + uint32_t xlow, xhigh, ylow, yhigh; + uint32_t slow, shigh; + uint8_t li_in = (rounds + 3) & 0x03; + uint8_t li_out = (((rounds - 31) + li_in) & 0x03) * 8; + li_in *= 8; + + // Prepare to expand the key schedule. + __asm__ __volatile__ ( + "add r30,%4\n" // Z = &(this->l[li_out]) + "adc r31,__zero_reg__\n" + "ld __tmp_reg__,Z\n" // s = this->l[li_out] + "std %A0,__tmp_reg__\n" + "ldd __tmp_reg__,Z+1\n" + "std %B0,__tmp_reg__\n" + "ldd __tmp_reg__,Z+2\n" + "std %C0,__tmp_reg__\n" + "ldd __tmp_reg__,Z+3\n" + "std %D0,__tmp_reg__\n" + "ldd __tmp_reg__,Z+4\n" + "std %A1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+5\n" + "std %B1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+6\n" + "std %C1,__tmp_reg__\n" + "ldd __tmp_reg__,Z+7\n" + "std %D1,__tmp_reg__\n" + "sub r30,%4\n" // Point Z back to the start of this->l. + "sbc r31,__zero_reg__\n" + + "ldi r25,32\n" // Copy the entire this->l array into l. + "1:\n" + "ld __tmp_reg__,Z+\n" + "st X+,__tmp_reg__\n" + "dec r25\n" + "brne 1b\n" + : "=Q"(slow), "=Q"(shigh) + : "z"(this->l), "x"(l), "r"(li_out) + : "r25" + ); + + // Unpack the input into the x and y variables, converting + // from big-endian into little-endian in the process. + __asm__ __volatile__ ( + "ld %D1,Z\n" + "ldd %C1,Z+1\n" + "ldd %B1,Z+2\n" + "ldd %A1,Z+3\n" + "ldd %D0,Z+4\n" + "ldd %C0,Z+5\n" + "ldd %B0,Z+6\n" + "ldd %A0,Z+7\n" + "ldd %D3,Z+8\n" + "ldd %C3,Z+9\n" + "ldd %B3,Z+10\n" + "ldd %A3,Z+11\n" + "ldd %D2,Z+12\n" + "ldd %C2,Z+13\n" + "ldd %B2,Z+14\n" + "ldd %A2,Z+15\n" + : "=r"(xlow), "=r"(xhigh), "=r"(ylow), "=r"(yhigh) + : "z"(input) + ); + + // Perform all decryption rounds while expanding the key schedule in-place. + __asm__ __volatile__ ( + "mov r23,%9\n" // i = rounds - 1 + "dec r23\n" + "1:\n" + + // Adjust x and y for this round using the key schedule word s. + + // y = rightRotate3_64(x ^ y); + "eor %A2,%A0\n" // y ^= x + "eor %B2,%B0\n" + "eor %C2,%C0\n" + "eor %D2,%D0\n" + "eor %A3,%A1\n" + "eor %B3,%B1\n" + "eor %C3,%C1\n" + "eor %D3,%D1\n" + + "bst %A2,0\n" // y = rightRotate1_64(y) + "ror %D3\n" + "ror %C3\n" + "ror %B3\n" + "ror %A3\n" + "ror %D2\n" + "ror %C2\n" + "ror %B2\n" + "ror %A2\n" + "bld %D3,7\n" + + "bst %A2,0\n" // y = rightRotate1_64(y) + "ror %D3\n" + "ror %C3\n" + "ror %B3\n" + "ror %A3\n" + "ror %D2\n" + "ror %C2\n" + "ror %B2\n" + "ror %A2\n" + "bld %D3,7\n" + + "bst %A2,0\n" // y = rightRotate1_64(y) + "ror %D3\n" + "ror %C3\n" + "ror %B3\n" + "ror %A3\n" + "ror %D2\n" + "ror %C2\n" + "ror %B2\n" + "ror %A2\n" + "bld %D3,7\n" + + // x = leftRotate8_64((x ^ s) - y); + "ldd __tmp_reg__,%A4\n" // x ^= s + "eor %A0,__tmp_reg__\n" + "ldd __tmp_reg__,%B4\n" + "eor %B0,__tmp_reg__\n" + "ldd __tmp_reg__,%C4\n" + "eor %C0,__tmp_reg__\n" + "ldd __tmp_reg__,%D4\n" + "eor %D0,__tmp_reg__\n" + "ldd __tmp_reg__,%A5\n" + "eor %A1,__tmp_reg__\n" + "ldd __tmp_reg__,%B5\n" + "eor %B1,__tmp_reg__\n" + "ldd __tmp_reg__,%C5\n" + "eor %C1,__tmp_reg__\n" + "ldd __tmp_reg__,%D5\n" + "eor %D1,__tmp_reg__\n" + + "sub %A0,%A2\n" // x -= y + "sbc %B0,%B2\n" + "sbc %C0,%C2\n" + "sbc %D0,%D2\n" + "sbc %A1,%A3\n" + "sbc %B1,%B3\n" + "sbc %C1,%C3\n" + "sbc %D1,%D3\n" + + "mov __tmp_reg__,%D1\n" // x = lefRotate8_64(x) + "mov %D1,%C1\n" + "mov %C1,%B1\n" + "mov %B1,%A1\n" + "mov %A1,%D0\n" + "mov %D0,%C0\n" + "mov %C0,%B0\n" + "mov %B0,%A0\n" + "mov %A0,__tmp_reg__\n" + + // On the last round we don't need to compute s so we + // can exit early here if i == 0. + "or r23,r23\n" // if (i == 0) + "brne 2f\n" + "rjmp 3f\n" + "2:\n" + "dec r23\n" // --i + + // Save x and y on the stack so we can reuse registers for t and s. + "push %A0\n" + "push %B0\n" + "push %C0\n" + "push %D0\n" + "push %A1\n" + "push %B1\n" + "push %C1\n" + "push %D1\n" + "push %A2\n" + "push %B2\n" + "push %C2\n" + "push %D2\n" + "push %A3\n" + "push %B3\n" + "push %C3\n" + "push %D3\n" + + // Compute the key schedule word s for the next round. + + // li_out = (li_out + 3) & 0x03; + "ldd r24,%7\n" + "ldi r25,24\n" + "add r24,r25\n" + "andi r24,0x1f\n" + "std %7,r24\n" + + // s = rightRotate3_64(s ^ l[li_out]); + "add %A8,r24\n" // Z = &(l[li_out]) + "adc %B8,__zero_reg__\n" + + "ld %A0,Z\n" // t = l[li_out] + "ldd %B0,Z+1\n" + "ldd %C0,Z+2\n" + "ldd %D0,Z+3\n" + "ldd %A1,Z+4\n" + "ldd %B1,Z+5\n" + "ldd %C1,Z+6\n" + "ldd %D1,Z+7\n" + + "ldd %A2,%A4\n" // load s + "ldd %B2,%B4\n" + "ldd %C2,%C4\n" + "ldd %D2,%D4\n" + "ldd %A3,%A5\n" + "ldd %B3,%B5\n" + "ldd %C3,%C5\n" + "ldd %D3,%D5\n" + + "eor %A2,%A0\n" // s ^= t + "eor %B2,%B0\n" + "eor %C2,%C0\n" + "eor %D2,%D0\n" + "eor %A3,%A1\n" + "eor %B3,%B1\n" + "eor %C3,%C1\n" + "eor %D3,%D1\n" + + "bst %A2,0\n" // s = rightRotate1_64(s) + "ror %D3\n" + "ror %C3\n" + "ror %B3\n" + "ror %A3\n" + "ror %D2\n" + "ror %C2\n" + "ror %B2\n" + "ror %A2\n" + "bld %D3,7\n" + + "bst %A2,0\n" // s = rightRotate1_64(s) + "ror %D3\n" + "ror %C3\n" + "ror %B3\n" + "ror %A3\n" + "ror %D2\n" + "ror %C2\n" + "ror %B2\n" + "ror %A2\n" + "bld %D3,7\n" + + "bst %A2,0\n" // s = rightRotate1_64(s) + "ror %D3\n" + "ror %C3\n" + "ror %B3\n" + "ror %A3\n" + "ror %D2\n" + "ror %C2\n" + "ror %B2\n" + "ror %A2\n" + "bld %D3,7\n" + + "sub %A8,r24\n" // Z -= li_out + "sbc %B8,__zero_reg__\n" + + // li_in = (li_in + 3) & 0x03; + "ldd r24,%6\n" + "add r24,r25\n" + "andi r24,0x1f\n" + "std %6,r24\n" + + // l[li_in] = leftRotate8_64((l[li_out] ^ i) - s); + "add %A8,r24\n" // Z = &(l[li_in]) + "adc %B8,__zero_reg__\n" + + "eor %A0,r23\n" // t ^= i + + "sub %A0,%A2\n" // t -= s + "sbc %B0,%B2\n" + "sbc %C0,%C2\n" + "sbc %D0,%D2\n" + "sbc %A1,%A3\n" + "sbc %B1,%B3\n" + "sbc %C1,%C3\n" + "sbc %D1,%D3\n" + + "st Z,%D1\n" // l[li_in] = leftRotate8_64(t) + "std Z+1,%A0\n" + "std Z+2,%B0\n" + "std Z+3,%C0\n" + "std Z+4,%D0\n" + "std Z+5,%A1\n" + "std Z+6,%B1\n" + "std Z+7,%C1\n" + + "sub %A8,r24\n" // Z -= li_in + "sbc %B8,__zero_reg__\n" + + "std %A4,%A2\n" // store s + "std %B4,%B2\n" + "std %C4,%C2\n" + "std %D4,%D2\n" + "std %A5,%A3\n" + "std %B5,%B3\n" + "std %C5,%C3\n" + "std %D5,%D3\n" + + // Pop registers from the stack to recover the x and y values. + "pop %D3\n" + "pop %C3\n" + "pop %B3\n" + "pop %A3\n" + "pop %D2\n" + "pop %C2\n" + "pop %B2\n" + "pop %A2\n" + "pop %D1\n" + "pop %C1\n" + "pop %B1\n" + "pop %A1\n" + "pop %D0\n" + "pop %C0\n" + "pop %B0\n" + "pop %A0\n" + + // Bottom of the loop. + "rjmp 1b\n" + "3:\n" + + : "+r"(xlow), "+r"(xhigh), "+r"(ylow), "+r"(yhigh), + "+Q"(slow), "+Q"(shigh), "+Q"(li_in), "+Q"(li_out) + : "z"(l), "r"(rounds) + : "r23", "r24", "r25" + ); + + // Pack the results into the output and convert back to big-endian. + __asm__ __volatile__ ( + "st Z,%D1\n" + "std Z+1,%C1\n" + "std Z+2,%B1\n" + "std Z+3,%A1\n" + "std Z+4,%D0\n" + "std Z+5,%C0\n" + "std Z+6,%B0\n" + "std Z+7,%A0\n" + "std Z+8,%D3\n" + "std Z+9,%C3\n" + "std Z+10,%B3\n" + "std Z+11,%A3\n" + "std Z+12,%D2\n" + "std Z+13,%C2\n" + "std Z+14,%B2\n" + "std Z+15,%A2\n" + : : "r"(xlow), "r"(xhigh), "r"(ylow), "r"(yhigh), "z"(output) + ); +#else + uint64_t l[4]; + uint64_t x, y, s; + uint8_t round; + uint8_t li_in = (rounds + 3) & 0x03; + uint8_t li_out = ((rounds - 31) + li_in) & 0x03; + + // Prepare the key schedule, starting at the end. + for (round = li_in; round != li_out; round = (round + 1) & 0x03) + l[round] = this->l[round]; + s = this->l[li_out]; + + // Unpack the input and convert from big-endian. + unpack64(x, input); + unpack64(y, input + 8); + + // Perform all decryption rounds except the last while + // expanding the decryption schedule on the fly. + for (uint8_t round = rounds - 1; round > 0; --round) { + // Decrypt using the current round key. + y = rightRotate3_64(x ^ y); + x = leftRotate8_64((x ^ s) - y); + + // Generate the round key for the previous round. + li_in = (li_in + 3) & 0x03; + li_out = (li_out + 3) & 0x03; + s = rightRotate3_64(s ^ l[li_out]); + l[li_in] = leftRotate8_64((l[li_out] ^ (round - 1)) - s); + } + + // Perform the final decryption round. + y = rightRotate3_64(x ^ y); + x = leftRotate8_64((x ^ s) - y); + + // Pack the output and convert to big-endian. + pack64(output, x); + pack64(output + 8, y); +#endif +} + +void SpeckSmall::clear() +{ + SpeckTiny::clear(); + clean(l); +} diff --git a/libraries/Crypto/SpeckSmall.h b/libraries/Crypto/SpeckSmall.h new file mode 100644 index 00000000..cdf019ee --- /dev/null +++ b/libraries/Crypto/SpeckSmall.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_SPECK_SMALL_H +#define CRYPTO_SPECK_SMALL_H + +#include "SpeckTiny.h" + +class SpeckSmall : public SpeckTiny +{ +public: + SpeckSmall(); + virtual ~SpeckSmall(); + + bool setKey(const uint8_t *key, size_t len); + + void decryptBlock(uint8_t *output, const uint8_t *input); + + void clear(); + +private: + uint64_t l[4]; +}; + +#endif diff --git a/libraries/Crypto/SpeckLowMemory.cpp b/libraries/Crypto/SpeckTiny.cpp similarity index 91% rename from libraries/Crypto/SpeckLowMemory.cpp rename to libraries/Crypto/SpeckTiny.cpp index a3a9d7f0..85446089 100644 --- a/libraries/Crypto/SpeckLowMemory.cpp +++ b/libraries/Crypto/SpeckTiny.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,15 +20,15 @@ * DEALINGS IN THE SOFTWARE. */ -#include "SpeckLowMemory.h" +#include "SpeckTiny.h" #include "Crypto.h" #include "utility/RotateUtil.h" #include "utility/EndianUtil.h" #include /** - * \class SpeckLowMemory SpeckLowMemory.h - * \brief Speck block cipher with a 128-bit block size (low-memory version). + * \class SpeckTiny SpeckTiny.h + * \brief Speck block cipher with a 128-bit block size (tiny-memory version). * * This class differs from the Speck class in the following ways: * @@ -38,20 +38,23 @@ * \li Performance of encryptBlock() is slower than for Speck due to * expanding the key on the fly rather than ahead of time. * \li The decryptBlock() function is not supported, which means that CBC - * mode cannot be used but the CTR, CFB, OFB, and GCM modes can be used. + * mode cannot be used but the CTR, CFB, OFB, EAX, and GCM modes can be used. * * This class is useful when RAM is at a premium, CBC mode is not required, * and reduced encryption performance is not a hindrance to the application. * Even though the performance of encryptBlock() is reduced, this class is * still faster than AES with equivalent key sizes. * + * The companion SpeckSmall class supports decryptBlock() at the cost of + * some additional memory and slower setKey() times. + * * See the documentation for the Speck class for more information on the * Speck family of block ciphers. * * References: https://en.wikipedia.org/wiki/Speck_%28cipher%29, * http://eprint.iacr.org/2013/404 * - * \sa Speck + * \sa Speck, SpeckSmall */ // The "avr-gcc" compiler doesn't do a very good job of compiling @@ -65,27 +68,27 @@ #endif /** - * \brief Constructs a low-memory Speck block cipher with no initial key. + * \brief Constructs a tiny-memory Speck block cipher with no initial key. * * This constructor must be followed by a call to setKey() before the * block cipher can be used for encryption. */ -SpeckLowMemory::SpeckLowMemory() +SpeckTiny::SpeckTiny() : rounds(32) { } -SpeckLowMemory::~SpeckLowMemory() +SpeckTiny::~SpeckTiny() { clean(k); } -size_t SpeckLowMemory::blockSize() const +size_t SpeckTiny::blockSize() const { return 16; } -size_t SpeckLowMemory::keySize() const +size_t SpeckTiny::keySize() const { // Also supports 128-bit and 192-bit, but we only report 256-bit. return 32; @@ -103,7 +106,7 @@ size_t SpeckLowMemory::keySize() const (value) = be64toh((value)); \ } while (0) -bool SpeckLowMemory::setKey(const uint8_t *key, size_t len) +bool SpeckTiny::setKey(const uint8_t *key, size_t len) { #if USE_AVR_INLINE_ASM // Determine the number of rounds to use and validate the key length. @@ -150,7 +153,7 @@ bool SpeckLowMemory::setKey(const uint8_t *key, size_t len) return true; } -void SpeckLowMemory::encryptBlock(uint8_t *output, const uint8_t *input) +void SpeckTiny::encryptBlock(uint8_t *output, const uint8_t *input) { #if USE_AVR_INLINE_ASM uint64_t l[4]; @@ -521,25 +524,12 @@ void SpeckLowMemory::encryptBlock(uint8_t *output, const uint8_t *input) #endif } -/** - * \brief Decrypts a single block using this cipher. - * - * \param output The output buffer to put the plaintext into. - * Must be at least blockSize() bytes in length. - * \param input The input buffer to read the ciphertext from which is - * allowed to overlap with \a output. Must be at least blockSize() - * bytes in length. - * - * \note This function is not supported for SpeckLowMemory, which means - * that CBC mode cannot be used but that the CTR, CFB, OFB, and GCM modes - * can be used. - */ -void SpeckLowMemory::decryptBlock(uint8_t *output, const uint8_t *input) +void SpeckTiny::decryptBlock(uint8_t *output, const uint8_t *input) { - // Decryption is not supported. + // Decryption is not supported by SpeckTiny. Use SpeckSmall instead. } -void SpeckLowMemory::clear() +void SpeckTiny::clear() { clean(k); } diff --git a/libraries/Crypto/SpeckLowMemory.h b/libraries/Crypto/SpeckTiny.h similarity index 85% rename from libraries/Crypto/SpeckLowMemory.h rename to libraries/Crypto/SpeckTiny.h index 23ffaad2..2e84a663 100644 --- a/libraries/Crypto/SpeckLowMemory.h +++ b/libraries/Crypto/SpeckTiny.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,16 +20,18 @@ * DEALINGS IN THE SOFTWARE. */ -#ifndef CRYPTO_SPECK_LOW_MEMORY_H -#define CRYPTO_SPECK_LOW_MEMORY_H +#ifndef CRYPTO_SPECK_TINY_H +#define CRYPTO_SPECK_TINY_H #include "BlockCipher.h" -class SpeckLowMemory : public BlockCipher +class SpeckSmall; + +class SpeckTiny : public BlockCipher { public: - SpeckLowMemory(); - virtual ~SpeckLowMemory(); + SpeckTiny(); + virtual ~SpeckTiny(); size_t blockSize() const; size_t keySize() const; @@ -44,6 +46,8 @@ public: private: uint64_t k[4]; uint8_t rounds; + + friend class SpeckSmall; }; #endif diff --git a/libraries/Crypto/examples/TestEAX/TestEAX.ino b/libraries/Crypto/examples/TestEAX/TestEAX.ino index e6ac415f..142fb44b 100644 --- a/libraries/Crypto/examples/TestEAX/TestEAX.ino +++ b/libraries/Crypto/examples/TestEAX/TestEAX.ino @@ -28,7 +28,7 @@ This example runs tests on the EAX implementation to verify correct behaviour. #include #include #include -#include +#include #include #include @@ -235,7 +235,7 @@ TestVector testVector; EAX *eax; EAX *eax256; EAX *eaxSpeck; -EAX *eaxSpeckLowMemory; +EAX *eaxSpeckTiny; byte buffer[128]; @@ -500,8 +500,8 @@ void setup() Serial.println(sizeof(*eax256)); Serial.print("EAX ... "); Serial.println(sizeof(*eaxSpeck)); - Serial.print("EAX ... "); - Serial.println(sizeof(*eaxSpeckLowMemory)); + Serial.print("EAX ... "); + Serial.println(sizeof(*eaxSpeckTiny)); Serial.println(); Serial.println("Test Vectors:"); @@ -531,9 +531,9 @@ void setup() perfCipher(eaxSpeck, &testVectorEAX1, "Speck"); Serial.println(); delete eaxSpeck; - eaxSpeckLowMemory = new EAX(); - perfCipher(eaxSpeckLowMemory, &testVectorEAX1, "SpeckLowMemory"); - delete eaxSpeckLowMemory; + eaxSpeckTiny = new EAX(); + perfCipher(eaxSpeckTiny, &testVectorEAX1, "SpeckTiny"); + delete eaxSpeckTiny; } void loop() diff --git a/libraries/Crypto/examples/TestGCM/TestGCM.ino b/libraries/Crypto/examples/TestGCM/TestGCM.ino index 7084c6f8..bebf8c4a 100644 --- a/libraries/Crypto/examples/TestGCM/TestGCM.ino +++ b/libraries/Crypto/examples/TestGCM/TestGCM.ino @@ -27,7 +27,7 @@ This example runs tests on the GCM implementation to verify correct behaviour. #include #include #include -#include +#include #include #include #include @@ -261,7 +261,7 @@ GCM *gcmaes128 = 0; GCM *gcmaes192 = 0; GCM *gcmaes256 = 0; GCM *gcmspeck = 0; -GCM *gcmspecklm = 0; +GCM *gcmspecklm = 0; byte buffer[128]; @@ -516,7 +516,7 @@ void setup() Serial.println(sizeof(*gcmaes256)); Serial.print("GCM ... "); Serial.println(sizeof(*gcmspeck)); - Serial.print("GCM ... "); + Serial.print("GCM ... "); Serial.println(sizeof(*gcmspecklm)); Serial.println(); #endif @@ -556,8 +556,8 @@ void setup() gcmspeck = new GCM(); perfCipher(gcmspeck, &testVectorGCM16, "GCM-Speck-256"); delete gcmspeck; - gcmspecklm = new GCM(); - perfCipher(gcmspecklm, &testVectorGCM16, "GCM-SpeckLowMemory-256"); + gcmspecklm = new GCM(); + perfCipher(gcmspecklm, &testVectorGCM16, "GCM-SpeckTiny-256"); delete gcmspecklm; #endif } diff --git a/libraries/Crypto/examples/TestSpeck/TestSpeck.ino b/libraries/Crypto/examples/TestSpeck/TestSpeck.ino index af79687d..d70d905e 100644 --- a/libraries/Crypto/examples/TestSpeck/TestSpeck.ino +++ b/libraries/Crypto/examples/TestSpeck/TestSpeck.ino @@ -26,7 +26,8 @@ This example runs tests on the Speck implementation to verify correct behaviour. #include #include -#include +#include +#include #include struct TestVector @@ -70,7 +71,8 @@ static TestVector const testVectorSpeck256 = { }; Speck speck; -SpeckLowMemory speckLowMemory; +SpeckSmall speckSmall; +SpeckTiny speckTiny; byte buffer[16]; @@ -156,8 +158,10 @@ void setup() Serial.println("State Sizes:"); Serial.print("Speck ... "); Serial.println(sizeof(Speck)); - Serial.print("SpeckLowMemory ... "); - Serial.println(sizeof(SpeckLowMemory)); + Serial.print("SpeckSmall ... "); + Serial.println(sizeof(SpeckSmall)); + Serial.print("SpeckTiny ... "); + Serial.println(sizeof(SpeckTiny)); Serial.println(); Serial.println("Speck Test Vectors:"); @@ -167,10 +171,17 @@ void setup() Serial.println(); - Serial.println("SpeckLowMemory Test Vectors:"); - testCipher(&speckLowMemory, &testVectorSpeck128, 16, false); - testCipher(&speckLowMemory, &testVectorSpeck192, 24, false); - testCipher(&speckLowMemory, &testVectorSpeck256, 32, false); + Serial.println("SpeckSmall Test Vectors:"); + testCipher(&speckSmall, &testVectorSpeck128, 16); + testCipher(&speckSmall, &testVectorSpeck192, 24); + testCipher(&speckSmall, &testVectorSpeck256, 32); + + Serial.println(); + + Serial.println("SpeckTiny Test Vectors:"); + testCipher(&speckTiny, &testVectorSpeck128, 16, false); + testCipher(&speckTiny, &testVectorSpeck192, 24, false); + testCipher(&speckTiny, &testVectorSpeck256, 32, false); Serial.println(); @@ -179,10 +190,15 @@ void setup() perfCipher(&speck, &testVectorSpeck192, 24); perfCipher(&speck, &testVectorSpeck256, 32); - Serial.println("SpeckLowMemory Performance Tests:"); - perfCipher(&speckLowMemory, &testVectorSpeck128, 16, false); - perfCipher(&speckLowMemory, &testVectorSpeck192, 24, false); - perfCipher(&speckLowMemory, &testVectorSpeck256, 32, false); + Serial.println("SpeckSmall Performance Tests:"); + perfCipher(&speckSmall, &testVectorSpeck128, 16); + perfCipher(&speckSmall, &testVectorSpeck192, 24); + perfCipher(&speckSmall, &testVectorSpeck256, 32); + + Serial.println("SpeckTiny Performance Tests:"); + perfCipher(&speckTiny, &testVectorSpeck128, 16, false); + perfCipher(&speckTiny, &testVectorSpeck192, 24, false); + perfCipher(&speckTiny, &testVectorSpeck256, 32, false); } void loop() diff --git a/libraries/Crypto/keywords.txt b/libraries/Crypto/keywords.txt index 66339da7..410d602a 100644 --- a/libraries/Crypto/keywords.txt +++ b/libraries/Crypto/keywords.txt @@ -2,7 +2,8 @@ AES128 KEYWORD1 AES192 KEYWORD1 AES256 KEYWORD1 Speck KEYWORD1 -SpeckLowMemory KEYWORD1 +SpeckTiny KEYWORD1 +SpeckSmall KEYWORD1 ChaCha KEYWORD1 ChaChaPoly KEYWORD1