From f52888517b609e818f7e3b4e1ce20d6561e55d68 Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Fri, 15 Jan 2016 17:50:36 +1000 Subject: [PATCH] Move the Keccak B state from the object to the stack --- doc/crypto.dox | 8 ++-- libraries/Crypto/KeccakCore.cpp | 69 +++++++++++++++++---------------- libraries/Crypto/KeccakCore.h | 1 - 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/doc/crypto.dox b/doc/crypto.dox index 0b7ee49c..ae7bf523 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -97,8 +97,8 @@ Ardunino Mega 2560 running at 16 MHz are similar: SHA121.90us1423.28us 95 SHA25643.85us2841.04us 107 SHA512122.82us15953.42us 211 -SHA3_25661.78us8328.70us 405 -SHA3_512115.94us8344.80us 405 +SHA3_25660.69us8180.24us 205 +SHA3_512113.88us8196.34us 205 BLAKE2s18.54us1200.06us 171 BLAKE2b50.70us6515.87us 339 @@ -154,8 +154,8 @@ All figures are for the Arduino Due running at 84 MHz: SHA10.94us62.55us 112 SHA2561.15us76.60us 120 SHA5122.87us370.37us 224 -SHA3_2565.36us697.65us 424 -SHA3_5129.89us697.81us 424 +SHA3_2565.64us735.29us 224 +SHA3_51210.42us735.49us 224 BLAKE2s0.76us50.88us 184 BLAKE2b1.33us170.93us 352 diff --git a/libraries/Crypto/KeccakCore.cpp b/libraries/Crypto/KeccakCore.cpp index 7312a9a6..1abfb604 100644 --- a/libraries/Crypto/KeccakCore.cpp +++ b/libraries/Crypto/KeccakCore.cpp @@ -273,6 +273,7 @@ void KeccakCore::setHMACKey(const void *key, size_t len, uint8_t pad, size_t has */ void KeccakCore::keccakp() { + uint64_t B[5][5]; #if defined(__AVR__) // This assembly code was generated by the "genkeccak.c" program. // Do not modify this code directly. Instead modify "genkeccak.c" @@ -1880,7 +1881,7 @@ void KeccakCore::keccakp() "pop r29\n" // Done - : : "x"(state.B), "z"(state.A) + : : "x"(B), "z"(state.A) : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "memory" ); @@ -1896,52 +1897,52 @@ void KeccakCore::keccakp() // arrays of size 5 called C and D. To save a bit of memory, // we use the first row of B to store C and compute D on the fly. for (index = 0; index < 5; ++index) { - state.B[0][index] = state.A[0][index] ^ state.A[1][index] ^ - state.A[2][index] ^ state.A[3][index] ^ - state.A[4][index]; + B[0][index] = state.A[0][index] ^ state.A[1][index] ^ + state.A[2][index] ^ state.A[3][index] ^ + state.A[4][index]; } for (index = 0; index < 5; ++index) { - D = state.B[0][addMod5(index, 4)] ^ - leftRotate1_64(state.B[0][addMod5(index, 1)]); + D = B[0][addMod5(index, 4)] ^ + leftRotate1_64(B[0][addMod5(index, 1)]); for (index2 = 0; index2 < 5; ++index2) state.A[index2][index] ^= D; } // Step mapping rho and pi combined into a single step. // Rotate all lanes by a specific offset and rearrange. - state.B[0][0] = state.A[0][0]; - state.B[1][0] = leftRotate28_64(state.A[0][3]); - state.B[2][0] = leftRotate1_64 (state.A[0][1]); - state.B[3][0] = leftRotate27_64(state.A[0][4]); - state.B[4][0] = leftRotate62_64(state.A[0][2]); - state.B[0][1] = leftRotate44_64(state.A[1][1]); - state.B[1][1] = leftRotate20_64(state.A[1][4]); - state.B[2][1] = leftRotate6_64 (state.A[1][2]); - state.B[3][1] = leftRotate36_64(state.A[1][0]); - state.B[4][1] = leftRotate55_64(state.A[1][3]); - state.B[0][2] = leftRotate43_64(state.A[2][2]); - state.B[1][2] = leftRotate3_64 (state.A[2][0]); - state.B[2][2] = leftRotate25_64(state.A[2][3]); - state.B[3][2] = leftRotate10_64(state.A[2][1]); - state.B[4][2] = leftRotate39_64(state.A[2][4]); - state.B[0][3] = leftRotate21_64(state.A[3][3]); - state.B[1][3] = leftRotate45_64(state.A[3][1]); - state.B[2][3] = leftRotate8_64 (state.A[3][4]); - state.B[3][3] = leftRotate15_64(state.A[3][2]); - state.B[4][3] = leftRotate41_64(state.A[3][0]); - state.B[0][4] = leftRotate14_64(state.A[4][4]); - state.B[1][4] = leftRotate61_64(state.A[4][2]); - state.B[2][4] = leftRotate18_64(state.A[4][0]); - state.B[3][4] = leftRotate56_64(state.A[4][3]); - state.B[4][4] = leftRotate2_64 (state.A[4][1]); + B[0][0] = state.A[0][0]; + B[1][0] = leftRotate28_64(state.A[0][3]); + B[2][0] = leftRotate1_64 (state.A[0][1]); + B[3][0] = leftRotate27_64(state.A[0][4]); + B[4][0] = leftRotate62_64(state.A[0][2]); + B[0][1] = leftRotate44_64(state.A[1][1]); + B[1][1] = leftRotate20_64(state.A[1][4]); + B[2][1] = leftRotate6_64 (state.A[1][2]); + B[3][1] = leftRotate36_64(state.A[1][0]); + B[4][1] = leftRotate55_64(state.A[1][3]); + B[0][2] = leftRotate43_64(state.A[2][2]); + B[1][2] = leftRotate3_64 (state.A[2][0]); + B[2][2] = leftRotate25_64(state.A[2][3]); + B[3][2] = leftRotate10_64(state.A[2][1]); + B[4][2] = leftRotate39_64(state.A[2][4]); + B[0][3] = leftRotate21_64(state.A[3][3]); + B[1][3] = leftRotate45_64(state.A[3][1]); + B[2][3] = leftRotate8_64 (state.A[3][4]); + B[3][3] = leftRotate15_64(state.A[3][2]); + B[4][3] = leftRotate41_64(state.A[3][0]); + B[0][4] = leftRotate14_64(state.A[4][4]); + B[1][4] = leftRotate61_64(state.A[4][2]); + B[2][4] = leftRotate18_64(state.A[4][0]); + B[3][4] = leftRotate56_64(state.A[4][3]); + B[4][4] = leftRotate2_64 (state.A[4][1]); // Step mapping chi. Combine each lane with two other lanes in its row. for (index = 0; index < 5; ++index) { for (index2 = 0; index2 < 5; ++index2) { state.A[index2][index] = - state.B[index2][index] ^ - ((~state.B[index2][addMod5(index, 1)]) & - state.B[index2][addMod5(index, 2)]); + B[index2][index] ^ + ((~B[index2][addMod5(index, 1)]) & + B[index2][addMod5(index, 2)]); } } #endif diff --git a/libraries/Crypto/KeccakCore.h b/libraries/Crypto/KeccakCore.h index d2a0464c..12a5402b 100644 --- a/libraries/Crypto/KeccakCore.h +++ b/libraries/Crypto/KeccakCore.h @@ -51,7 +51,6 @@ public: private: struct { uint64_t A[5][5]; - uint64_t B[5][5]; uint8_t inputSize; uint8_t outputSize; } state;