From 1ae693127c4f28192e852f1b424893998a2eab79 Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Sat, 16 Jan 2016 06:43:56 +1000 Subject: [PATCH] Reduce the state size for BLAKE2 by moving state.v to the stack --- doc/crypto.dox | 12 ++++++------ libraries/Crypto/BLAKE2b.cpp | 37 ++++++++++++++++++------------------ libraries/Crypto/BLAKE2b.h | 1 - libraries/Crypto/BLAKE2s.cpp | 37 ++++++++++++++++++------------------ libraries/Crypto/BLAKE2s.h | 1 - 5 files changed, 44 insertions(+), 44 deletions(-) diff --git a/doc/crypto.dox b/doc/crypto.dox index ae7bf523..d4214d14 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -99,13 +99,13 @@ Ardunino Mega 2560 running at 16 MHz are similar: SHA512122.82us15953.42us 211 SHA3_25660.69us8180.24us 205 SHA3_512113.88us8196.34us 205 -BLAKE2s18.54us1200.06us 171 -BLAKE2b50.70us6515.87us 339 +BLAKE2s20.65us1335.25us 107 +BLAKE2b65.22us8375.36us 211 Authentication AlgorithmHashing (per byte)FinalizationKey SetupState Size (bytes) SHA1 (HMAC mode)21.90us4296.33us1420.24us95 SHA256 (HMAC mode)43.85us8552.61us2836.49us107 -BLAKE2s (HMAC mode)18.54us3649.98us1214.81us171 +BLAKE2s (HMAC mode)20.65us4055.56us1350.00us107 Poly130526.29us486.15us17.26us87 GHASH148.14us17.09us21.87us33 @@ -156,13 +156,13 @@ All figures are for the Arduino Due running at 84 MHz: SHA5122.87us370.37us 224 SHA3_2565.64us735.29us 224 SHA3_51210.42us735.49us 224 -BLAKE2s0.76us50.88us 184 -BLAKE2b1.33us170.93us 352 +BLAKE2s0.72us48.24us 120 +BLAKE2b1.29us165.28us 224 Authentication AlgorithmHashing (per byte)FinalizationKey SetupState Size (bytes) SHA1 (HMAC mode)0.94us193.92us65.09us112 SHA256 (HMAC mode)1.15us238.98us80.44us120 -BLAKE2s (HMAC mode)0.76us165.64us59.92us184 +BLAKE2s (HMAC mode)0.72us157.75us57.18us120 Poly13050.85us19.25us2.35us96 GHASH4.37us1.50us4.37us36 diff --git a/libraries/Crypto/BLAKE2b.cpp b/libraries/Crypto/BLAKE2b.cpp index 3c21e3c1..1f7d21e7 100644 --- a/libraries/Crypto/BLAKE2b.cpp +++ b/libraries/Crypto/BLAKE2b.cpp @@ -216,6 +216,7 @@ static const uint8_t sigma[12][16] PROGMEM = { void BLAKE2b::processChunk(uint64_t f0) { uint8_t index; + uint64_t v[16]; // Byte-swap the message buffer into little-endian if necessary. #if !defined(CRYPTO_LITTLE_ENDIAN) @@ -224,32 +225,32 @@ void BLAKE2b::processChunk(uint64_t f0) #endif // Format the block to be hashed. - memcpy(state.v, state.h, sizeof(state.h)); - state.v[8] = BLAKE2b_IV0; - state.v[9] = BLAKE2b_IV1; - state.v[10] = BLAKE2b_IV2; - state.v[11] = BLAKE2b_IV3; - state.v[12] = BLAKE2b_IV4 ^ state.lengthLow; - state.v[13] = BLAKE2b_IV5 ^ state.lengthHigh; - state.v[14] = BLAKE2b_IV6 ^ f0; - state.v[15] = BLAKE2b_IV7; + memcpy(v, state.h, sizeof(state.h)); + v[8] = BLAKE2b_IV0; + v[9] = BLAKE2b_IV1; + v[10] = BLAKE2b_IV2; + v[11] = BLAKE2b_IV3; + v[12] = BLAKE2b_IV4 ^ state.lengthLow; + v[13] = BLAKE2b_IV5 ^ state.lengthHigh; + v[14] = BLAKE2b_IV6 ^ f0; + v[15] = BLAKE2b_IV7; // Perform the 12 BLAKE2b rounds. for (index = 0; index < 12; ++index) { // Column round. - quarterRound(state.v[0], state.v[4], state.v[8], state.v[12], 0); - quarterRound(state.v[1], state.v[5], state.v[9], state.v[13], 1); - quarterRound(state.v[2], state.v[6], state.v[10], state.v[14], 2); - quarterRound(state.v[3], state.v[7], state.v[11], state.v[15], 3); + quarterRound(v[0], v[4], v[8], v[12], 0); + quarterRound(v[1], v[5], v[9], v[13], 1); + quarterRound(v[2], v[6], v[10], v[14], 2); + quarterRound(v[3], v[7], v[11], v[15], 3); // Diagonal round. - quarterRound(state.v[0], state.v[5], state.v[10], state.v[15], 4); - quarterRound(state.v[1], state.v[6], state.v[11], state.v[12], 5); - quarterRound(state.v[2], state.v[7], state.v[8], state.v[13], 6); - quarterRound(state.v[3], state.v[4], state.v[9], state.v[14], 7); + quarterRound(v[0], v[5], v[10], v[15], 4); + quarterRound(v[1], v[6], v[11], v[12], 5); + quarterRound(v[2], v[7], v[8], v[13], 6); + quarterRound(v[3], v[4], v[9], v[14], 7); } // Combine the new and old hash values. for (index = 0; index < 8; ++index) - state.h[index] ^= (state.v[index] ^ state.v[index + 8]); + state.h[index] ^= (v[index] ^ v[index + 8]); } diff --git a/libraries/Crypto/BLAKE2b.h b/libraries/Crypto/BLAKE2b.h index db9964a8..7f876647 100644 --- a/libraries/Crypto/BLAKE2b.h +++ b/libraries/Crypto/BLAKE2b.h @@ -48,7 +48,6 @@ private: struct { uint64_t h[8]; uint64_t m[16]; - uint64_t v[16]; uint64_t lengthLow; uint64_t lengthHigh; uint8_t chunkSize; diff --git a/libraries/Crypto/BLAKE2s.cpp b/libraries/Crypto/BLAKE2s.cpp index 26bdd548..3d70304d 100644 --- a/libraries/Crypto/BLAKE2s.cpp +++ b/libraries/Crypto/BLAKE2s.cpp @@ -209,6 +209,7 @@ static const uint8_t sigma[10][16] PROGMEM = { void BLAKE2s::processChunk(uint32_t f0) { uint8_t index; + uint32_t v[16]; // Byte-swap the message buffer into little-endian if necessary. #if !defined(CRYPTO_LITTLE_ENDIAN) @@ -217,32 +218,32 @@ void BLAKE2s::processChunk(uint32_t f0) #endif // Format the block to be hashed. - memcpy(state.v, state.h, sizeof(state.h)); - state.v[8] = BLAKE2s_IV0; - state.v[9] = BLAKE2s_IV1; - state.v[10] = BLAKE2s_IV2; - state.v[11] = BLAKE2s_IV3; - state.v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length); - state.v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32); - state.v[14] = BLAKE2s_IV6 ^ f0; - state.v[15] = BLAKE2s_IV7; + memcpy(v, state.h, sizeof(state.h)); + v[8] = BLAKE2s_IV0; + v[9] = BLAKE2s_IV1; + v[10] = BLAKE2s_IV2; + v[11] = BLAKE2s_IV3; + v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length); + v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32); + v[14] = BLAKE2s_IV6 ^ f0; + v[15] = BLAKE2s_IV7; // Perform the 10 BLAKE2s rounds. for (index = 0; index < 10; ++index) { // Column round. - quarterRound(state.v[0], state.v[4], state.v[8], state.v[12], 0); - quarterRound(state.v[1], state.v[5], state.v[9], state.v[13], 1); - quarterRound(state.v[2], state.v[6], state.v[10], state.v[14], 2); - quarterRound(state.v[3], state.v[7], state.v[11], state.v[15], 3); + quarterRound(v[0], v[4], v[8], v[12], 0); + quarterRound(v[1], v[5], v[9], v[13], 1); + quarterRound(v[2], v[6], v[10], v[14], 2); + quarterRound(v[3], v[7], v[11], v[15], 3); // Diagonal round. - quarterRound(state.v[0], state.v[5], state.v[10], state.v[15], 4); - quarterRound(state.v[1], state.v[6], state.v[11], state.v[12], 5); - quarterRound(state.v[2], state.v[7], state.v[8], state.v[13], 6); - quarterRound(state.v[3], state.v[4], state.v[9], state.v[14], 7); + quarterRound(v[0], v[5], v[10], v[15], 4); + quarterRound(v[1], v[6], v[11], v[12], 5); + quarterRound(v[2], v[7], v[8], v[13], 6); + quarterRound(v[3], v[4], v[9], v[14], 7); } // Combine the new and old hash values. for (index = 0; index < 8; ++index) - state.h[index] ^= (state.v[index] ^ state.v[index + 8]); + state.h[index] ^= (v[index] ^ v[index + 8]); } diff --git a/libraries/Crypto/BLAKE2s.h b/libraries/Crypto/BLAKE2s.h index baece681..1131e282 100644 --- a/libraries/Crypto/BLAKE2s.h +++ b/libraries/Crypto/BLAKE2s.h @@ -48,7 +48,6 @@ private: struct { uint32_t h[8]; uint32_t m[16]; - uint32_t v[16]; uint64_t length; uint8_t chunkSize; } state;