1
0
mirror of https://github.com/taigrr/arduinolibs synced 2025-01-18 04:33:12 -08:00

Reduce the state size for BLAKE2 by moving state.v to the stack

This commit is contained in:
Rhys Weatherley 2016-01-16 06:43:56 +10:00
parent f52888517b
commit 1ae693127c
5 changed files with 44 additions and 44 deletions

View File

@ -99,13 +99,13 @@ Arduino Mega 2560 running at 16 MHz are similar:
<tr><td>SHA512</td><td align="right">122.82us</td><td align="right">15953.42us</td><td align="right"> </td><td align="right">211</td></tr> <tr><td>SHA512</td><td align="right">122.82us</td><td align="right">15953.42us</td><td align="right"> </td><td align="right">211</td></tr>
<tr><td>SHA3_256</td><td align="right">60.69us</td><td align="right">8180.24us</td><td align="right"> </td><td align="right">205</td></tr> <tr><td>SHA3_256</td><td align="right">60.69us</td><td align="right">8180.24us</td><td align="right"> </td><td align="right">205</td></tr>
<tr><td>SHA3_512</td><td align="right">113.88us</td><td align="right">8196.34us</td><td align="right"> </td><td align="right">205</td></tr> <tr><td>SHA3_512</td><td align="right">113.88us</td><td align="right">8196.34us</td><td align="right"> </td><td align="right">205</td></tr>
<tr><td>BLAKE2s</td><td align="right">18.54us</td><td align="right">1200.06us</td><td align="right"> </td><td align="right">171</td></tr> <tr><td>BLAKE2s</td><td align="right">20.65us</td><td align="right">1335.25us</td><td align="right"> </td><td align="right">107</td></tr>
<tr><td>BLAKE2b</td><td align="right">50.70us</td><td align="right">6515.87us</td><td align="right"> </td><td align="right">339</td></tr> <tr><td>BLAKE2b</td><td align="right">65.22us</td><td align="right">8375.36us</td><td align="right"> </td><td align="right">211</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>
<tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr> <tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
<tr><td>SHA1 (HMAC mode)</td><td align="right">21.90us</td><td align="right">4296.33us</td><td align="right">1420.24us</td><td align="right">95</td></tr> <tr><td>SHA1 (HMAC mode)</td><td align="right">21.90us</td><td align="right">4296.33us</td><td align="right">1420.24us</td><td align="right">95</td></tr>
<tr><td>SHA256 (HMAC mode)</td><td align="right">43.85us</td><td align="right">8552.61us</td><td align="right">2836.49us</td><td align="right">107</td></tr> <tr><td>SHA256 (HMAC mode)</td><td align="right">43.85us</td><td align="right">8552.61us</td><td align="right">2836.49us</td><td align="right">107</td></tr>
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">18.54us</td><td align="right">3649.98us</td><td align="right">1214.81us</td><td align="right">171</td></tr> <tr><td>BLAKE2s (HMAC mode)</td><td align="right">20.65us</td><td align="right">4055.56us</td><td align="right">1350.00us</td><td align="right">107</td></tr>
<tr><td>Poly1305</td><td align="right">26.29us</td><td align="right">486.15us</td><td align="right">17.26us</td><td align="right">87</td></tr> <tr><td>Poly1305</td><td align="right">26.29us</td><td align="right">486.15us</td><td align="right">17.26us</td><td align="right">87</td></tr>
<tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr> <tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>
@ -156,13 +156,13 @@ All figures are for the Arduino Due running at 84 MHz:
<tr><td>SHA512</td><td align="right">2.87us</td><td align="right">370.37us</td><td align="right"> </td><td align="right">224</td></tr> <tr><td>SHA512</td><td align="right">2.87us</td><td align="right">370.37us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td>SHA3_256</td><td align="right">5.64us</td><td align="right">735.29us</td><td align="right"> </td><td align="right">224</td></tr> <tr><td>SHA3_256</td><td align="right">5.64us</td><td align="right">735.29us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td>SHA3_512</td><td align="right">10.42us</td><td align="right">735.49us</td><td align="right"> </td><td align="right">224</td></tr> <tr><td>SHA3_512</td><td align="right">10.42us</td><td align="right">735.49us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td>BLAKE2s</td><td align="right">0.76us</td><td align="right">50.88us</td><td align="right"> </td><td align="right">184</td></tr> <tr><td>BLAKE2s</td><td align="right">0.72us</td><td align="right">48.24us</td><td align="right"> </td><td align="right">120</td></tr>
<tr><td>BLAKE2b</td><td align="right">1.33us</td><td align="right">170.93us</td><td align="right"> </td><td align="right">352</td></tr> <tr><td>BLAKE2b</td><td align="right">1.29us</td><td align="right">165.28us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>
<tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr> <tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
<tr><td>SHA1 (HMAC mode)</td><td align="right">0.94us</td><td align="right">193.92us</td><td align="right">65.09us</td><td align="right">112</td></tr> <tr><td>SHA1 (HMAC mode)</td><td align="right">0.94us</td><td align="right">193.92us</td><td align="right">65.09us</td><td align="right">112</td></tr>
<tr><td>SHA256 (HMAC mode)</td><td align="right">1.15us</td><td align="right">238.98us</td><td align="right">80.44us</td><td align="right">120</td></tr> <tr><td>SHA256 (HMAC mode)</td><td align="right">1.15us</td><td align="right">238.98us</td><td align="right">80.44us</td><td align="right">120</td></tr>
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">0.76us</td><td align="right">165.64us</td><td align="right">59.92us</td><td align="right">184</td></tr> <tr><td>BLAKE2s (HMAC mode)</td><td align="right">0.72us</td><td align="right">157.75us</td><td align="right">57.18us</td><td align="right">120</td></tr>
<tr><td>Poly1305</td><td align="right">0.85us</td><td align="right">19.25us</td><td align="right">2.35us</td><td align="right">96</td></tr> <tr><td>Poly1305</td><td align="right">0.85us</td><td align="right">19.25us</td><td align="right">2.35us</td><td align="right">96</td></tr>
<tr><td>GHASH</td><td align="right">4.37us</td><td align="right">1.50us</td><td align="right">4.37us</td><td align="right">36</td></tr> <tr><td>GHASH</td><td align="right">4.37us</td><td align="right">1.50us</td><td align="right">4.37us</td><td align="right">36</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>

View File

@ -216,6 +216,7 @@ static const uint8_t sigma[12][16] PROGMEM = {
void BLAKE2b::processChunk(uint64_t f0) void BLAKE2b::processChunk(uint64_t f0)
{ {
uint8_t index; uint8_t index;
uint64_t v[16];
// Byte-swap the message buffer into little-endian if necessary. // Byte-swap the message buffer into little-endian if necessary.
#if !defined(CRYPTO_LITTLE_ENDIAN) #if !defined(CRYPTO_LITTLE_ENDIAN)
@ -224,32 +225,32 @@ void BLAKE2b::processChunk(uint64_t f0)
#endif #endif
// Format the block to be hashed. // Format the block to be hashed.
memcpy(state.v, state.h, sizeof(state.h)); memcpy(v, state.h, sizeof(state.h));
state.v[8] = BLAKE2b_IV0; v[8] = BLAKE2b_IV0;
state.v[9] = BLAKE2b_IV1; v[9] = BLAKE2b_IV1;
state.v[10] = BLAKE2b_IV2; v[10] = BLAKE2b_IV2;
state.v[11] = BLAKE2b_IV3; v[11] = BLAKE2b_IV3;
state.v[12] = BLAKE2b_IV4 ^ state.lengthLow; v[12] = BLAKE2b_IV4 ^ state.lengthLow;
state.v[13] = BLAKE2b_IV5 ^ state.lengthHigh; v[13] = BLAKE2b_IV5 ^ state.lengthHigh;
state.v[14] = BLAKE2b_IV6 ^ f0; v[14] = BLAKE2b_IV6 ^ f0;
state.v[15] = BLAKE2b_IV7; v[15] = BLAKE2b_IV7;
// Perform the 12 BLAKE2b rounds. // Perform the 12 BLAKE2b rounds.
for (index = 0; index < 12; ++index) { for (index = 0; index < 12; ++index) {
// Column round. // Column round.
quarterRound(state.v[0], state.v[4], state.v[8], state.v[12], 0); quarterRound(v[0], v[4], v[8], v[12], 0);
quarterRound(state.v[1], state.v[5], state.v[9], state.v[13], 1); quarterRound(v[1], v[5], v[9], v[13], 1);
quarterRound(state.v[2], state.v[6], state.v[10], state.v[14], 2); quarterRound(v[2], v[6], v[10], v[14], 2);
quarterRound(state.v[3], state.v[7], state.v[11], state.v[15], 3); quarterRound(v[3], v[7], v[11], v[15], 3);
// Diagonal round. // Diagonal round.
quarterRound(state.v[0], state.v[5], state.v[10], state.v[15], 4); quarterRound(v[0], v[5], v[10], v[15], 4);
quarterRound(state.v[1], state.v[6], state.v[11], state.v[12], 5); quarterRound(v[1], v[6], v[11], v[12], 5);
quarterRound(state.v[2], state.v[7], state.v[8], state.v[13], 6); quarterRound(v[2], v[7], v[8], v[13], 6);
quarterRound(state.v[3], state.v[4], state.v[9], state.v[14], 7); quarterRound(v[3], v[4], v[9], v[14], 7);
} }
// Combine the new and old hash values. // Combine the new and old hash values.
for (index = 0; index < 8; ++index) for (index = 0; index < 8; ++index)
state.h[index] ^= (state.v[index] ^ state.v[index + 8]); state.h[index] ^= (v[index] ^ v[index + 8]);
} }

View File

@ -48,7 +48,6 @@ private:
struct { struct {
uint64_t h[8]; uint64_t h[8];
uint64_t m[16]; uint64_t m[16];
uint64_t v[16];
uint64_t lengthLow; uint64_t lengthLow;
uint64_t lengthHigh; uint64_t lengthHigh;
uint8_t chunkSize; uint8_t chunkSize;

View File

@ -209,6 +209,7 @@ static const uint8_t sigma[10][16] PROGMEM = {
void BLAKE2s::processChunk(uint32_t f0) void BLAKE2s::processChunk(uint32_t f0)
{ {
uint8_t index; uint8_t index;
uint32_t v[16];
// Byte-swap the message buffer into little-endian if necessary. // Byte-swap the message buffer into little-endian if necessary.
#if !defined(CRYPTO_LITTLE_ENDIAN) #if !defined(CRYPTO_LITTLE_ENDIAN)
@ -217,32 +218,32 @@ void BLAKE2s::processChunk(uint32_t f0)
#endif #endif
// Format the block to be hashed. // Format the block to be hashed.
memcpy(state.v, state.h, sizeof(state.h)); memcpy(v, state.h, sizeof(state.h));
state.v[8] = BLAKE2s_IV0; v[8] = BLAKE2s_IV0;
state.v[9] = BLAKE2s_IV1; v[9] = BLAKE2s_IV1;
state.v[10] = BLAKE2s_IV2; v[10] = BLAKE2s_IV2;
state.v[11] = BLAKE2s_IV3; v[11] = BLAKE2s_IV3;
state.v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length); v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length);
state.v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32); v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32);
state.v[14] = BLAKE2s_IV6 ^ f0; v[14] = BLAKE2s_IV6 ^ f0;
state.v[15] = BLAKE2s_IV7; v[15] = BLAKE2s_IV7;
// Perform the 10 BLAKE2s rounds. // Perform the 10 BLAKE2s rounds.
for (index = 0; index < 10; ++index) { for (index = 0; index < 10; ++index) {
// Column round. // Column round.
quarterRound(state.v[0], state.v[4], state.v[8], state.v[12], 0); quarterRound(v[0], v[4], v[8], v[12], 0);
quarterRound(state.v[1], state.v[5], state.v[9], state.v[13], 1); quarterRound(v[1], v[5], v[9], v[13], 1);
quarterRound(state.v[2], state.v[6], state.v[10], state.v[14], 2); quarterRound(v[2], v[6], v[10], v[14], 2);
quarterRound(state.v[3], state.v[7], state.v[11], state.v[15], 3); quarterRound(v[3], v[7], v[11], v[15], 3);
// Diagonal round. // Diagonal round.
quarterRound(state.v[0], state.v[5], state.v[10], state.v[15], 4); quarterRound(v[0], v[5], v[10], v[15], 4);
quarterRound(state.v[1], state.v[6], state.v[11], state.v[12], 5); quarterRound(v[1], v[6], v[11], v[12], 5);
quarterRound(state.v[2], state.v[7], state.v[8], state.v[13], 6); quarterRound(v[2], v[7], v[8], v[13], 6);
quarterRound(state.v[3], state.v[4], state.v[9], state.v[14], 7); quarterRound(v[3], v[4], v[9], v[14], 7);
} }
// Combine the new and old hash values. // Combine the new and old hash values.
for (index = 0; index < 8; ++index) for (index = 0; index < 8; ++index)
state.h[index] ^= (state.v[index] ^ state.v[index + 8]); state.h[index] ^= (v[index] ^ v[index + 8]);
} }

View File

@ -48,7 +48,6 @@ private:
struct { struct {
uint32_t h[8]; uint32_t h[8];
uint32_t m[16]; uint32_t m[16];
uint32_t v[16];
uint64_t length; uint64_t length;
uint8_t chunkSize; uint8_t chunkSize;
} state; } state;