mirror of
https://github.com/taigrr/arduinolibs
synced 2025-01-18 04:33:12 -08:00
Reduce the state size for BLAKE2 by moving state.v to the stack
This commit is contained in:
parent
f52888517b
commit
1ae693127c
@ -99,13 +99,13 @@ Arduino Mega 2560 running at 16 MHz are similar:
|
|||||||
<tr><td>SHA512</td><td align="right">122.82us</td><td align="right">15953.42us</td><td align="right"> </td><td align="right">211</td></tr>
|
<tr><td>SHA512</td><td align="right">122.82us</td><td align="right">15953.42us</td><td align="right"> </td><td align="right">211</td></tr>
|
||||||
<tr><td>SHA3_256</td><td align="right">60.69us</td><td align="right">8180.24us</td><td align="right"> </td><td align="right">205</td></tr>
|
<tr><td>SHA3_256</td><td align="right">60.69us</td><td align="right">8180.24us</td><td align="right"> </td><td align="right">205</td></tr>
|
||||||
<tr><td>SHA3_512</td><td align="right">113.88us</td><td align="right">8196.34us</td><td align="right"> </td><td align="right">205</td></tr>
|
<tr><td>SHA3_512</td><td align="right">113.88us</td><td align="right">8196.34us</td><td align="right"> </td><td align="right">205</td></tr>
|
||||||
<tr><td>BLAKE2s</td><td align="right">18.54us</td><td align="right">1200.06us</td><td align="right"> </td><td align="right">171</td></tr>
|
<tr><td>BLAKE2s</td><td align="right">20.65us</td><td align="right">1335.25us</td><td align="right"> </td><td align="right">107</td></tr>
|
||||||
<tr><td>BLAKE2b</td><td align="right">50.70us</td><td align="right">6515.87us</td><td align="right"> </td><td align="right">339</td></tr>
|
<tr><td>BLAKE2b</td><td align="right">65.22us</td><td align="right">8375.36us</td><td align="right"> </td><td align="right">211</td></tr>
|
||||||
<tr><td colspan="5"> </td></tr>
|
<tr><td colspan="5"> </td></tr>
|
||||||
<tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
|
<tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
|
||||||
<tr><td>SHA1 (HMAC mode)</td><td align="right">21.90us</td><td align="right">4296.33us</td><td align="right">1420.24us</td><td align="right">95</td></tr>
|
<tr><td>SHA1 (HMAC mode)</td><td align="right">21.90us</td><td align="right">4296.33us</td><td align="right">1420.24us</td><td align="right">95</td></tr>
|
||||||
<tr><td>SHA256 (HMAC mode)</td><td align="right">43.85us</td><td align="right">8552.61us</td><td align="right">2836.49us</td><td align="right">107</td></tr>
|
<tr><td>SHA256 (HMAC mode)</td><td align="right">43.85us</td><td align="right">8552.61us</td><td align="right">2836.49us</td><td align="right">107</td></tr>
|
||||||
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">18.54us</td><td align="right">3649.98us</td><td align="right">1214.81us</td><td align="right">171</td></tr>
|
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">20.65us</td><td align="right">4055.56us</td><td align="right">1350.00us</td><td align="right">107</td></tr>
|
||||||
<tr><td>Poly1305</td><td align="right">26.29us</td><td align="right">486.15us</td><td align="right">17.26us</td><td align="right">87</td></tr>
|
<tr><td>Poly1305</td><td align="right">26.29us</td><td align="right">486.15us</td><td align="right">17.26us</td><td align="right">87</td></tr>
|
||||||
<tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr>
|
<tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr>
|
||||||
<tr><td colspan="5"> </td></tr>
|
<tr><td colspan="5"> </td></tr>
|
||||||
@ -156,13 +156,13 @@ All figures are for the Arduino Due running at 84 MHz:
|
|||||||
<tr><td>SHA512</td><td align="right">2.87us</td><td align="right">370.37us</td><td align="right"> </td><td align="right">224</td></tr>
|
<tr><td>SHA512</td><td align="right">2.87us</td><td align="right">370.37us</td><td align="right"> </td><td align="right">224</td></tr>
|
||||||
<tr><td>SHA3_256</td><td align="right">5.64us</td><td align="right">735.29us</td><td align="right"> </td><td align="right">224</td></tr>
|
<tr><td>SHA3_256</td><td align="right">5.64us</td><td align="right">735.29us</td><td align="right"> </td><td align="right">224</td></tr>
|
||||||
<tr><td>SHA3_512</td><td align="right">10.42us</td><td align="right">735.49us</td><td align="right"> </td><td align="right">224</td></tr>
|
<tr><td>SHA3_512</td><td align="right">10.42us</td><td align="right">735.49us</td><td align="right"> </td><td align="right">224</td></tr>
|
||||||
<tr><td>BLAKE2s</td><td align="right">0.76us</td><td align="right">50.88us</td><td align="right"> </td><td align="right">184</td></tr>
|
<tr><td>BLAKE2s</td><td align="right">0.72us</td><td align="right">48.24us</td><td align="right"> </td><td align="right">120</td></tr>
|
||||||
<tr><td>BLAKE2b</td><td align="right">1.33us</td><td align="right">170.93us</td><td align="right"> </td><td align="right">352</td></tr>
|
<tr><td>BLAKE2b</td><td align="right">1.29us</td><td align="right">165.28us</td><td align="right"> </td><td align="right">224</td></tr>
|
||||||
<tr><td colspan="5"> </td></tr>
|
<tr><td colspan="5"> </td></tr>
|
||||||
<tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
|
<tr><td>Authentication Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
|
||||||
<tr><td>SHA1 (HMAC mode)</td><td align="right">0.94us</td><td align="right">193.92us</td><td align="right">65.09us</td><td align="right">112</td></tr>
|
<tr><td>SHA1 (HMAC mode)</td><td align="right">0.94us</td><td align="right">193.92us</td><td align="right">65.09us</td><td align="right">112</td></tr>
|
||||||
<tr><td>SHA256 (HMAC mode)</td><td align="right">1.15us</td><td align="right">238.98us</td><td align="right">80.44us</td><td align="right">120</td></tr>
|
<tr><td>SHA256 (HMAC mode)</td><td align="right">1.15us</td><td align="right">238.98us</td><td align="right">80.44us</td><td align="right">120</td></tr>
|
||||||
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">0.76us</td><td align="right">165.64us</td><td align="right">59.92us</td><td align="right">184</td></tr>
|
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">0.72us</td><td align="right">157.75us</td><td align="right">57.18us</td><td align="right">120</td></tr>
|
||||||
<tr><td>Poly1305</td><td align="right">0.85us</td><td align="right">19.25us</td><td align="right">2.35us</td><td align="right">96</td></tr>
|
<tr><td>Poly1305</td><td align="right">0.85us</td><td align="right">19.25us</td><td align="right">2.35us</td><td align="right">96</td></tr>
|
||||||
<tr><td>GHASH</td><td align="right">4.37us</td><td align="right">1.50us</td><td align="right">4.37us</td><td align="right">36</td></tr>
|
<tr><td>GHASH</td><td align="right">4.37us</td><td align="right">1.50us</td><td align="right">4.37us</td><td align="right">36</td></tr>
|
||||||
<tr><td colspan="5"> </td></tr>
|
<tr><td colspan="5"> </td></tr>
|
||||||
|
@ -216,6 +216,7 @@ static const uint8_t sigma[12][16] PROGMEM = {
|
|||||||
void BLAKE2b::processChunk(uint64_t f0)
|
void BLAKE2b::processChunk(uint64_t f0)
|
||||||
{
|
{
|
||||||
uint8_t index;
|
uint8_t index;
|
||||||
|
uint64_t v[16];
|
||||||
|
|
||||||
// Byte-swap the message buffer into little-endian if necessary.
|
// Byte-swap the message buffer into little-endian if necessary.
|
||||||
#if !defined(CRYPTO_LITTLE_ENDIAN)
|
#if !defined(CRYPTO_LITTLE_ENDIAN)
|
||||||
@ -224,32 +225,32 @@ void BLAKE2b::processChunk(uint64_t f0)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Format the block to be hashed.
|
// Format the block to be hashed.
|
||||||
memcpy(state.v, state.h, sizeof(state.h));
|
memcpy(v, state.h, sizeof(state.h));
|
||||||
state.v[8] = BLAKE2b_IV0;
|
v[8] = BLAKE2b_IV0;
|
||||||
state.v[9] = BLAKE2b_IV1;
|
v[9] = BLAKE2b_IV1;
|
||||||
state.v[10] = BLAKE2b_IV2;
|
v[10] = BLAKE2b_IV2;
|
||||||
state.v[11] = BLAKE2b_IV3;
|
v[11] = BLAKE2b_IV3;
|
||||||
state.v[12] = BLAKE2b_IV4 ^ state.lengthLow;
|
v[12] = BLAKE2b_IV4 ^ state.lengthLow;
|
||||||
state.v[13] = BLAKE2b_IV5 ^ state.lengthHigh;
|
v[13] = BLAKE2b_IV5 ^ state.lengthHigh;
|
||||||
state.v[14] = BLAKE2b_IV6 ^ f0;
|
v[14] = BLAKE2b_IV6 ^ f0;
|
||||||
state.v[15] = BLAKE2b_IV7;
|
v[15] = BLAKE2b_IV7;
|
||||||
|
|
||||||
// Perform the 12 BLAKE2b rounds.
|
// Perform the 12 BLAKE2b rounds.
|
||||||
for (index = 0; index < 12; ++index) {
|
for (index = 0; index < 12; ++index) {
|
||||||
// Column round.
|
// Column round.
|
||||||
quarterRound(state.v[0], state.v[4], state.v[8], state.v[12], 0);
|
quarterRound(v[0], v[4], v[8], v[12], 0);
|
||||||
quarterRound(state.v[1], state.v[5], state.v[9], state.v[13], 1);
|
quarterRound(v[1], v[5], v[9], v[13], 1);
|
||||||
quarterRound(state.v[2], state.v[6], state.v[10], state.v[14], 2);
|
quarterRound(v[2], v[6], v[10], v[14], 2);
|
||||||
quarterRound(state.v[3], state.v[7], state.v[11], state.v[15], 3);
|
quarterRound(v[3], v[7], v[11], v[15], 3);
|
||||||
|
|
||||||
// Diagonal round.
|
// Diagonal round.
|
||||||
quarterRound(state.v[0], state.v[5], state.v[10], state.v[15], 4);
|
quarterRound(v[0], v[5], v[10], v[15], 4);
|
||||||
quarterRound(state.v[1], state.v[6], state.v[11], state.v[12], 5);
|
quarterRound(v[1], v[6], v[11], v[12], 5);
|
||||||
quarterRound(state.v[2], state.v[7], state.v[8], state.v[13], 6);
|
quarterRound(v[2], v[7], v[8], v[13], 6);
|
||||||
quarterRound(state.v[3], state.v[4], state.v[9], state.v[14], 7);
|
quarterRound(v[3], v[4], v[9], v[14], 7);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Combine the new and old hash values.
|
// Combine the new and old hash values.
|
||||||
for (index = 0; index < 8; ++index)
|
for (index = 0; index < 8; ++index)
|
||||||
state.h[index] ^= (state.v[index] ^ state.v[index + 8]);
|
state.h[index] ^= (v[index] ^ v[index + 8]);
|
||||||
}
|
}
|
||||||
|
@ -48,7 +48,6 @@ private:
|
|||||||
struct {
|
struct {
|
||||||
uint64_t h[8];
|
uint64_t h[8];
|
||||||
uint64_t m[16];
|
uint64_t m[16];
|
||||||
uint64_t v[16];
|
|
||||||
uint64_t lengthLow;
|
uint64_t lengthLow;
|
||||||
uint64_t lengthHigh;
|
uint64_t lengthHigh;
|
||||||
uint8_t chunkSize;
|
uint8_t chunkSize;
|
||||||
|
@ -209,6 +209,7 @@ static const uint8_t sigma[10][16] PROGMEM = {
|
|||||||
void BLAKE2s::processChunk(uint32_t f0)
|
void BLAKE2s::processChunk(uint32_t f0)
|
||||||
{
|
{
|
||||||
uint8_t index;
|
uint8_t index;
|
||||||
|
uint32_t v[16];
|
||||||
|
|
||||||
// Byte-swap the message buffer into little-endian if necessary.
|
// Byte-swap the message buffer into little-endian if necessary.
|
||||||
#if !defined(CRYPTO_LITTLE_ENDIAN)
|
#if !defined(CRYPTO_LITTLE_ENDIAN)
|
||||||
@ -217,32 +218,32 @@ void BLAKE2s::processChunk(uint32_t f0)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Format the block to be hashed.
|
// Format the block to be hashed.
|
||||||
memcpy(state.v, state.h, sizeof(state.h));
|
memcpy(v, state.h, sizeof(state.h));
|
||||||
state.v[8] = BLAKE2s_IV0;
|
v[8] = BLAKE2s_IV0;
|
||||||
state.v[9] = BLAKE2s_IV1;
|
v[9] = BLAKE2s_IV1;
|
||||||
state.v[10] = BLAKE2s_IV2;
|
v[10] = BLAKE2s_IV2;
|
||||||
state.v[11] = BLAKE2s_IV3;
|
v[11] = BLAKE2s_IV3;
|
||||||
state.v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length);
|
v[12] = BLAKE2s_IV4 ^ (uint32_t)(state.length);
|
||||||
state.v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32);
|
v[13] = BLAKE2s_IV5 ^ (uint32_t)(state.length >> 32);
|
||||||
state.v[14] = BLAKE2s_IV6 ^ f0;
|
v[14] = BLAKE2s_IV6 ^ f0;
|
||||||
state.v[15] = BLAKE2s_IV7;
|
v[15] = BLAKE2s_IV7;
|
||||||
|
|
||||||
// Perform the 10 BLAKE2s rounds.
|
// Perform the 10 BLAKE2s rounds.
|
||||||
for (index = 0; index < 10; ++index) {
|
for (index = 0; index < 10; ++index) {
|
||||||
// Column round.
|
// Column round.
|
||||||
quarterRound(state.v[0], state.v[4], state.v[8], state.v[12], 0);
|
quarterRound(v[0], v[4], v[8], v[12], 0);
|
||||||
quarterRound(state.v[1], state.v[5], state.v[9], state.v[13], 1);
|
quarterRound(v[1], v[5], v[9], v[13], 1);
|
||||||
quarterRound(state.v[2], state.v[6], state.v[10], state.v[14], 2);
|
quarterRound(v[2], v[6], v[10], v[14], 2);
|
||||||
quarterRound(state.v[3], state.v[7], state.v[11], state.v[15], 3);
|
quarterRound(v[3], v[7], v[11], v[15], 3);
|
||||||
|
|
||||||
// Diagonal round.
|
// Diagonal round.
|
||||||
quarterRound(state.v[0], state.v[5], state.v[10], state.v[15], 4);
|
quarterRound(v[0], v[5], v[10], v[15], 4);
|
||||||
quarterRound(state.v[1], state.v[6], state.v[11], state.v[12], 5);
|
quarterRound(v[1], v[6], v[11], v[12], 5);
|
||||||
quarterRound(state.v[2], state.v[7], state.v[8], state.v[13], 6);
|
quarterRound(v[2], v[7], v[8], v[13], 6);
|
||||||
quarterRound(state.v[3], state.v[4], state.v[9], state.v[14], 7);
|
quarterRound(v[3], v[4], v[9], v[14], 7);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Combine the new and old hash values.
|
// Combine the new and old hash values.
|
||||||
for (index = 0; index < 8; ++index)
|
for (index = 0; index < 8; ++index)
|
||||||
state.h[index] ^= (state.v[index] ^ state.v[index + 8]);
|
state.h[index] ^= (v[index] ^ v[index + 8]);
|
||||||
}
|
}
|
||||||
|
@ -48,7 +48,6 @@ private:
|
|||||||
struct {
|
struct {
|
||||||
uint32_t h[8];
|
uint32_t h[8];
|
||||||
uint32_t m[16];
|
uint32_t m[16];
|
||||||
uint32_t v[16];
|
|
||||||
uint64_t length;
|
uint64_t length;
|
||||||
uint8_t chunkSize;
|
uint8_t chunkSize;
|
||||||
} state;
|
} state;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user