1
0
mirror of https://github.com/taigrr/arduinolibs synced 2025-01-18 04:33:12 -08:00

Move the Keccak B state from the object to the stack

This commit is contained in:
Rhys Weatherley 2016-01-15 17:50:36 +10:00
parent 4079e6c2b7
commit f52888517b
3 changed files with 39 additions and 39 deletions

View File

@ -97,8 +97,8 @@ Arduino Mega 2560 running at 16 MHz are similar:
<tr><td>SHA1</td><td align="right">21.90us</td><td align="right">1423.28us</td><td align="right"> </td><td align="right">95</td></tr> <tr><td>SHA1</td><td align="right">21.90us</td><td align="right">1423.28us</td><td align="right"> </td><td align="right">95</td></tr>
<tr><td>SHA256</td><td align="right">43.85us</td><td align="right">2841.04us</td><td align="right"> </td><td align="right">107</td></tr> <tr><td>SHA256</td><td align="right">43.85us</td><td align="right">2841.04us</td><td align="right"> </td><td align="right">107</td></tr>
<tr><td>SHA512</td><td align="right">122.82us</td><td align="right">15953.42us</td><td align="right"> </td><td align="right">211</td></tr> <tr><td>SHA512</td><td align="right">122.82us</td><td align="right">15953.42us</td><td align="right"> </td><td align="right">211</td></tr>
<tr><td>SHA3_256</td><td align="right">61.78us</td><td align="right">8328.70us</td><td align="right"> </td><td align="right">405</td></tr> <tr><td>SHA3_256</td><td align="right">60.69us</td><td align="right">8180.24us</td><td align="right"> </td><td align="right">205</td></tr>
<tr><td>SHA3_512</td><td align="right">115.94us</td><td align="right">8344.80us</td><td align="right"> </td><td align="right">405</td></tr> <tr><td>SHA3_512</td><td align="right">113.88us</td><td align="right">8196.34us</td><td align="right"> </td><td align="right">205</td></tr>
<tr><td>BLAKE2s</td><td align="right">18.54us</td><td align="right">1200.06us</td><td align="right"> </td><td align="right">171</td></tr> <tr><td>BLAKE2s</td><td align="right">18.54us</td><td align="right">1200.06us</td><td align="right"> </td><td align="right">171</td></tr>
<tr><td>BLAKE2b</td><td align="right">50.70us</td><td align="right">6515.87us</td><td align="right"> </td><td align="right">339</td></tr> <tr><td>BLAKE2b</td><td align="right">50.70us</td><td align="right">6515.87us</td><td align="right"> </td><td align="right">339</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>
@ -154,8 +154,8 @@ All figures are for the Arduino Due running at 84 MHz:
<tr><td>SHA1</td><td align="right">0.94us</td><td align="right">62.55us</td><td align="right"> </td><td align="right">112</td></tr> <tr><td>SHA1</td><td align="right">0.94us</td><td align="right">62.55us</td><td align="right"> </td><td align="right">112</td></tr>
<tr><td>SHA256</td><td align="right">1.15us</td><td align="right">76.60us</td><td align="right"> </td><td align="right">120</td></tr> <tr><td>SHA256</td><td align="right">1.15us</td><td align="right">76.60us</td><td align="right"> </td><td align="right">120</td></tr>
<tr><td>SHA512</td><td align="right">2.87us</td><td align="right">370.37us</td><td align="right"> </td><td align="right">224</td></tr> <tr><td>SHA512</td><td align="right">2.87us</td><td align="right">370.37us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td>SHA3_256</td><td align="right">5.36us</td><td align="right">697.65us</td><td align="right"> </td><td align="right">424</td></tr> <tr><td>SHA3_256</td><td align="right">5.64us</td><td align="right">735.29us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td>SHA3_512</td><td align="right">9.89us</td><td align="right">697.81us</td><td align="right"> </td><td align="right">424</td></tr> <tr><td>SHA3_512</td><td align="right">10.42us</td><td align="right">735.49us</td><td align="right"> </td><td align="right">224</td></tr>
<tr><td>BLAKE2s</td><td align="right">0.76us</td><td align="right">50.88us</td><td align="right"> </td><td align="right">184</td></tr> <tr><td>BLAKE2s</td><td align="right">0.76us</td><td align="right">50.88us</td><td align="right"> </td><td align="right">184</td></tr>
<tr><td>BLAKE2b</td><td align="right">1.33us</td><td align="right">170.93us</td><td align="right"> </td><td align="right">352</td></tr> <tr><td>BLAKE2b</td><td align="right">1.33us</td><td align="right">170.93us</td><td align="right"> </td><td align="right">352</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>

View File

@ -273,6 +273,7 @@ void KeccakCore::setHMACKey(const void *key, size_t len, uint8_t pad, size_t has
*/ */
void KeccakCore::keccakp() void KeccakCore::keccakp()
{ {
uint64_t B[5][5];
#if defined(__AVR__) #if defined(__AVR__)
// This assembly code was generated by the "genkeccak.c" program. // This assembly code was generated by the "genkeccak.c" program.
// Do not modify this code directly. Instead modify "genkeccak.c" // Do not modify this code directly. Instead modify "genkeccak.c"
@ -1880,7 +1881,7 @@ void KeccakCore::keccakp()
"pop r29\n" "pop r29\n"
// Done // Done
: : "x"(state.B), "z"(state.A) : : "x"(B), "z"(state.A)
: "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "memory" "r16", "r17", "r18", "r19", "r20", "r21", "memory"
); );
@ -1896,52 +1897,52 @@ void KeccakCore::keccakp()
// arrays of size 5 called C and D. To save a bit of memory, // arrays of size 5 called C and D. To save a bit of memory,
// we use the first row of B to store C and compute D on the fly. // we use the first row of B to store C and compute D on the fly.
for (index = 0; index < 5; ++index) { for (index = 0; index < 5; ++index) {
state.B[0][index] = state.A[0][index] ^ state.A[1][index] ^ B[0][index] = state.A[0][index] ^ state.A[1][index] ^
state.A[2][index] ^ state.A[3][index] ^ state.A[2][index] ^ state.A[3][index] ^
state.A[4][index]; state.A[4][index];
} }
for (index = 0; index < 5; ++index) { for (index = 0; index < 5; ++index) {
D = state.B[0][addMod5(index, 4)] ^ D = B[0][addMod5(index, 4)] ^
leftRotate1_64(state.B[0][addMod5(index, 1)]); leftRotate1_64(B[0][addMod5(index, 1)]);
for (index2 = 0; index2 < 5; ++index2) for (index2 = 0; index2 < 5; ++index2)
state.A[index2][index] ^= D; state.A[index2][index] ^= D;
} }
// Step mapping rho and pi combined into a single step. // Step mapping rho and pi combined into a single step.
// Rotate all lanes by a specific offset and rearrange. // Rotate all lanes by a specific offset and rearrange.
state.B[0][0] = state.A[0][0]; B[0][0] = state.A[0][0];
state.B[1][0] = leftRotate28_64(state.A[0][3]); B[1][0] = leftRotate28_64(state.A[0][3]);
state.B[2][0] = leftRotate1_64 (state.A[0][1]); B[2][0] = leftRotate1_64 (state.A[0][1]);
state.B[3][0] = leftRotate27_64(state.A[0][4]); B[3][0] = leftRotate27_64(state.A[0][4]);
state.B[4][0] = leftRotate62_64(state.A[0][2]); B[4][0] = leftRotate62_64(state.A[0][2]);
state.B[0][1] = leftRotate44_64(state.A[1][1]); B[0][1] = leftRotate44_64(state.A[1][1]);
state.B[1][1] = leftRotate20_64(state.A[1][4]); B[1][1] = leftRotate20_64(state.A[1][4]);
state.B[2][1] = leftRotate6_64 (state.A[1][2]); B[2][1] = leftRotate6_64 (state.A[1][2]);
state.B[3][1] = leftRotate36_64(state.A[1][0]); B[3][1] = leftRotate36_64(state.A[1][0]);
state.B[4][1] = leftRotate55_64(state.A[1][3]); B[4][1] = leftRotate55_64(state.A[1][3]);
state.B[0][2] = leftRotate43_64(state.A[2][2]); B[0][2] = leftRotate43_64(state.A[2][2]);
state.B[1][2] = leftRotate3_64 (state.A[2][0]); B[1][2] = leftRotate3_64 (state.A[2][0]);
state.B[2][2] = leftRotate25_64(state.A[2][3]); B[2][2] = leftRotate25_64(state.A[2][3]);
state.B[3][2] = leftRotate10_64(state.A[2][1]); B[3][2] = leftRotate10_64(state.A[2][1]);
state.B[4][2] = leftRotate39_64(state.A[2][4]); B[4][2] = leftRotate39_64(state.A[2][4]);
state.B[0][3] = leftRotate21_64(state.A[3][3]); B[0][3] = leftRotate21_64(state.A[3][3]);
state.B[1][3] = leftRotate45_64(state.A[3][1]); B[1][3] = leftRotate45_64(state.A[3][1]);
state.B[2][3] = leftRotate8_64 (state.A[3][4]); B[2][3] = leftRotate8_64 (state.A[3][4]);
state.B[3][3] = leftRotate15_64(state.A[3][2]); B[3][3] = leftRotate15_64(state.A[3][2]);
state.B[4][3] = leftRotate41_64(state.A[3][0]); B[4][3] = leftRotate41_64(state.A[3][0]);
state.B[0][4] = leftRotate14_64(state.A[4][4]); B[0][4] = leftRotate14_64(state.A[4][4]);
state.B[1][4] = leftRotate61_64(state.A[4][2]); B[1][4] = leftRotate61_64(state.A[4][2]);
state.B[2][4] = leftRotate18_64(state.A[4][0]); B[2][4] = leftRotate18_64(state.A[4][0]);
state.B[3][4] = leftRotate56_64(state.A[4][3]); B[3][4] = leftRotate56_64(state.A[4][3]);
state.B[4][4] = leftRotate2_64 (state.A[4][1]); B[4][4] = leftRotate2_64 (state.A[4][1]);
// Step mapping chi. Combine each lane with two other lanes in its row. // Step mapping chi. Combine each lane with two other lanes in its row.
for (index = 0; index < 5; ++index) { for (index = 0; index < 5; ++index) {
for (index2 = 0; index2 < 5; ++index2) { for (index2 = 0; index2 < 5; ++index2) {
state.A[index2][index] = state.A[index2][index] =
state.B[index2][index] ^ B[index2][index] ^
((~state.B[index2][addMod5(index, 1)]) & ((~B[index2][addMod5(index, 1)]) &
state.B[index2][addMod5(index, 2)]); B[index2][addMod5(index, 2)]);
} }
} }
#endif #endif

View File

@ -51,7 +51,6 @@ public:
private: private:
struct { struct {
uint64_t A[5][5]; uint64_t A[5][5];
uint64_t B[5][5];
uint8_t inputSize; uint8_t inputSize;
uint8_t outputSize; uint8_t outputSize;
} state; } state;