mirror of
https://github.com/taigrr/arduinolibs
synced 2025-01-18 04:33:12 -08:00
Reduce the object state size for Poly1305
This commit is contained in:
parent
b852d222b4
commit
95313613b7
@ -85,7 +85,7 @@ Arduino Mega 2560 running at 16 MHz are similar:
|
||||
<tr><td>SpeckLowMemory (256-bit key, ECB mode)</td><td align="right">37.87us</td><td align="right"> </td><td align="right">16.89us</td><td align="right">35</td></tr>
|
||||
<tr><td colspan="5"> </td></tr>
|
||||
<tr><td>AEAD Algorithm</td><td align="right">Encryption (per byte)</td><td align="right">Decryption (per byte)</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
|
||||
<tr><td>ChaChaPoly</td><td align="right">41.23us</td><td align="right">41.23us</td><td align="right">902.55us</td><td align="right">255</td></tr>
|
||||
<tr><td>ChaChaPoly</td><td align="right">41.20us</td><td align="right">41.19us</td><td align="right">902.36us</td><td align="right">221</td></tr>
|
||||
<tr><td>GCM&lt;AES128&gt;</td><td align="right">183.25us</td><td align="right">182.80us</td><td align="right">1272.73us</td><td align="right">284</td></tr>
|
||||
<tr><td>GCM&lt;AES192&gt;</td><td align="right">189.92us</td><td align="right">189.47us</td><td align="right">1492.60us</td><td align="right">316</td></tr>
|
||||
<tr><td>GCM&lt;AES256&gt;</td><td align="right">196.59us</td><td align="right">196.13us</td><td align="right">1767.33us</td><td align="right">348</td></tr>
|
||||
@ -106,7 +106,7 @@ Arduino Mega 2560 running at 16 MHz are similar:
|
||||
<tr><td>SHA1 (HMAC mode)</td><td align="right">21.90us</td><td align="right">4296.33us</td><td align="right">1420.24us</td><td align="right">95</td></tr>
|
||||
<tr><td>SHA256 (HMAC mode)</td><td align="right">43.85us</td><td align="right">8552.61us</td><td align="right">2836.49us</td><td align="right">107</td></tr>
|
||||
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">20.65us</td><td align="right">4055.56us</td><td align="right">1350.00us</td><td align="right">107</td></tr>
|
||||
<tr><td>Poly1305</td><td align="right">26.29us</td><td align="right">486.15us</td><td align="right">17.26us</td><td align="right">87</td></tr>
|
||||
<tr><td>Poly1305</td><td align="right">26.26us</td><td align="right">489.11us</td><td align="right">17.06us</td><td align="right">53</td></tr>
|
||||
<tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr>
|
||||
<tr><td colspan="5"> </td></tr>
|
||||
<tr><td>Public Key Operation</td><td align="right">Time (per operation)</td><td colspan="3">Comment</td></tr>
|
||||
@ -142,7 +142,7 @@ All figures are for the Arduino Due running at 84 MHz:
|
||||
<tr><td>SpeckLowMemory (256-bit key, ECB mode)</td><td align="right">2.90us</td><td align="right"> </td><td align="right">1.83us</td><td align="right">48</td></tr>
|
||||
<tr><td colspan="5"> </td></tr>
|
||||
<tr><td>AEAD Algorithm</td><td align="right">Encryption (per byte)</td><td align="right">Decryption (per byte)</td><td>Key Setup</td><td>State Size (bytes)</td></tr>
|
||||
<tr><td>ChaChaPoly</td><td align="right">1.66us</td><td align="right">1.66us</td><td align="right">45.02us</td><td align="right">280</td></tr>
|
||||
<tr><td>ChaChaPoly</td><td align="right">1.71us</td><td align="right">1.71us</td><td align="right">45.08us</td><td align="right">240</td></tr>
|
||||
<tr><td>GCM&lt;AES128&gt;</td><td align="right">10.29us</td><td align="right">10.29us</td><td align="right">223.82us</td><td align="right">312</td></tr>
|
||||
<tr><td>GCM&lt;AES192&gt;</td><td align="right">11.50us</td><td align="right">11.51us</td><td align="right">265.62us</td><td align="right">344</td></tr>
|
||||
<tr><td>GCM&lt;AES256&gt;</td><td align="right">12.67us</td><td align="right">12.67us</td><td align="right">313.06us</td><td align="right">376</td></tr>
|
||||
@ -163,7 +163,7 @@ All figures are for the Arduino Due running at 84 MHz:
|
||||
<tr><td>SHA1 (HMAC mode)</td><td align="right">0.94us</td><td align="right">193.92us</td><td align="right">65.09us</td><td align="right">112</td></tr>
|
||||
<tr><td>SHA256 (HMAC mode)</td><td align="right">1.15us</td><td align="right">238.98us</td><td align="right">80.44us</td><td align="right">120</td></tr>
|
||||
<tr><td>BLAKE2s (HMAC mode)</td><td align="right">0.72us</td><td align="right">157.75us</td><td align="right">57.18us</td><td align="right">120</td></tr>
|
||||
<tr><td>Poly1305</td><td align="right">0.85us</td><td align="right">19.25us</td><td align="right">2.35us</td><td align="right">96</td></tr>
|
||||
<tr><td>Poly1305</td><td align="right">0.81us</td><td align="right">19.01us</td><td align="right">2.57us</td><td align="right">60</td></tr>
|
||||
<tr><td>GHASH</td><td align="right">4.37us</td><td align="right">1.50us</td><td align="right">4.37us</td><td align="right">36</td></tr>
|
||||
<tr><td colspan="5"> </td></tr>
|
||||
<tr><td>Public Key Operation</td><td align="right">Time (per operation)</td><td colspan="3">Comment</td></tr>
|
||||
|
@ -180,6 +180,7 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len)
|
||||
{
|
||||
dlimb_t carry;
|
||||
uint8_t i;
|
||||
limb_t t[NUM_LIMBS_256BIT + 1];
|
||||
|
||||
// Pad and flush the final chunk.
|
||||
if (state.chunkSize > 0) {
|
||||
@ -211,7 +212,7 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len)
|
||||
carry = 5;
|
||||
for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
|
||||
carry += state.h[i];
|
||||
state.t[i] = (limb_t)carry;
|
||||
t[i] = (limb_t)carry;
|
||||
carry >>= LIMB_BITS;
|
||||
}
|
||||
|
||||
@ -221,10 +222,10 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len)
|
||||
// of the result because we are about to drop it in the next step.
|
||||
// We have to do it this way to avoid giving away any information
|
||||
// about the value of h in the instruction timing.
|
||||
limb_t mask = (~((state.t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1;
|
||||
limb_t mask = (~((t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1;
|
||||
limb_t nmask = ~mask;
|
||||
for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
|
||||
state.h[i] = (state.h[i] & nmask) | (state.t[i] & mask);
|
||||
state.h[i] = (state.h[i] & nmask) | (t[i] & mask);
|
||||
}
|
||||
|
||||
// Add the encrypted nonce and format the final hash.
|
||||
@ -271,6 +272,8 @@ void Poly1305::clear()
|
||||
*/
|
||||
void Poly1305::processChunk()
|
||||
{
|
||||
limb_t t[NUM_LIMBS_256BIT + 1];
|
||||
|
||||
// Compute h = ((h + c) * r) mod (2^130 - 5).
|
||||
|
||||
// Start with h += c. We assume that h is less than (2^130 - 5) * 6
|
||||
@ -292,28 +295,28 @@ void Poly1305::processChunk()
|
||||
limb_t word = state.r[0];
|
||||
for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
|
||||
carry += ((dlimb_t)(state.h[i])) * word;
|
||||
state.t[i] = (limb_t)carry;
|
||||
t[i] = (limb_t)carry;
|
||||
carry >>= LIMB_BITS;
|
||||
}
|
||||
state.t[NUM_LIMBS_130BIT] = (limb_t)carry;
|
||||
t[NUM_LIMBS_130BIT] = (limb_t)carry;
|
||||
for (i = 1; i < NUM_LIMBS_128BIT; ++i) {
|
||||
word = state.r[i];
|
||||
carry = 0;
|
||||
for (j = 0; j < NUM_LIMBS_130BIT; ++j) {
|
||||
carry += ((dlimb_t)(state.h[j])) * word;
|
||||
carry += state.t[i + j];
|
||||
state.t[i + j] = (limb_t)carry;
|
||||
carry += t[i + j];
|
||||
t[i + j] = (limb_t)carry;
|
||||
carry >>= LIMB_BITS;
|
||||
}
|
||||
state.t[i + NUM_LIMBS_130BIT] = (limb_t)carry;
|
||||
t[i + NUM_LIMBS_130BIT] = (limb_t)carry;
|
||||
}
|
||||
|
||||
// Reduce h * r modulo (2^130 - 5) by multiplying the high 130 bits by 5
|
||||
// and adding them to the low 130 bits. See the explanation in the
|
||||
// comments for Curve25519::reduce() for a description of how this works.
|
||||
carry = ((dlimb_t)(state.t[NUM_LIMBS_128BIT] >> 2)) +
|
||||
(state.t[NUM_LIMBS_128BIT] & ~((limb_t)3));
|
||||
state.t[NUM_LIMBS_128BIT] &= 0x0003;
|
||||
carry = ((dlimb_t)(t[NUM_LIMBS_128BIT] >> 2)) +
|
||||
(t[NUM_LIMBS_128BIT] & ~((limb_t)3));
|
||||
t[NUM_LIMBS_128BIT] &= 0x0003;
|
||||
for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
|
||||
// Shift the next word of t up by (LIMB_BITS - 2) bits and then
|
||||
// multiply it by 5. Breaking it down, we can add the results
|
||||
@ -323,14 +326,14 @@ void Poly1305::processChunk()
|
||||
// fit within a dlimb_t variable. However, we can defer adding
|
||||
// (word << LIMB_BITS) until after the "carry >>= LIMB_BITS" step
|
||||
// because it won't affect the low bits of the carry.
|
||||
word = state.t[i + NUM_LIMBS_130BIT];
|
||||
word = t[i + NUM_LIMBS_130BIT];
|
||||
carry += ((dlimb_t)word) << (LIMB_BITS - 2);
|
||||
carry += state.t[i];
|
||||
carry += t[i];
|
||||
state.h[i] = (limb_t)carry;
|
||||
carry >>= LIMB_BITS;
|
||||
carry += word;
|
||||
}
|
||||
state.h[i] = (limb_t)(carry + state.t[NUM_LIMBS_128BIT]);
|
||||
state.h[i] = (limb_t)(carry + t[NUM_LIMBS_128BIT]);
|
||||
|
||||
// At this point, h is either the answer of reducing modulo (2^130 - 5)
|
||||
// or it is at most 5 subtractions away from the answer we want.
|
||||
|
@ -45,7 +45,6 @@ private:
|
||||
limb_t h[(16 / sizeof(limb_t)) + 1];
|
||||
limb_t c[(16 / sizeof(limb_t)) + 1];
|
||||
limb_t r[(16 / sizeof(limb_t))];
|
||||
limb_t t[(32 / sizeof(limb_t)) + 1];
|
||||
uint8_t chunkSize;
|
||||
} state;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user