diff --git a/doc/crypto.dox b/doc/crypto.dox
index 97da849f..e546e163 100644
--- a/doc/crypto.dox
+++ b/doc/crypto.dox
@@ -85,7 +85,7 @@ Ardunino Mega 2560 running at 16 MHz are similar:
SpeckLowMemory (256-bit key, ECB mode) | 37.87us | | 16.89us | 35 |
|
AEAD Algorithm | Encryption (per byte) | Decryption (per byte) | Key Setup | State Size (bytes) |
-ChaChaPoly | 41.23us | 41.23us | 902.55us | 255 |
+ChaChaPoly | 41.20us | 41.19us | 902.36us | 221 |
GCM<AES128> | 183.25us | 182.80us | 1272.73us | 284 |
GCM<AES192> | 189.92us | 189.47us | 1492.60us | 316 |
GCM<AES256> | 196.59us | 196.13us | 1767.33us | 348 |
@@ -106,7 +106,7 @@ Ardunino Mega 2560 running at 16 MHz are similar:
SHA1 (HMAC mode) | 21.90us | 4296.33us | 1420.24us | 95 |
SHA256 (HMAC mode) | 43.85us | 8552.61us | 2836.49us | 107 |
BLAKE2s (HMAC mode) | 20.65us | 4055.56us | 1350.00us | 107 |
-Poly1305 | 26.29us | 486.15us | 17.26us | 87 |
+Poly1305 | 26.26us | 489.11us | 17.06us | 53 |
GHASH | 148.14us | 17.09us | 21.87us | 33 |
|
Public Key Operation | Time (per operation) | Comment |
@@ -142,7 +142,7 @@ All figures are for the Arduino Due running at 84 MHz:
SpeckLowMemory (256-bit key, ECB mode) | 2.90us | | 1.83us | 48 |
|
AEAD Algorithm | Encryption (per byte) | Decryption (per byte) | Key Setup | State Size (bytes) |
-ChaChaPoly | 1.66us | 1.66us | 45.02us | 280 |
+ChaChaPoly | 1.71us | 1.71us | 45.08us | 240 |
GCM<AES128> | 10.29us | 10.29us | 223.82us | 312 |
GCM<AES192> | 11.50us | 11.51us | 265.62us | 344 |
GCM<AES256> | 12.67us | 12.67us | 313.06us | 376 |
@@ -163,7 +163,7 @@ All figures are for the Arduino Due running at 84 MHz:
SHA1 (HMAC mode) | 0.94us | 193.92us | 65.09us | 112 |
SHA256 (HMAC mode) | 1.15us | 238.98us | 80.44us | 120 |
BLAKE2s (HMAC mode) | 0.72us | 157.75us | 57.18us | 120 |
-Poly1305 | 0.85us | 19.25us | 2.35us | 96 |
+Poly1305 | 0.81us | 19.01us | 2.57us | 60 |
GHASH | 4.37us | 1.50us | 4.37us | 36 |
|
Public Key Operation | Time (per operation) | Comment |
diff --git a/libraries/Crypto/Poly1305.cpp b/libraries/Crypto/Poly1305.cpp
index 0cfbfda0..01ed6546 100644
--- a/libraries/Crypto/Poly1305.cpp
+++ b/libraries/Crypto/Poly1305.cpp
@@ -180,6 +180,7 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len)
{
dlimb_t carry;
uint8_t i;
+ limb_t t[NUM_LIMBS_256BIT + 1];
// Pad and flush the final chunk.
if (state.chunkSize > 0) {
@@ -211,7 +212,7 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len)
carry = 5;
for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
carry += state.h[i];
- state.t[i] = (limb_t)carry;
+ t[i] = (limb_t)carry;
carry >>= LIMB_BITS;
}
@@ -221,10 +222,10 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len)
// of the result because we are about to drop it in the next step.
// We have to do it this way to avoid giving away any information
// about the value of h in the instruction timing.
- limb_t mask = (~((state.t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1;
+ limb_t mask = (~((t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1;
limb_t nmask = ~mask;
for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
- state.h[i] = (state.h[i] & nmask) | (state.t[i] & mask);
+ state.h[i] = (state.h[i] & nmask) | (t[i] & mask);
}
// Add the encrypted nonce and format the final hash.
@@ -271,6 +272,8 @@ void Poly1305::clear()
*/
void Poly1305::processChunk()
{
+ limb_t t[NUM_LIMBS_256BIT + 1];
+
// Compute h = ((h + c) * r) mod (2^130 - 5).
// Start with h += c. We assume that h is less than (2^130 - 5) * 6
@@ -292,28 +295,28 @@ void Poly1305::processChunk()
limb_t word = state.r[0];
for (i = 0; i < NUM_LIMBS_130BIT; ++i) {
carry += ((dlimb_t)(state.h[i])) * word;
- state.t[i] = (limb_t)carry;
+ t[i] = (limb_t)carry;
carry >>= LIMB_BITS;
}
- state.t[NUM_LIMBS_130BIT] = (limb_t)carry;
+ t[NUM_LIMBS_130BIT] = (limb_t)carry;
for (i = 1; i < NUM_LIMBS_128BIT; ++i) {
word = state.r[i];
carry = 0;
for (j = 0; j < NUM_LIMBS_130BIT; ++j) {
carry += ((dlimb_t)(state.h[j])) * word;
- carry += state.t[i + j];
- state.t[i + j] = (limb_t)carry;
+ carry += t[i + j];
+ t[i + j] = (limb_t)carry;
carry >>= LIMB_BITS;
}
- state.t[i + NUM_LIMBS_130BIT] = (limb_t)carry;
+ t[i + NUM_LIMBS_130BIT] = (limb_t)carry;
}
// Reduce h * r modulo (2^130 - 5) by multiplying the high 130 bits by 5
// and adding them to the low 130 bits. See the explanation in the
// comments for Curve25519::reduce() for a description of how this works.
- carry = ((dlimb_t)(state.t[NUM_LIMBS_128BIT] >> 2)) +
- (state.t[NUM_LIMBS_128BIT] & ~((limb_t)3));
- state.t[NUM_LIMBS_128BIT] &= 0x0003;
+ carry = ((dlimb_t)(t[NUM_LIMBS_128BIT] >> 2)) +
+ (t[NUM_LIMBS_128BIT] & ~((limb_t)3));
+ t[NUM_LIMBS_128BIT] &= 0x0003;
for (i = 0; i < NUM_LIMBS_128BIT; ++i) {
// Shift the next word of t up by (LIMB_BITS - 2) bits and then
// multiply it by 5. Breaking it down, we can add the results
@@ -323,14 +326,14 @@ void Poly1305::processChunk()
// fit within a dlimb_t variable. However, we can defer adding
// (word << LIMB_BITS) until after the "carry >>= LIMB_BITS" step
// because it won't affect the low bits of the carry.
- word = state.t[i + NUM_LIMBS_130BIT];
+ word = t[i + NUM_LIMBS_130BIT];
carry += ((dlimb_t)word) << (LIMB_BITS - 2);
- carry += state.t[i];
+ carry += t[i];
state.h[i] = (limb_t)carry;
carry >>= LIMB_BITS;
carry += word;
}
- state.h[i] = (limb_t)(carry + state.t[NUM_LIMBS_128BIT]);
+ state.h[i] = (limb_t)(carry + t[NUM_LIMBS_128BIT]);
// At this point, h is either the answer of reducing modulo (2^130 - 5)
// or it is at most 5 subtractions away from the answer we want.
diff --git a/libraries/Crypto/Poly1305.h b/libraries/Crypto/Poly1305.h
index ef51596c..ae1e4539 100644
--- a/libraries/Crypto/Poly1305.h
+++ b/libraries/Crypto/Poly1305.h
@@ -45,7 +45,6 @@ private:
limb_t h[(16 / sizeof(limb_t)) + 1];
limb_t c[(16 / sizeof(limb_t)) + 1];
limb_t r[(16 / sizeof(limb_t))];
- limb_t t[(32 / sizeof(limb_t)) + 1];
uint8_t chunkSize;
} state;