From 95313613b7868884ae46c2ef69ffc9a7132496e1 Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Sat, 16 Jan 2016 08:57:46 +1000 Subject: [PATCH] Reduce the object state size for Poly1305 --- doc/crypto.dox | 8 ++++---- libraries/Crypto/Poly1305.cpp | 31 +++++++++++++++++-------------- libraries/Crypto/Poly1305.h | 1 - 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/doc/crypto.dox b/doc/crypto.dox index 97da849f..e546e163 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -85,7 +85,7 @@ Ardunino Mega 2560 running at 16 MHz are similar: SpeckLowMemory (256-bit key, ECB mode)37.87us 16.89us35 AEAD AlgorithmEncryption (per byte)Decryption (per byte)Key SetupState Size (bytes) -ChaChaPoly41.23us41.23us902.55us255 +ChaChaPoly41.20us41.19us902.36us221 GCM<AES128>183.25us182.80us1272.73us284 GCM<AES192>189.92us189.47us1492.60us316 GCM<AES256>196.59us196.13us1767.33us348 @@ -106,7 +106,7 @@ Ardunino Mega 2560 running at 16 MHz are similar: SHA1 (HMAC mode)21.90us4296.33us1420.24us95 SHA256 (HMAC mode)43.85us8552.61us2836.49us107 BLAKE2s (HMAC mode)20.65us4055.56us1350.00us107 -Poly130526.29us486.15us17.26us87 +Poly130526.26us489.11us17.06us53 GHASH148.14us17.09us21.87us33 Public Key OperationTime (per operation)Comment @@ -142,7 +142,7 @@ All figures are for the Arduino Due running at 84 MHz: SpeckLowMemory (256-bit key, ECB mode)2.90us 1.83us48 AEAD AlgorithmEncryption (per byte)Decryption (per byte)Key SetupState Size (bytes) -ChaChaPoly1.66us1.66us45.02us280 +ChaChaPoly1.71us1.71us45.08us240 GCM<AES128>10.29us10.29us223.82us312 GCM<AES192>11.50us11.51us265.62us344 GCM<AES256>12.67us12.67us313.06us376 @@ -163,7 +163,7 @@ All figures are for the Arduino Due running at 84 MHz: SHA1 (HMAC mode)0.94us193.92us65.09us112 SHA256 (HMAC mode)1.15us238.98us80.44us120 BLAKE2s (HMAC mode)0.72us157.75us57.18us120 -Poly13050.85us19.25us2.35us96 +Poly13050.81us19.01us2.57us60 GHASH4.37us1.50us4.37us36 Public Key OperationTime (per operation)Comment diff --git a/libraries/Crypto/Poly1305.cpp b/libraries/Crypto/Poly1305.cpp index 0cfbfda0..01ed6546 100644 --- a/libraries/Crypto/Poly1305.cpp +++ b/libraries/Crypto/Poly1305.cpp @@ -180,6 +180,7 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len) { dlimb_t carry; uint8_t i; + limb_t t[NUM_LIMBS_256BIT + 1]; // Pad and flush the final chunk. if (state.chunkSize > 0) { @@ -211,7 +212,7 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len) carry = 5; for (i = 0; i < NUM_LIMBS_130BIT; ++i) { carry += state.h[i]; - state.t[i] = (limb_t)carry; + t[i] = (limb_t)carry; carry >>= LIMB_BITS; } @@ -221,10 +222,10 @@ void Poly1305::finalize(const void *nonce, void *token, size_t len) // of the result because we are about to drop it in the next step. // We have to do it this way to avoid giving away any information // about the value of h in the instruction timing. - limb_t mask = (~((state.t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1; + limb_t mask = (~((t[NUM_LIMBS_128BIT] >> 2) & 1)) + 1; limb_t nmask = ~mask; for (i = 0; i < NUM_LIMBS_128BIT; ++i) { - state.h[i] = (state.h[i] & nmask) | (state.t[i] & mask); + state.h[i] = (state.h[i] & nmask) | (t[i] & mask); } // Add the encrypted nonce and format the final hash. @@ -271,6 +272,8 @@ void Poly1305::clear() */ void Poly1305::processChunk() { + limb_t t[NUM_LIMBS_256BIT + 1]; + // Compute h = ((h + c) * r) mod (2^130 - 5). // Start with h += c. We assume that h is less than (2^130 - 5) * 6 @@ -292,28 +295,28 @@ void Poly1305::processChunk() limb_t word = state.r[0]; for (i = 0; i < NUM_LIMBS_130BIT; ++i) { carry += ((dlimb_t)(state.h[i])) * word; - state.t[i] = (limb_t)carry; + t[i] = (limb_t)carry; carry >>= LIMB_BITS; } - state.t[NUM_LIMBS_130BIT] = (limb_t)carry; + t[NUM_LIMBS_130BIT] = (limb_t)carry; for (i = 1; i < NUM_LIMBS_128BIT; ++i) { word = state.r[i]; carry = 0; for (j = 0; j < NUM_LIMBS_130BIT; ++j) { carry += ((dlimb_t)(state.h[j])) * word; - carry += state.t[i + j]; - state.t[i + j] = (limb_t)carry; + carry += t[i + j]; + t[i + j] = (limb_t)carry; carry >>= LIMB_BITS; } - state.t[i + NUM_LIMBS_130BIT] = (limb_t)carry; + t[i + NUM_LIMBS_130BIT] = (limb_t)carry; } // Reduce h * r modulo (2^130 - 5) by multiplying the high 130 bits by 5 // and adding them to the low 130 bits. See the explaination in the // comments for Curve25519::reduce() for a description of how this works. - carry = ((dlimb_t)(state.t[NUM_LIMBS_128BIT] >> 2)) + - (state.t[NUM_LIMBS_128BIT] & ~((limb_t)3)); - state.t[NUM_LIMBS_128BIT] &= 0x0003; + carry = ((dlimb_t)(t[NUM_LIMBS_128BIT] >> 2)) + + (t[NUM_LIMBS_128BIT] & ~((limb_t)3)); + t[NUM_LIMBS_128BIT] &= 0x0003; for (i = 0; i < NUM_LIMBS_128BIT; ++i) { // Shift the next word of t up by (LIMB_BITS - 2) bits and then // multiply it by 5. Breaking it down, we can add the results @@ -323,14 +326,14 @@ void Poly1305::processChunk() // fit within a dlimb_t variable. However, we can defer adding // (word << LIMB_BITS) until after the "carry >>= LIMB_BITS" step // because it won't affect the low bits of the carry. - word = state.t[i + NUM_LIMBS_130BIT]; + word = t[i + NUM_LIMBS_130BIT]; carry += ((dlimb_t)word) << (LIMB_BITS - 2); - carry += state.t[i]; + carry += t[i]; state.h[i] = (limb_t)carry; carry >>= LIMB_BITS; carry += word; } - state.h[i] = (limb_t)(carry + state.t[NUM_LIMBS_128BIT]); + state.h[i] = (limb_t)(carry + t[NUM_LIMBS_128BIT]); // At this point, h is either the answer of reducing modulo (2^130 - 5) // or it is at most 5 subtractions away from the answer we want. diff --git a/libraries/Crypto/Poly1305.h b/libraries/Crypto/Poly1305.h index ef51596c..ae1e4539 100644 --- a/libraries/Crypto/Poly1305.h +++ b/libraries/Crypto/Poly1305.h @@ -45,7 +45,6 @@ private: limb_t h[(16 / sizeof(limb_t)) + 1]; limb_t c[(16 / sizeof(limb_t)) + 1]; limb_t r[(16 / sizeof(limb_t))]; - limb_t t[(32 / sizeof(limb_t)) + 1]; uint8_t chunkSize; } state;