From a3144aab4e17290945e9694b1111c741e17b9d6f Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Fri, 3 Apr 2015 05:03:48 +1000 Subject: [PATCH] Improve performance of Curve25519::eval() by 619ms --- doc/crypto.dox | 6 +-- libraries/Crypto/Curve25519.cpp | 93 ++++++++++++++++++++++----------- libraries/Crypto/Curve25519.h | 4 +- 3 files changed, 67 insertions(+), 36 deletions(-) diff --git a/doc/crypto.dox b/doc/crypto.dox index 093e87b0..bcbf3774 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -93,9 +93,9 @@ Ardunino Mega 2560 running at 16 MHz are similar: GHASH148.14us17.09us21.87us33 Public Key OperationTime (per operation)Comment -Curve25519::eval()3738msRaw curve evaluation -Curve25519::dh1()3740msFirst half of Diffie-Hellman key agreement -Curve25519::dh2()3738msSecond half of Diffie-Hellman key agreement +Curve25519::eval()3119msRaw curve evaluation +Curve25519::dh1()3121msFirst half of Diffie-Hellman key agreement +Curve25519::dh2()3120msSecond half of Diffie-Hellman key agreement Where a cipher supports more than one key size (such as ChaCha), the values diff --git a/libraries/Crypto/Curve25519.cpp b/libraries/Crypto/Curve25519.cpp index 64ee1597..9dde379f 100644 --- a/libraries/Crypto/Curve25519.cpp +++ b/libraries/Crypto/Curve25519.cpp @@ -31,7 +31,7 @@ * \brief Diffie-Hellman key agreement based on the elliptic curve * modulo 2^255 - 19. * - * \note This public functions in this class need a substantial amount of + * \note The public functions in this class need a substantial amount of * stack space to store intermediate results while the curve function is * being evaluated. About 1k of free stack space is recommended for safety. * @@ -46,6 +46,17 @@ // Number of bits in limb_t. #define LIMB_BITS (8 * sizeof(limb_t)) +// The overhead of clean() calls in mul(), reduceQuick(), etc can +// add up to a lot of processing time during eval(). Only do such +// cleanups if strict mode has been enabled. Other implementations +// like curve25519-donna don't do any cleaning at all so the value +// of cleaning up the stack is dubious at best anyway. +#if defined(CURVE25519_STRICT_CLEAN) +#define strict_clean(x) clean(x) +#else +#define strict_clean(x) do { ; } while (0) +#endif + /** * \brief Evaluates the raw Curve25519 function. * @@ -473,14 +484,18 @@ limb_t Curve25519::reduceQuick(limb_t *x) limb_t temp[NUM_LIMBS]; dlimb_t carry; uint8_t posn; + limb_t *xx; + limb_t *tt; // Perform a trial subtraction of (2^255 - 19) from "x" which is // equivalent to adding 19 and subtracting 2^255. We add 19 here; // the subtraction of 2^255 occurs in the next step. carry = 19U; + xx = x; + tt = temp; for (posn = 0; posn < NUM_LIMBS; ++posn) { - carry += x[posn]; - temp[posn] = (limb_t)carry; + carry += *xx++; + *tt++ = (limb_t)carry; carry >>= LIMB_BITS; } @@ -492,12 +507,15 @@ limb_t Curve25519::reduceQuick(limb_t *x) limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS - 1])) >> (LIMB_BITS - 1)); limb_t nmask = ~mask; temp[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + xx = x; + tt = temp; for (posn = 0; posn < NUM_LIMBS; ++posn) { - x[posn] = (x[posn] & nmask) | (temp[posn] & mask); + *xx = ((*xx) & nmask) | ((*tt++) & mask); + ++xx; } // Clean up "temp". - clean(temp); + strict_clean(temp); // Return a zero value if we actually subtracted (2^255 - 19) from "x". return nmask; @@ -519,33 +537,39 @@ void Curve25519::mul(limb_t *result, const limb_t *x, const limb_t *y) uint8_t i, j; dlimb_t carry; limb_t word; + const limb_t *yy; + limb_t *tt; // Multiply the lowest word of x by y. carry = 0; word = x[0]; + yy = y; + tt = temp; for (i = 0; i < NUM_LIMBS; ++i) { - carry += ((dlimb_t)(y[i])) * word; - temp[i] = (limb_t)carry; + carry += ((dlimb_t)(*yy++)) * word; + *tt++ = (limb_t)carry; carry >>= LIMB_BITS; } - temp[NUM_LIMBS] = (limb_t)carry; + *tt = (limb_t)carry; // Multiply and add the remaining words of x by y. for (i = 1; i < NUM_LIMBS; ++i) { word = x[i]; carry = 0; + yy = y; + tt = temp + i; for (j = 0; j < NUM_LIMBS; ++j) { - carry += ((dlimb_t)(y[j])) * word; - carry += temp[i + j]; - temp[i + j] = (limb_t)carry; + carry += ((dlimb_t)(*yy++)) * word; + carry += *tt; + *tt++ = (limb_t)carry; carry >>= LIMB_BITS; } - temp[i + NUM_LIMBS] = (limb_t)carry; + *tt = (limb_t)carry; } // Reduce the intermediate result modulo 2^255 - 19. reduce(result, temp, NUM_LIMBS); - clean(temp); + strict_clean(temp); } /** @@ -589,29 +613,33 @@ void Curve25519::mulA24(limb_t *result, const limb_t *x) uint8_t i, j; dlimb_t carry = 0; limb_t word = pgm_read_a24(0); + const limb_t *xx = x; + limb_t *tt = temp; for (i = 0; i < NUM_LIMBS; ++i) { - carry += ((dlimb_t)(x[i])) * word; - temp[i] = (limb_t)carry; + carry += ((dlimb_t)(*xx++)) * word; + *tt++ = (limb_t)carry; carry >>= LIMB_BITS; } - temp[NUM_LIMBS] = (limb_t)carry; + *tt = (limb_t)carry; // Multiply and add the remaining limbs of a24. for (i = 1; i < NUM_A24_LIMBS; ++i) { word = pgm_read_a24(i); carry = 0; + xx = x; + tt = temp + i; for (j = 0; j < NUM_LIMBS; ++j) { - carry += ((dlimb_t)(x[j])) * word; - carry += temp[i + j]; - temp[i + j] = (limb_t)carry; + carry += ((dlimb_t)(*xx++)) * word; + carry += *tt; + *tt++ = (limb_t)carry; carry >>= LIMB_BITS; } - temp[i + NUM_LIMBS] = (limb_t)carry; + *tt = (limb_t)carry; } // Reduce the intermediate result modulo 2^255 - 19. reduce(result, temp, NUM_A24_LIMBS); - clean(temp); + strict_clean(temp); } /** @@ -628,12 +656,13 @@ void Curve25519::add(limb_t *result, const limb_t *x, const limb_t *y) { dlimb_t carry = 0; uint8_t posn; + limb_t *rr = result; // Add the two arrays to obtain the intermediate result. for (posn = 0; posn < NUM_LIMBS; ++posn) { - carry += x[posn]; - carry += y[posn]; - result[posn] = (limb_t)carry; + carry += *x++; + carry += *y++; + *rr++ = (limb_t)carry; carry >>= LIMB_BITS; } @@ -655,12 +684,13 @@ void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y) { dlimb_t borrow; uint8_t posn; + limb_t *rr = result; // Subtract y from x to generate the intermediate result. borrow = 0; for (posn = 0; posn < NUM_LIMBS; ++posn) { - borrow = ((dlimb_t)x[posn]) - y[posn] - ((borrow >> LIMB_BITS) & 0x01); - result[posn] = (limb_t)borrow; + borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; } // If we had a borrow, then the result has gone negative and we @@ -668,14 +698,15 @@ void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y) // The top bits of "borrow" will be all 1's if there is a borrow // or it will be all 0's if there was no borrow. Easiest is to // conditionally subtract 19 and then mask off the high bit. + rr = result; borrow = (borrow >> LIMB_BITS) & 19U; - borrow = ((dlimb_t)result[0]) - borrow; - result[0] = (limb_t)borrow; + borrow = ((dlimb_t)(*rr)) - borrow; + *rr++ = (limb_t)borrow; for (posn = 1; posn < NUM_LIMBS; ++posn) { - borrow = ((dlimb_t)result[posn]) - ((borrow >> LIMB_BITS) & 0x01); - result[posn] = (limb_t)borrow; + borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; } - result[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + *(--rr) &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); } /** diff --git a/libraries/Crypto/Curve25519.h b/libraries/Crypto/Curve25519.h index 567fae5c..aa183f92 100644 --- a/libraries/Crypto/Curve25519.h +++ b/libraries/Crypto/Curve25519.h @@ -20,8 +20,8 @@ * DEALINGS IN THE SOFTWARE. */ -#ifndef CRYPTO_CURVE15519_h -#define CRYPTO_CURVE15519_h +#ifndef CRYPTO_CURVE25519_h +#define CRYPTO_CURVE25519_h #include "BigNumberUtil.h" #include