mirror of
https://github.com/taigrr/arduinolibs
synced 2025-01-18 04:33:12 -08:00
Improve performance of Curve25519::eval() by 619ms
This commit is contained in:
parent
47ab405e7f
commit
a3144aab4e
@ -93,9 +93,9 @@ Arduino Mega 2560 running at 16 MHz are similar:
|
|||||||
<tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr>
|
<tr><td>GHASH</td><td align="right">148.14us</td><td align="right">17.09us</td><td align="right">21.87us</td><td align="right">33</td></tr>
|
||||||
<tr><td colspan="5"> </td></tr>
|
<tr><td colspan="5"> </td></tr>
|
||||||
<tr><td>Public Key Operation</td><td align="right">Time (per operation)</td><td colspan="3">Comment</td></tr>
|
<tr><td>Public Key Operation</td><td align="right">Time (per operation)</td><td colspan="3">Comment</td></tr>
|
||||||
<tr><td>Curve25519::eval()</td><td align="right">3738ms</td><td colspan="3">Raw curve evaluation</td></tr>
|
<tr><td>Curve25519::eval()</td><td align="right">3119ms</td><td colspan="3">Raw curve evaluation</td></tr>
|
||||||
<tr><td>Curve25519::dh1()</td><td align="right">3740ms</td><td colspan="3">First half of Diffie-Hellman key agreement</td></tr>
|
<tr><td>Curve25519::dh1()</td><td align="right">3121ms</td><td colspan="3">First half of Diffie-Hellman key agreement</td></tr>
|
||||||
<tr><td>Curve25519::dh2()</td><td align="right">3738ms</td><td colspan="3">Second half of Diffie-Hellman key agreement</td></tr>
|
<tr><td>Curve25519::dh2()</td><td align="right">3120ms</td><td colspan="3">Second half of Diffie-Hellman key agreement</td></tr>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
Where a cipher supports more than one key size (such as ChaCha), the values
|
Where a cipher supports more than one key size (such as ChaCha), the values
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
* \brief Diffie-Hellman key agreement based on the elliptic curve
|
* \brief Diffie-Hellman key agreement based on the elliptic curve
|
||||||
* modulo 2^255 - 19.
|
* modulo 2^255 - 19.
|
||||||
*
|
*
|
||||||
* \note This public functions in this class need a substantial amount of
|
* \note The public functions in this class need a substantial amount of
|
||||||
* stack space to store intermediate results while the curve function is
|
* stack space to store intermediate results while the curve function is
|
||||||
* being evaluated. About 1k of free stack space is recommended for safety.
|
* being evaluated. About 1k of free stack space is recommended for safety.
|
||||||
*
|
*
|
||||||
@ -46,6 +46,17 @@
|
|||||||
// Number of bits in limb_t.
|
// Number of bits in limb_t.
|
||||||
#define LIMB_BITS (8 * sizeof(limb_t))
|
#define LIMB_BITS (8 * sizeof(limb_t))
|
||||||
|
|
||||||
|
// The overhead of clean() calls in mul(), reduceQuick(), etc. can
|
||||||
|
// add up to a lot of processing time during eval(). Only do such
|
||||||
|
// cleanups if strict mode has been enabled. Other implementations
|
||||||
|
// like curve25519-donna don't do any cleaning at all so the value
|
||||||
|
// of cleaning up the stack is dubious at best anyway.
|
||||||
|
#if defined(CURVE25519_STRICT_CLEAN)
|
||||||
|
#define strict_clean(x) clean(x)
|
||||||
|
#else
|
||||||
|
#define strict_clean(x) do { ; } while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Evaluates the raw Curve25519 function.
|
* \brief Evaluates the raw Curve25519 function.
|
||||||
*
|
*
|
||||||
@ -473,14 +484,18 @@ limb_t Curve25519::reduceQuick(limb_t *x)
|
|||||||
limb_t temp[NUM_LIMBS];
|
limb_t temp[NUM_LIMBS];
|
||||||
dlimb_t carry;
|
dlimb_t carry;
|
||||||
uint8_t posn;
|
uint8_t posn;
|
||||||
|
limb_t *xx;
|
||||||
|
limb_t *tt;
|
||||||
|
|
||||||
// Perform a trial subtraction of (2^255 - 19) from "x" which is
|
// Perform a trial subtraction of (2^255 - 19) from "x" which is
|
||||||
// equivalent to adding 19 and subtracting 2^255. We add 19 here;
|
// equivalent to adding 19 and subtracting 2^255. We add 19 here;
|
||||||
// the subtraction of 2^255 occurs in the next step.
|
// the subtraction of 2^255 occurs in the next step.
|
||||||
carry = 19U;
|
carry = 19U;
|
||||||
|
xx = x;
|
||||||
|
tt = temp;
|
||||||
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
||||||
carry += x[posn];
|
carry += *xx++;
|
||||||
temp[posn] = (limb_t)carry;
|
*tt++ = (limb_t)carry;
|
||||||
carry >>= LIMB_BITS;
|
carry >>= LIMB_BITS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -492,12 +507,15 @@ limb_t Curve25519::reduceQuick(limb_t *x)
|
|||||||
limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS - 1])) >> (LIMB_BITS - 1));
|
limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS - 1])) >> (LIMB_BITS - 1));
|
||||||
limb_t nmask = ~mask;
|
limb_t nmask = ~mask;
|
||||||
temp[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
|
temp[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
|
||||||
|
xx = x;
|
||||||
|
tt = temp;
|
||||||
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
||||||
x[posn] = (x[posn] & nmask) | (temp[posn] & mask);
|
*xx = ((*xx) & nmask) | ((*tt++) & mask);
|
||||||
|
++xx;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up "temp".
|
// Clean up "temp".
|
||||||
clean(temp);
|
strict_clean(temp);
|
||||||
|
|
||||||
// Return a zero value if we actually subtracted (2^255 - 19) from "x".
|
// Return a zero value if we actually subtracted (2^255 - 19) from "x".
|
||||||
return nmask;
|
return nmask;
|
||||||
@ -519,33 +537,39 @@ void Curve25519::mul(limb_t *result, const limb_t *x, const limb_t *y)
|
|||||||
uint8_t i, j;
|
uint8_t i, j;
|
||||||
dlimb_t carry;
|
dlimb_t carry;
|
||||||
limb_t word;
|
limb_t word;
|
||||||
|
const limb_t *yy;
|
||||||
|
limb_t *tt;
|
||||||
|
|
||||||
// Multiply the lowest word of x by y.
|
// Multiply the lowest word of x by y.
|
||||||
carry = 0;
|
carry = 0;
|
||||||
word = x[0];
|
word = x[0];
|
||||||
|
yy = y;
|
||||||
|
tt = temp;
|
||||||
for (i = 0; i < NUM_LIMBS; ++i) {
|
for (i = 0; i < NUM_LIMBS; ++i) {
|
||||||
carry += ((dlimb_t)(y[i])) * word;
|
carry += ((dlimb_t)(*yy++)) * word;
|
||||||
temp[i] = (limb_t)carry;
|
*tt++ = (limb_t)carry;
|
||||||
carry >>= LIMB_BITS;
|
carry >>= LIMB_BITS;
|
||||||
}
|
}
|
||||||
temp[NUM_LIMBS] = (limb_t)carry;
|
*tt = (limb_t)carry;
|
||||||
|
|
||||||
// Multiply and add the remaining words of x by y.
|
// Multiply and add the remaining words of x by y.
|
||||||
for (i = 1; i < NUM_LIMBS; ++i) {
|
for (i = 1; i < NUM_LIMBS; ++i) {
|
||||||
word = x[i];
|
word = x[i];
|
||||||
carry = 0;
|
carry = 0;
|
||||||
|
yy = y;
|
||||||
|
tt = temp + i;
|
||||||
for (j = 0; j < NUM_LIMBS; ++j) {
|
for (j = 0; j < NUM_LIMBS; ++j) {
|
||||||
carry += ((dlimb_t)(y[j])) * word;
|
carry += ((dlimb_t)(*yy++)) * word;
|
||||||
carry += temp[i + j];
|
carry += *tt;
|
||||||
temp[i + j] = (limb_t)carry;
|
*tt++ = (limb_t)carry;
|
||||||
carry >>= LIMB_BITS;
|
carry >>= LIMB_BITS;
|
||||||
}
|
}
|
||||||
temp[i + NUM_LIMBS] = (limb_t)carry;
|
*tt = (limb_t)carry;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reduce the intermediate result modulo 2^255 - 19.
|
// Reduce the intermediate result modulo 2^255 - 19.
|
||||||
reduce(result, temp, NUM_LIMBS);
|
reduce(result, temp, NUM_LIMBS);
|
||||||
clean(temp);
|
strict_clean(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -589,29 +613,33 @@ void Curve25519::mulA24(limb_t *result, const limb_t *x)
|
|||||||
uint8_t i, j;
|
uint8_t i, j;
|
||||||
dlimb_t carry = 0;
|
dlimb_t carry = 0;
|
||||||
limb_t word = pgm_read_a24(0);
|
limb_t word = pgm_read_a24(0);
|
||||||
|
const limb_t *xx = x;
|
||||||
|
limb_t *tt = temp;
|
||||||
for (i = 0; i < NUM_LIMBS; ++i) {
|
for (i = 0; i < NUM_LIMBS; ++i) {
|
||||||
carry += ((dlimb_t)(x[i])) * word;
|
carry += ((dlimb_t)(*xx++)) * word;
|
||||||
temp[i] = (limb_t)carry;
|
*tt++ = (limb_t)carry;
|
||||||
carry >>= LIMB_BITS;
|
carry >>= LIMB_BITS;
|
||||||
}
|
}
|
||||||
temp[NUM_LIMBS] = (limb_t)carry;
|
*tt = (limb_t)carry;
|
||||||
|
|
||||||
// Multiply and add the remaining limbs of a24.
|
// Multiply and add the remaining limbs of a24.
|
||||||
for (i = 1; i < NUM_A24_LIMBS; ++i) {
|
for (i = 1; i < NUM_A24_LIMBS; ++i) {
|
||||||
word = pgm_read_a24(i);
|
word = pgm_read_a24(i);
|
||||||
carry = 0;
|
carry = 0;
|
||||||
|
xx = x;
|
||||||
|
tt = temp + i;
|
||||||
for (j = 0; j < NUM_LIMBS; ++j) {
|
for (j = 0; j < NUM_LIMBS; ++j) {
|
||||||
carry += ((dlimb_t)(x[j])) * word;
|
carry += ((dlimb_t)(*xx++)) * word;
|
||||||
carry += temp[i + j];
|
carry += *tt;
|
||||||
temp[i + j] = (limb_t)carry;
|
*tt++ = (limb_t)carry;
|
||||||
carry >>= LIMB_BITS;
|
carry >>= LIMB_BITS;
|
||||||
}
|
}
|
||||||
temp[i + NUM_LIMBS] = (limb_t)carry;
|
*tt = (limb_t)carry;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reduce the intermediate result modulo 2^255 - 19.
|
// Reduce the intermediate result modulo 2^255 - 19.
|
||||||
reduce(result, temp, NUM_A24_LIMBS);
|
reduce(result, temp, NUM_A24_LIMBS);
|
||||||
clean(temp);
|
strict_clean(temp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -628,12 +656,13 @@ void Curve25519::add(limb_t *result, const limb_t *x, const limb_t *y)
|
|||||||
{
|
{
|
||||||
dlimb_t carry = 0;
|
dlimb_t carry = 0;
|
||||||
uint8_t posn;
|
uint8_t posn;
|
||||||
|
limb_t *rr = result;
|
||||||
|
|
||||||
// Add the two arrays to obtain the intermediate result.
|
// Add the two arrays to obtain the intermediate result.
|
||||||
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
||||||
carry += x[posn];
|
carry += *x++;
|
||||||
carry += y[posn];
|
carry += *y++;
|
||||||
result[posn] = (limb_t)carry;
|
*rr++ = (limb_t)carry;
|
||||||
carry >>= LIMB_BITS;
|
carry >>= LIMB_BITS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -655,12 +684,13 @@ void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y)
|
|||||||
{
|
{
|
||||||
dlimb_t borrow;
|
dlimb_t borrow;
|
||||||
uint8_t posn;
|
uint8_t posn;
|
||||||
|
limb_t *rr = result;
|
||||||
|
|
||||||
// Subtract y from x to generate the intermediate result.
|
// Subtract y from x to generate the intermediate result.
|
||||||
borrow = 0;
|
borrow = 0;
|
||||||
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
for (posn = 0; posn < NUM_LIMBS; ++posn) {
|
||||||
borrow = ((dlimb_t)x[posn]) - y[posn] - ((borrow >> LIMB_BITS) & 0x01);
|
borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01);
|
||||||
result[posn] = (limb_t)borrow;
|
*rr++ = (limb_t)borrow;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we had a borrow, then the result has gone negative and we
|
// If we had a borrow, then the result has gone negative and we
|
||||||
@ -668,14 +698,15 @@ void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y)
|
|||||||
// The top bits of "borrow" will be all 1's if there is a borrow
|
// The top bits of "borrow" will be all 1's if there is a borrow
|
||||||
// or it will be all 0's if there was no borrow. Easiest is to
|
// or it will be all 0's if there was no borrow. Easiest is to
|
||||||
// conditionally subtract 19 and then mask off the high bit.
|
// conditionally subtract 19 and then mask off the high bit.
|
||||||
|
rr = result;
|
||||||
borrow = (borrow >> LIMB_BITS) & 19U;
|
borrow = (borrow >> LIMB_BITS) & 19U;
|
||||||
borrow = ((dlimb_t)result[0]) - borrow;
|
borrow = ((dlimb_t)(*rr)) - borrow;
|
||||||
result[0] = (limb_t)borrow;
|
*rr++ = (limb_t)borrow;
|
||||||
for (posn = 1; posn < NUM_LIMBS; ++posn) {
|
for (posn = 1; posn < NUM_LIMBS; ++posn) {
|
||||||
borrow = ((dlimb_t)result[posn]) - ((borrow >> LIMB_BITS) & 0x01);
|
borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01);
|
||||||
result[posn] = (limb_t)borrow;
|
*rr++ = (limb_t)borrow;
|
||||||
}
|
}
|
||||||
result[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
|
*(--rr) &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -20,8 +20,8 @@
|
|||||||
* DEALINGS IN THE SOFTWARE.
|
* DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CRYPTO_CURVE15519_h
|
#ifndef CRYPTO_CURVE25519_h
|
||||||
#define CRYPTO_CURVE15519_h
|
#define CRYPTO_CURVE25519_h
|
||||||
|
|
||||||
#include "BigNumberUtil.h"
|
#include "BigNumberUtil.h"
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user