diff --git a/doc/crypto.dox b/doc/crypto.dox index 1e80ba50..e26d84ad 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -30,6 +30,7 @@ \li Block cipher modes: CTR, CFB, CBC, OFB \li Stream ciphers: ChaCha \li Hash algorithms: SHA1, SHA256, BLAKE2s +\li Public key algorithms: Curve25519 \li Random number generation: \link RNGClass RNG\endlink, TransistorNoiseSource All cryptographic algorithms have been optimized for 8-bit Arduino platforms @@ -49,10 +50,6 @@ with a 256-bit hash output. It is intended as a high performance drop-in replacement for SHA256 for when speed is critical but exact SHA256 compatibility is not. -\section crypto_examples Examples - -TBD - \section crypto_performance Performance All figures are for the Arduino Uno running at 16 MHz. Figures for the @@ -75,4 +72,13 @@ Where a cipher supports more than one key size (such as ChaCha), the values are typically almost identical for 128-bit and 256-bit keys so only the maximum is shown above. +Public key algorithms have the following results on an Arduino Uno: + + + + + + +
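<table>
<tr><td>Algorithm</td><td>Operation</td><td>Time</td><td>Comment</td></tr>
<tr><td>Curve25519</td><td>\link Curve25519::eval() eval()\endlink</td><td>3738 ms</td><td>Raw curve evaluation</td></tr>
<tr><td>Curve25519</td><td>\link Curve25519::dh1() dh1()\endlink</td><td>3740 ms</td><td>First half of Diffie-Hellman key agreement</td></tr>
<tr><td>Curve25519</td><td>\link Curve25519::dh2() dh2()\endlink</td><td>3738 ms</td><td>Second half of Diffie-Hellman key agreement</td></tr>
</table>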
+ */ diff --git a/doc/mainpage.dox b/doc/mainpage.dox index d3f5a709..bfc545d0 100644 --- a/doc/mainpage.dox +++ b/doc/mainpage.dox @@ -94,6 +94,7 @@ realtime clock and the LCD library to implement an alarm clock. \li Block cipher modes: CTR, CFB, CBC, OFB \li Stream ciphers: ChaCha \li Hash algorithms: SHA1, SHA256, BLAKE2s +\li Public key algorithms: Curve25519 \li Random number generation: \link RNGClass RNG\endlink, TransistorNoiseSource More information can be found on the \ref crypto "Cryptographic Library" page. diff --git a/libraries/Crypto/Curve25519.cpp b/libraries/Crypto/Curve25519.cpp new file mode 100644 index 00000000..d68bfda1 --- /dev/null +++ b/libraries/Crypto/Curve25519.cpp @@ -0,0 +1,831 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "Curve25519.h" +#include "Crypto.h" +#include "RNG.h" +#include "utility/ProgMemUtil.h" +#include <string.h> + +/** + * \class Curve25519 Curve25519.h + * \brief Diffie-Hellman key agreement based on the elliptic curve + * modulo 2^255 - 19. + * + * \note The public functions in this class need a substantial amount of + * stack space to store intermediate results while the curve function is + * being evaluated. About 1k of free stack space is recommended for safety. + * + * References: http://cr.yp.to/ecdh.html + * https://tools.ietf.org/html/draft-irtf-cfrg-curves-02 + */ + +// Number of limbs in a value from the field modulo 2^255 - 19. +// We assume that sizeof(limb_t) is a power of 2: 1, 2, 4, etc. +#define NUM_LIMBS (32 / sizeof(limb_t)) + +// Number of bits in limb_t. +#define LIMB_BITS (8 * sizeof(limb_t)) + +/** + * \brief Evaluates the raw Curve25519 function. + * + * \param result The result of evaluating the curve function. + * \param s The S parameter to the curve function. + * \param x The X(Q) parameter to the curve function. If this pointer is + * NULL then the value 9 is used for \a x. + * + * This function is provided to assist with implementing other + * algorithms with the curve. Normally applications should use dh1() + * and dh2() directly instead. + * + * \return Returns true if the function was evaluated; false if \a x is + * not a proper member of the field modulo (2^255 - 19). 
+ * + * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-curves-02 + * + * \sa dh1(), dh2() + */ +bool Curve25519::eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32]) +{ + limb_t x_1[NUM_LIMBS]; + limb_t x_2[NUM_LIMBS]; + limb_t x_3[NUM_LIMBS]; + limb_t z_2[NUM_LIMBS]; + limb_t z_3[NUM_LIMBS]; + limb_t A[NUM_LIMBS]; + limb_t B[NUM_LIMBS]; + limb_t C[NUM_LIMBS]; + limb_t D[NUM_LIMBS]; + limb_t E[NUM_LIMBS]; + limb_t AA[NUM_LIMBS]; + limb_t BB[NUM_LIMBS]; + limb_t DA[NUM_LIMBS]; + limb_t CB[NUM_LIMBS]; + uint8_t mask; + uint8_t sposn; + uint8_t select; + uint8_t swap; + bool retval; + + // Unpack the "x" argument into the limb representation + // which also masks off the high bit. NULL means 9. + if (x) { + unpack(x_1, x); // x_1 = x + } else { + memset(x_1, 0, sizeof(x_1)); // x_1 = 9 + x_1[0] = 9; + } + + // Check that "x" is within the range of the modulo field. + // We can do this with a reduction - if there was no borrow + // then the value of "x" was out of range. Timing is sensitive + // here so that we don't reveal anything about the value of "x". + // If there was a reduction, then continue executing the rest + // of this function with the (now) in-range "x" value and + // report the failure at the end. + retval = (bool)(reduceQuick(x_1) & 0x01); + + // Initialize the other temporary variables. + memset(x_2, 0, sizeof(x_2)); // x_2 = 1 + x_2[0] = 1; + memset(z_2, 0, sizeof(z_2)); // z_2 = 0 + memcpy(x_3, x_1, sizeof(x_1)); // x_3 = x + memcpy(z_3, x_2, sizeof(x_2)); // z_3 = 1 + + // Iterate over all 255 bits of "s" from the highest to the lowest. + // We ignore the high bit of the 256-bit representation of "s". + mask = 0x40; + sposn = 31; + swap = 0; + for (uint8_t t = 255; t > 0; --t) { + // Conditional swaps on entry to this bit but only if we + // didn't swap on the previous bit. + select = s[sposn] & mask; + swap ^= select; + cswap(swap, x_2, x_3); + cswap(swap, z_2, z_3); + + // Evaluate the curve. 
+ add(A, x_2, z_2); // A = x_2 + z_2 + square(AA, A); // AA = A^2 + sub(B, x_2, z_2); // B = x_2 - z_2 + square(BB, B); // BB = B^2 + sub(E, AA, BB); // E = AA - BB + add(C, x_3, z_3); // C = x_3 + z_3 + sub(D, x_3, z_3); // D = x_3 - z_3 + mul(DA, D, A); // DA = D * A + mul(CB, C, B); // CB = C * B + add(x_3, DA, CB); // x_3 = (DA + CB)^2 + square(x_3, x_3); + sub(z_3, DA, CB); // z_3 = x_1 * (DA - CB)^2 + square(z_3, z_3); + mul(z_3, z_3, x_1); + mul(x_2, AA, BB); // x_2 = AA * BB + mulA24(z_2, E); // z_2 = E * (AA + a24 * E) + add(z_2, z_2, AA); + mul(z_2, z_2, E); + + // Move onto the next lower bit of "s". + mask >>= 1; + if (!mask) { + --sposn; + mask = 0x80; + swap = select << 7; + } else { + swap = select >> 1; + } + } + + // Final conditional swaps. + cswap(swap, x_2, x_3); + cswap(swap, z_2, z_3); + + // Compute x_2 * (z_2 ^ (p - 2)) where p = 2^255 - 19. + recip(z_3, z_2); + mul(x_2, x_2, z_3); + + // Pack the result into the return array. + pack(result, x_2); + + // Clean up and exit. + clean(x_1); + clean(x_2); + clean(x_3); + clean(z_2); + clean(z_3); + clean(A); + clean(B); + clean(C); + clean(D); + clean(E); + clean(AA); + clean(BB); + clean(DA); + clean(CB); + return retval; +} + +/** + * \brief Performs phase 1 of a Diffie-Hellman key exchange using Curve25519. + * + * \param k The key value to send to the other party as part of the exchange. + * \param f The generated secret value for this party. This must not be + * transmitted to any party or stored in permanent storage. It only needs + * to be kept in memory until dh2() is called. + * + * The \a f value is generated with \link RNGClass::rand() RNG.rand()\endlink. + * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 32 bytes of \a f safely + * before calling this function. 
+ * + * The following example demonstrates how to perform a full Diffie-Hellman + * key exchange using dh1() and dh2(): + * + * \code + * uint8_t f[32]; + * uint8_t k[32]; + * + * // Generate the secret value "f" and the public value "k". + * Curve25519::dh1(k, f); + * + * // Send "k" to the other party. + * ... + * + * // Read the "k" value that the other party sent to us. + * ... + * + * // Generate the shared secret in "k" using the previous secret value "f". + * if (!Curve25519::dh2(k, f)) { + * // The received "k" value was invalid - abort the session. + * ... + * } + * + * // The "k" value can now be used to generate session keys for encryption. + * ... + * \endcode + * + * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-curves-02 + * + * \sa dh2() + */ +void Curve25519::dh1(uint8_t k[32], uint8_t f[32]) +{ + do { + // Generate a random "f" value and then adjust the value to make + // it valid as an "s" value for eval(). According to the specification + // we need to mask off the 3 right-most bits of f[0], mask off the + // left-most bit of f[31], and set the second to left-most bit of f[31]. + RNG.rand(f, 32); + f[0] &= 0xF8; + f[31] = (f[31] & 0x7F) | 0x40; + + // Evaluate the curve function: k = Curve25519::eval(f, 9). + // We pass NULL to eval() to indicate the value 9. There is no + // need to check the return value from eval() because we know + // that 9 is a valid field element. + eval(k, f, 0); + + // If "k" is weak for contributory behaviour then reject it, + // generate another "f" value, and try again. This case is + // highly unlikely but we still perform the check just in case. + } while (isWeakPoint(k)); +} + +/** + * \brief Performs phase 2 of a Diffie-Hellman key exchange using Curve25519. + * + * \param k On entry, this is the key value that was received from the other + * party as part of the exchange. On exit, this will be the shared secret. + * \param f The secret value for this party that was generated by dh1(). 
+ * The \a f value will be destroyed by this function. + * + * \return Returns true if the key exchange was successful, or false if + * the \a k value is invalid. + * + * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-curves-02 + * + * \sa dh1() + */ +bool Curve25519::dh2(uint8_t k[32], uint8_t f[32]) +{ + uint8_t weak; + + // Evaluate the curve function: k = Curve25519::eval(f, k). + // If "k" is weak for contributory behaviour before or after + // the curve evaluation, then fail the exchange. For safety + // we perform every phase of the weak checks even if we could + // bail out earlier so that the execution takes the same + // amount of time for weak and non-weak "k" values. + weak = isWeakPoint(k); // Is "k" weak before? + weak |= ((eval(k, f, k) ^ 0x01) & 0x01); // Is "k" weak during? + weak |= isWeakPoint(k); // Is "k" weak after? + clean(f, 32); + return (bool)((weak ^ 0x01) & 0x01); +} + +/** + * \brief Determines if a Curve25519 point is weak for contributory behaviour. + * + * \param k The point to check. + * \return Returns 1 if \a k is weak for contributory behavior or + * returns zero if \a k is not weak. + */ +uint8_t Curve25519::isWeakPoint(const uint8_t k[32]) +{ + // List of weak points from http://cr.yp.to/ecdh.html + // That page lists some others but they are variants on these + // of the form "point + i * (2^255 - 19)" for i = 0, 1, 2. + // Here we mask off the high bit and eval() catches the rest. 
+ static const uint8_t points[5][32] PROGMEM = { + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0xE0, 0xEB, 0x7A, 0x7C, 0x3B, 0x41, 0xB8, 0xAE, + 0x16, 0x56, 0xE3, 0xFA, 0xF1, 0x9F, 0xC4, 0x6A, + 0xDA, 0x09, 0x8D, 0xEB, 0x9C, 0x32, 0xB1, 0xFD, + 0x86, 0x62, 0x05, 0x16, 0x5F, 0x49, 0xB8, 0x00}, + {0x5F, 0x9C, 0x95, 0xBC, 0xA3, 0x50, 0x8C, 0x24, + 0xB1, 0xD0, 0xB1, 0x55, 0x9C, 0x83, 0xEF, 0x5B, + 0x04, 0x44, 0x5C, 0xC4, 0x58, 0x1C, 0x8E, 0x86, + 0xD8, 0x22, 0x4E, 0xDD, 0xD0, 0x9F, 0x11, 0x57}, + {0xEC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F} + }; + + // Check each of the weak points in turn. We perform the + // comparisons carefully so as not to reveal the value of "k" + // in the instruction timing. If "k" is indeed weak then + // we still check everything so as not to reveal which + // weak point it is. + uint8_t result = 0; + for (uint8_t posn = 0; posn < 5; ++posn) { + const uint8_t *point = points[posn]; + uint8_t check = (pgm_read_byte(point + 31) ^ k[31]) & 0x7F; + for (uint8_t index = 31; index > 0; --index) + check |= (pgm_read_byte(point + index - 1) ^ k[index - 1]); + result |= (uint8_t)((((uint16_t)0x0100) - check) >> 8); + } + + // The "result" variable will be non-zero if there was a match. + return result; +} + +/** + * \brief Reduces a number modulo 2^255 - 19. + * + * \param result The array that will contain the result when the + * function exits. Must be NUM_LIMBS limbs in size. 
+ * \param x The number to be reduced, which must be NUM_LIMBS * 2 limbs in + * size and less than or equal to square(2^255 - 19 - 1). This array will + * be modified by the reduction process. + * \param size The size of the high order half of \a x. This indicates + * the size of \a x in limbs. If it is shorter than NUM_LIMBS then the + * reduction can be performed quicker. + */ +void Curve25519::reduce(limb_t *result, limb_t *x, uint8_t size) +{ + /* + Note: This explanation is best viewed with a UTF-8 text viewer. + + To help explain what this function is doing, the following describes + how to efficiently compute reductions modulo a base of the form (2ⁿ - b) + where b is greater than zero and (b + 1)² <= 2ⁿ. + + Here we are interested in reducing the result of multiplying two + numbers that are less than or equal to (2ⁿ - b - 1). That is, + multiplying numbers that have already been reduced. + + Given some x less than or equal to (2ⁿ - b - 1)², we want to find a + y less than (2ⁿ - b) such that: + + y ≡ x mod (2ⁿ - b) + + We know that for all integer values of k >= 0: + + y ≡ x - k * (2ⁿ - b) + ≡ x - k * 2ⁿ + k * b + + In our case we choose k = ⌊x / 2ⁿ⌋ and then let: + + w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b + + The value w will either be the answer y or y can be obtained by + repeatedly subtracting (2ⁿ - b) from w until it is less than (2ⁿ - b). + At most b subtractions will be required. + + In our case b is 19 which is more subtractions than we would like to do, + but we can handle that by performing the above reduction twice and then + performing a single trial subtraction: + + w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b + y = (w mod 2ⁿ) + ⌊w / 2ⁿ⌋ * b + if y >= (2ⁿ - b) + y -= (2ⁿ - b) + + The value y is the answer we want for reducing x modulo (2ⁿ - b). + */ + + dlimb_t carry; + uint8_t posn; + + // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will + // either produce the answer we want or it will produce a + // value of the form "answer + j * (2^255 - 19)". 
+ carry = ((dlimb_t)(x[NUM_LIMBS - 1] >> (LIMB_BITS - 1))) * 19U; + x[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + for (posn = 0; posn < size; ++posn) { + carry += ((dlimb_t)(x[posn + NUM_LIMBS])) * 38U; + carry += x[posn]; + x[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + if (size < NUM_LIMBS) { + // The high order half of the number is short; e.g. for mulA24(). + // Propagate the carry through the rest of the low order part. + for (posn = size; posn < NUM_LIMBS; ++posn) { + carry += x[posn]; + x[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + } + + // The "j" value may still be too large due to the final carry-out. + // We must repeat the reduction. If we already have the answer, + // then this won't do any harm but we must still do the calculation + // to preserve the overall timing. + carry *= 38U; + carry += ((dlimb_t)(x[NUM_LIMBS - 1] >> (LIMB_BITS - 1))) * 19U; + x[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + for (posn = 0; posn < NUM_LIMBS; ++posn) { + carry += x[posn]; + x[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // At this point "x" will either be the answer or it will be the + // answer plus (2^255 - 19). Perform a trial subtraction which + // is equivalent to adding 19 and subtracting 2^255. We put the + // trial answer into the top-most limbs of the original "x" array. + // We add 19 here; the subtraction of 2^255 occurs in the next step. + carry = 19U; + for (posn = 0; posn < NUM_LIMBS; ++posn) { + carry += x[posn]; + x[posn + NUM_LIMBS] = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // If there was a borrow, then the bottom-most limbs of "x" are the + // correct answer. If there was no borrow, then the top-most limbs + // of "x" are the correct answer. Select the correct answer but do + // it in a way that instruction timing will not reveal which value + // was selected. Borrow will occur if the high bit of the previous + // result is 0: turn the high bit into a selection mask. 
+ limb_t mask = (limb_t)(((slimb_t)(x[NUM_LIMBS * 2 - 1])) >> (LIMB_BITS - 1)); + limb_t nmask = ~mask; + x[NUM_LIMBS * 2 - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + for (posn = 0; posn < NUM_LIMBS; ++posn) { + result[posn] = (x[posn] & nmask) | (x[posn + NUM_LIMBS] & mask); + } +} + +/** + * \brief Quickly reduces a number modulo 2^255 - 19. + * + * \param x The number to be reduced, which must be NUM_LIMBS limbs in size + * and less than or equal to 2 * (2^255 - 19 - 1). + * \return Zero if \a x was greater than or equal to (2^255 - 19). + * + * The answer is also put into \a x and will consist of NUM_LIMBS limbs. + * + * This function is intended for reducing the result of additions where + * the caller knows that \a x is within the described range. A single + * trial subtraction is all that is needed to reduce the number. + */ +Curve25519::limb_t Curve25519::reduceQuick(limb_t *x) +{ + limb_t temp[NUM_LIMBS]; + dlimb_t carry; + uint8_t posn; + + // Perform a trial subtraction of (2^255 - 19) from "x" which is + // equivalent to adding 19 and subtracting 2^255. We add 19 here; + // the subtraction of 2^255 occurs in the next step. + carry = 19U; + for (posn = 0; posn < NUM_LIMBS; ++posn) { + carry += x[posn]; + temp[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // If there was a borrow, then the original "x" is the correct answer. + // If there was no borrow, then "temp" is the correct answer. Select the + // correct answer but do it in a way that instruction timing will not + // reveal which value was selected. Borrow will occur if the high bit + // of "temp" is 0: turn the high bit into a selection mask. + limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS - 1])) >> (LIMB_BITS - 1)); + limb_t nmask = ~mask; + temp[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); + for (posn = 0; posn < NUM_LIMBS; ++posn) { + x[posn] = (x[posn] & nmask) | (temp[posn] & mask); + } + + // Clean up "temp". 
+ clean(temp); + + // Return a zero value if we actually subtracted (2^255 - 19) from "x". + return nmask; +} + +/** + * \brief Multiplies two values and then reduces the result modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS limbs in size and can + * be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. + * \param y The second value to multiply, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. This can be the same array as \a x. + */ +void Curve25519::mul(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS * 2]; + uint8_t i, j; + dlimb_t carry; + limb_t word; + + // Multiply the lowest word of x by y. + carry = 0; + word = x[0]; + for (i = 0; i < NUM_LIMBS; ++i) { + carry += ((dlimb_t)(y[i])) * word; + temp[i] = (limb_t)carry; + carry >>= LIMB_BITS; + } + temp[NUM_LIMBS] = (limb_t)carry; + + // Multiply and add the remaining words of x by y. + for (i = 1; i < NUM_LIMBS; ++i) { + word = x[i]; + carry = 0; + for (j = 0; j < NUM_LIMBS; ++j) { + carry += ((dlimb_t)(y[j])) * word; + carry += temp[i + j]; + temp[i + j] = (limb_t)carry; + carry >>= LIMB_BITS; + } + temp[i + NUM_LIMBS] = (limb_t)carry; + } + + // Reduce the intermediate result modulo 2^255 - 19. + reduce(result, temp, NUM_LIMBS); + clean(temp); +} + +/** + * \fn void Curve25519::square(limb_t *result, const limb_t *x) + * \brief Squares a value and then reduces it modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS limbs in size and + * can be the same array as \a x. + * \param x The value to square, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. + */ + +/** + * \brief Multiplies a value by the a24 constant and then reduces the result + * modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS limbs in size and can + * be the same array as \a x. 
+ * \param x The value to multiply by a24, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. + */ +void Curve25519::mulA24(limb_t *result, const limb_t *x) +{ + // The constant a24 = 121665 (0x1DB41) as a limb array. +#if CURVE25519_LIMB_8BIT + static limb_t const a24[3] PROGMEM = {0x41, 0xDB, 0x01}; + #define pgm_read_a24(index) (pgm_read_byte(&(a24[(index)]))) +#elif CURVE25519_LIMB_16BIT + static limb_t const a24[2] PROGMEM = {0xDB41, 0x0001}; + #define pgm_read_a24(index) (pgm_read_word(&(a24[(index)]))) +#elif CURVE25519_LIMB_32BIT + static limb_t const a24[1] PROGMEM = {0x0001DB41}; + #define pgm_read_a24(index) (pgm_read_dword(&(a24[(index)]))) +#else + #error "limb_t must be 8, 16, or 32 bits in size" +#endif + #define NUM_A24_LIMBS (sizeof(a24) / sizeof(limb_t)) + + // Multiply the lowest limb of a24 by x and zero-extend into the result. + limb_t temp[NUM_LIMBS * 2]; + uint8_t i, j; + dlimb_t carry = 0; + limb_t word = pgm_read_a24(0); + for (i = 0; i < NUM_LIMBS; ++i) { + carry += ((dlimb_t)(x[i])) * word; + temp[i] = (limb_t)carry; + carry >>= LIMB_BITS; + } + temp[NUM_LIMBS] = (limb_t)carry; + + // Multiply and add the remaining limbs of a24. + for (i = 1; i < NUM_A24_LIMBS; ++i) { + word = pgm_read_a24(i); + carry = 0; + for (j = 0; j < NUM_LIMBS; ++j) { + carry += ((dlimb_t)(x[j])) * word; + carry += temp[i + j]; + temp[i + j] = (limb_t)carry; + carry >>= LIMB_BITS; + } + temp[i + NUM_LIMBS] = (limb_t)carry; + } + + // Reduce the intermediate result modulo 2^255 - 19. + reduce(result, temp, NUM_A24_LIMBS); + clean(temp); +} + +/** + * \brief Adds two values and then reduces the result modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS limbs in size and can + * be the same array as \a x or \a y. + * \param x The first value to add, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. 
+ * \param y The second value to add, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. + */ +void Curve25519::add(limb_t *result, const limb_t *x, const limb_t *y) +{ + dlimb_t carry = 0; + uint8_t posn; + + // Add the two arrays to obtain the intermediate result. + for (posn = 0; posn < NUM_LIMBS; ++posn) { + carry += x[posn]; + carry += y[posn]; + result[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Reduce the result using the quick trial subtraction method. + reduceQuick(result); +} + +/** + * \brief Subtracts two values and then reduces the result modulo 2^255 - 19. + * + * \param result The result, which must be NUM_LIMBS limbs in size and can + * be the same array as \a x or \a y. + * \param x The value to subtract from, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. + * \param y The value to subtract from \a x, which must be NUM_LIMBS limbs in size + * and less than 2^255 - 19. + */ +void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y) +{ + dlimb_t borrow; + uint8_t posn; + + // Subtract y from x to generate the intermediate result. + borrow = 0; + for (posn = 0; posn < NUM_LIMBS; ++posn) { + borrow = ((dlimb_t)x[posn]) - y[posn] - ((borrow >> LIMB_BITS) & 0x01); + result[posn] = (limb_t)borrow; + } + + // If we had a borrow, then the result has gone negative and we + // have to add 2^255 - 19 to the result to make it positive again. + // The top bits of "borrow" will be all 1's if there is a borrow + // or it will be all 0's if there was no borrow. Easiest is to + // conditionally subtract 19 and then mask off the high bit. 
+ borrow = (borrow >> LIMB_BITS) & 19U; + borrow = ((dlimb_t)result[0]) - borrow; + result[0] = (limb_t)borrow; + for (posn = 1; posn < NUM_LIMBS; ++posn) { + borrow = ((dlimb_t)result[posn]) - ((borrow >> LIMB_BITS) & 0x01); + result[posn] = (limb_t)borrow; + } + result[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1); +} + +/** + * \brief Conditionally swaps two values if a selection value is non-zero. + * + * \param select Non-zero to swap \a x and \a y, zero to leave them unchanged. + * \param x The first value to conditionally swap. + * \param y The second value to conditionally swap. + * + * The swap is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + */ +void Curve25519::cswap(uint8_t select, limb_t *x, limb_t *y) +{ + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits are set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Swap the two values based on "select". Algorithm from: + // https://tools.ietf.org/html/draft-irtf-cfrg-curves-02 + for (posn = 0; posn < NUM_LIMBS; ++posn) { + dummy = sel & (x[posn] ^ y[posn]); + x[posn] ^= dummy; + y[posn] ^= dummy; + } +} + +/** + * \brief Computes the reciprocal of a number modulo 2^255 - 19. + * + * \param result The result as an array of NUM_LIMBS limbs in size. This can + * be the same array as \a x. + * \param x The number to compute the reciprocal for. + */ +void Curve25519::recip(limb_t *result, const limb_t *x) +{ + limb_t t1[NUM_LIMBS]; + uint8_t i, j; + + // The reciprocal is the same as x ^ (p - 2) where p = 2^255 - 19. + // The big-endian hexadecimal expansion of (p - 2) is: + // 7FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFEB + // + // The naive implementation needs to do 2 multiplications per 1 bit and + // 1 multiplication per 0 bit. 
We can improve upon this by creating a + // pattern 0000000001 ... 0000000001. If we square and multiply the + // pattern by itself we can turn the pattern into the partial results + // 0000000011 ... 0000000011, 0000000111 ... 0000000111, etc. + // This averages out to about 1.1 multiplications per 1 bit instead of 2. + + // Build a pattern of 250 bits in length of repeated copies of 0000000001. + #define RECIP_GROUP_SIZE 10 + #define RECIP_GROUP_BITS 250 // Must be a multiple of RECIP_GROUP_SIZE. + square(t1, x); + for (j = 0; j < (RECIP_GROUP_SIZE - 1); ++j) + square(t1, t1); + mul(result, t1, x); + for (i = 0; i < ((RECIP_GROUP_BITS / RECIP_GROUP_SIZE) - 2); ++i) { + for (j = 0; j < RECIP_GROUP_SIZE; ++j) + square(t1, t1); + mul(result, result, t1); + } + + // Multiply bit-shifted versions of the 0000000001 pattern into + // the result to "fill in" the gaps in the pattern. + square(t1, result); + mul(result, result, t1); + for (j = 0; j < (RECIP_GROUP_SIZE - 2); ++j) { + square(t1, t1); + mul(result, result, t1); + } + + // Deal with the 5 lowest bits of (p - 2), 01011, from highest to lowest. + square(result, result); + square(result, result); + mul(result, result, x); + square(result, result); + square(result, result); + mul(result, result, x); + square(result, result); + mul(result, result, x); + + // Clean up and exit. + clean(t1); +} + +/** + * \brief Unpacks the little-endian byte representation of a field element + * into a limb array. + * + * \param result The limb array. + * \param x The byte representation. + * + * The top-most bit of \a result will be set to zero so that the value + * is guaranteed to be 255 bits rather than 256. 
+ * + * \sa pack() + */ +void Curve25519::unpack(limb_t *result, const uint8_t *x) +{ +#if CURVE25519_LIMB_8BIT + memcpy(result, x, 32); + result[31] &= 0x7F; +#elif CURVE25519_LIMB_16BIT + for (uint8_t posn = 0; posn < 16; ++posn) { + result[posn] = ((limb_t)x[posn * 2]) | (((limb_t)x[posn * 2 + 1]) << 8); + } + result[15] &= 0x7FFF; +#elif CURVE25519_LIMB_32BIT + for (uint8_t posn = 0; posn < 8; ++posn) { + result[posn] = ((limb_t)x[posn * 4]) | + (((limb_t)x[posn * 4 + 1]) << 8) | + (((limb_t)x[posn * 4 + 2]) << 16) | + (((limb_t)x[posn * 4 + 3]) << 24); + } + result[7] &= 0x7FFFFFFF; +#endif +} + +/** + * \brief Packs the limb array representation of a field element into a + * byte array. + * + * \param result The byte array. + * \param x The limb representation. + * + * \sa unpack() + */ +void Curve25519::pack(uint8_t *result, const limb_t *x) +{ +#if CURVE25519_LIMB_8BIT + memcpy(result, x, 32); +#elif CURVE25519_LIMB_16BIT + for (uint8_t posn = 0; posn < 16; ++posn) { + limb_t value = x[posn]; + result[posn * 2] = (uint8_t)value; + result[posn * 2 + 1] = (uint8_t)(value >> 8); + } +#elif CURVE25519_LIMB_32BIT + for (uint8_t posn = 0; posn < 8; ++posn) { + limb_t value = x[posn]; + result[posn * 4] = (uint8_t)value; + result[posn * 4 + 1] = (uint8_t)(value >> 8); + result[posn * 4 + 2] = (uint8_t)(value >> 16); + result[posn * 4 + 3] = (uint8_t)(value >> 24); + } +#endif +} diff --git a/libraries/Crypto/Curve25519.h b/libraries/Crypto/Curve25519.h new file mode 100644 index 00000000..4f702c47 --- /dev/null +++ b/libraries/Crypto/Curve25519.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 Southern Storm Software, Pty Ltd. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CRYPTO_CURVE25519_h
+#define CRYPTO_CURVE25519_h
+
+// NOTE(review): the header names on the two #include lines below are missing
+// from this copy of the patch - restore them from the original commit.
+#include
+#include
+
+// Define exactly one of these to 1 to set the size of the basic limb type.
+// 16-bit limbs seem to give the best performance on 8-bit AVR micros.
+#define CURVE25519_LIMB_8BIT 0
+#define CURVE25519_LIMB_16BIT 1
+#define CURVE25519_LIMB_32BIT 0
+
+// Curve25519 operations, exposed purely as static functions.
+class Curve25519
+{
+public:
+    // Raw curve evaluation.  All three arguments are 32-byte arrays.
+    static bool eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32]);
+
+    // First and second halves of Diffie-Hellman key agreement.
+    static void dh1(uint8_t k[32], uint8_t f[32]);
+    static bool dh2(uint8_t k[32], uint8_t f[32]);
+
+    // Everything below is an implementation detail.  It is only made public
+    // when TEST_CURVE25519_FIELD_OPS is defined before this header is
+    // included, so that test code can exercise the raw field operations.
+#if defined(TEST_CURVE25519_FIELD_OPS)
+public:
+#else
+private:
+#endif
+    // Define the limb types to use on this platform: limb_t is one limb,
+    // slimb_t is its signed counterpart and dlimb_t is twice as wide.
+    #if CURVE25519_LIMB_8BIT
+    typedef uint8_t limb_t;
+    typedef int8_t slimb_t;
+    typedef uint16_t dlimb_t;
+    #elif CURVE25519_LIMB_16BIT
+    typedef uint16_t limb_t;
+    typedef int16_t slimb_t;
+    typedef uint32_t dlimb_t;
+    #elif CURVE25519_LIMB_32BIT
+    typedef uint32_t limb_t;
+    typedef int32_t slimb_t;
+    typedef uint64_t dlimb_t;
+    #else
+    #error "limb_t must be 8, 16, or 32 bits in size"
+    #endif
+
+    static uint8_t isWeakPoint(const uint8_t k[32]);
+
+    static void reduce(limb_t *result, limb_t *x, uint8_t size);
+    static limb_t reduceQuick(limb_t *x);
+
+    static void mul(limb_t *result, const limb_t *x, const limb_t *y);
+    // Squaring is simply a multiplication of x by itself.
+    static void square(limb_t *result, const limb_t *x)
+    {
+        mul(result, x, x);
+    }
+
+    static void mulA24(limb_t *result, const limb_t *x);
+
+    static void add(limb_t *result, const limb_t *x, const limb_t *y);
+    static void sub(limb_t *result, const limb_t *x, const limb_t *y);
+
+    static void cswap(uint8_t select, limb_t *x, limb_t *y);
+
+    static void recip(limb_t *result, const limb_t *x);
+
+    // Conversion between 32-byte arrays and limb arrays.
+    static void unpack(limb_t *result, const uint8_t *x);
+    static void pack(uint8_t *result, const limb_t *x);
+
+    // Constructor and destructor sit in the section above, so they are
+    // private (and this class cannot be instantiated) unless
+    // TEST_CURVE25519_FIELD_OPS is defined.
+    Curve25519() {}
+    ~Curve25519() {}
+};
+
+#endif
diff --git a/libraries/Crypto/examples/TestCurve25519/TestCurve25519.ino b/libraries/Crypto/examples/TestCurve25519/TestCurve25519.ino
new file mode 100644
index 00000000..8be8011f
--- /dev/null
+++ b/libraries/Crypto/examples/TestCurve25519/TestCurve25519.ino
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+This example runs tests on the Curve25519 algorithm.
+*/
+
+// NOTE(review): the header names on the four #include lines below are missing
+// from this copy of the patch - restore them from the original commit.
+#include
+#include
+#include
+#include
+
+// Prints "name = " followed by the 32 bytes of x as upper-case hex digits,
+// in array order, and then ends the line.
+void printNumber(const char *name, const uint8_t *x)
+{
+    static const char hexchars[] = "0123456789ABCDEF";
+    Serial.print(name);
+    Serial.print(" = ");
+    for (uint8_t posn = 0; posn < 32; ++posn) {
+        Serial.print(hexchars[(x[posn] >> 4) & 0x0F]);
+        Serial.print(hexchars[x[posn] & 0x0F]);
+    }
+    Serial.println();
+}
+
+// Runs one timed Curve25519::eval() call and reports the outcome.
+//
+// label    - text printed before the evaluation starts.
+// scalar   - 32-byte scalar passed as the "s" argument of eval().
+// point    - 32-byte point passed as the "x" argument of eval(); the
+//            public key checks pass 0 here.
+// expected - 32-byte value that the evaluation must produce.
+//
+// Prints "ok", or "failed" plus the actual and expected values, followed
+// by the elapsed time in microseconds.
+static void checkEval(const char *label, const uint8_t *scalar,
+                      const uint8_t *point, const uint8_t *expected)
+{
+    uint8_t result[32];
+
+    Serial.print(label);
+    Serial.flush();     // Keep serial output from interfering with the timing.
+    unsigned long start = micros();
+    Curve25519::eval(result, scalar, point);
+    unsigned long elapsed = micros() - start;
+
+    if (memcmp(result, expected, 32) == 0) {
+        Serial.print("ok");
+    } else {
+        Serial.println("failed");
+        printNumber("actual ", result);
+        printNumber("expected", expected);
+    }
+    Serial.print(" (elapsed ");
+    Serial.print(elapsed);
+    Serial.println(" us)");
+}
+
+// Check the eval() function using the test vectors from:
+// https://tools.ietf.org/html/draft-turner-thecurve25519function-01
+void testEval()
+{
+    static uint8_t alice_private[32] = {
+        0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
+        0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
+        0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
+        0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a
+    };
+    static uint8_t const alice_public[32] = {
+        0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
+        0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
+        0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
+        0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a
+    };
+    static uint8_t bob_private[32] = {
+        0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
+        0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
+        0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
+        0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb
+    };
+    static uint8_t const bob_public[32] = {
+        0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
+        0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
+        0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
+        0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f
+    };
+    static uint8_t const shared_secret[32] = {
+        0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+        0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+        0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+        0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42
+    };
+
+    // Fix up the private keys by applying the standard masks.
+    alice_private[0] &= 0xF8;
+    alice_private[31] = (alice_private[31] & 0x7F) | 0x40;
+    bob_private[0] &= 0xF8;
+    bob_private[31] = (bob_private[31] & 0x7F) | 0x40;
+
+    // Evaluate the curve function and check the public keys.
+    Serial.println("Fixed test vectors:");
+    checkEval("Computing Alice's public key ... ",
+              alice_private, 0, alice_public);
+    checkEval("Computing Bob's public key ... ",
+              bob_private, 0, bob_public);
+
+    // Compute the shared secret from each side.
+    checkEval("Computing Alice's shared secret ... ",
+              alice_private, bob_public, shared_secret);
+    checkEval("Computing Bob's shared secret ... ",
+              bob_private, alice_public, shared_secret);
+}
+
+// Prints the elapsed time of one Diffie-Hellman step and ends the line.
+static void printElapsed(unsigned long elapsed)
+{
+    Serial.print("elapsed ");
+    Serial.print(elapsed);
+    Serial.println(" us");
+}
+
+// Runs dh1() and dh2() for two parties, timing each call, and then checks
+// that the two k buffers hold identical contents.
+void testDH()
+{
+    static uint8_t alice_k[32];
+    static uint8_t alice_f[32];
+    static uint8_t bob_k[32];
+    static uint8_t bob_f[32];
+
+    Serial.println("Diffie-Hellman key exchange:");
+    Serial.print("Generate random k/f for Alice ... ");
+    Serial.flush();
+    unsigned long start = micros();
+    Curve25519::dh1(alice_k, alice_f);
+    unsigned long elapsed = micros() - start;
+    printElapsed(elapsed);
+
+    Serial.print("Generate random k/f for Bob ... ");
+    Serial.flush();
+    start = micros();
+    Curve25519::dh1(bob_k, bob_f);
+    elapsed = micros() - start;
+    printElapsed(elapsed);
+
+    // Each side passes the other side's k buffer together with its own f.
+    Serial.print("Generate shared secret for Alice ... ");
+    Serial.flush();
+    start = micros();
+    Curve25519::dh2(bob_k, alice_f);
+    elapsed = micros() - start;
+    printElapsed(elapsed);
+
+    Serial.print("Generate shared secret for Bob ... ");
+    Serial.flush();
+    start = micros();
+    Curve25519::dh2(alice_k, bob_f);
+    elapsed = micros() - start;
+    printElapsed(elapsed);
+
+    Serial.print("Check that the shared secrets match ... ");
+    if (memcmp(alice_k, bob_k, 32) == 0)
+        Serial.println("ok");
+    else
+        Serial.println("failed");
+}
+
+void setup()
+{
+    Serial.begin(9600);
+
+    // Start the random number generator. We don't initialise a noise
+    // source here because we don't need one for testing purposes.
+    // Real DH applications should of course use a proper noise source.
+    RNG.begin("TestCurve25519 1.0", 500);
+
+    // Perform the tests.
+    testEval();
+    Serial.println();
+    testDH();
+    Serial.println();
+}
+
+// Nothing to do repeatedly: every test runs once from setup().
+void loop()
+{
+}
diff --git a/libraries/Crypto/examples/TestCurve25519Math/TestCurve25519Math.ino b/libraries/Crypto/examples/TestCurve25519Math/TestCurve25519Math.ino
new file mode 100644
index 00000000..06a2e977
--- /dev/null
+++ b/libraries/Crypto/examples/TestCurve25519Math/TestCurve25519Math.ino
@@ -0,0 +1,622 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+This example runs tests on the Curve25519 field mathematics independent
+of the full curve operation itself.
+*/
+
+// Enable access to the internals of Curve25519 to test the raw field ops.
+#define TEST_CURVE25519_FIELD_OPS 1
+
+// NOTE(review): the header names on the four #include lines below are missing
+// from this copy of the patch - restore them from the original commit.
+#include
+#include
+#include
+#include
+
+// Copy some definitions from the Curve25519 class for convenience.
+#define NUM_LIMBS (32 / sizeof(Curve25519::limb_t))
+#define LIMB_BITS (8 * sizeof(Curve25519::limb_t))
+#define limb_t Curve25519::limb_t
+#define dlimb_t Curve25519::dlimb_t
+#define INVERSE_LIMB (~((limb_t)0))
+
+// simpleMod() below wants an integer type four times as wide as limb_t.
+// No such type is selected for the remaining limb size, in which case
+// CURVE25519_NO_QLIMB is defined instead.
+#if CURVE25519_LIMB_8BIT
+#define qlimb_t uint32_t
+#elif CURVE25519_LIMB_16BIT
+#define qlimb_t uint64_t
+#else
+#define CURVE25519_NO_QLIMB 1
+#endif
+
+// Scratch buffers shared by the tests.  result2 has NUM_LIMBS * 2 + 1 limbs,
+// which is the buffer size that simpleMod() expects.
+limb_t arg1[NUM_LIMBS];
+limb_t arg2[NUM_LIMBS];
+limb_t result[NUM_LIMBS];
+limb_t result2[NUM_LIMBS * 2 + 1];
+limb_t temp[NUM_LIMBS];
+
+// Parses a decimal string held in program memory into the "size" limbs of
+// "x", least significant limb first.  Characters other than '0'..'9' are
+// skipped, and any carry out of the last limb is discarded.
+void fromString(limb_t *x, uint8_t size, const char *str)
+{
+    memset(x, 0, sizeof(limb_t) * size);
+    for (;;) {
+        uint8_t ch = pgm_read_byte((uint8_t *)str);
+        if (ch == '\0')
+            break;
+        ++str;
+        if (ch < '0' || ch > '9')
+            continue;
+
+        // x = x * 10 + digit, rippling the carry up through the limbs.
+        dlimb_t carry = ch - '0';
+        for (uint8_t limb = 0; limb < size; ++limb) {
+            carry += ((dlimb_t)x[limb]) * 10U;
+            x[limb] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+    }
+}
+
+// Three-way comparison of two NUM_LIMBS-long numbers: returns -1 if x < y,
+// 0 if they are equal, and 1 if x > y.
+int compare(const limb_t *x, const limb_t *y)
+{
+    // Walk down from the most significant limb; the first limb that differs
+    // decides the ordering.
+    uint8_t limb = NUM_LIMBS;
+    while (limb-- > 0) {
+        if (x[limb] != y[limb])
+            return (x[limb] < y[limb]) ? -1 : 1;
+    }
+    return 0;
+}
+
+// Compare two numbers where one is a decimal string. Returns -1, 0, or 1.
+int compare(const limb_t *x, const char *y)
+{
+    // Parse the decimal string into limbs, then defer to the limb compare.
+    limb_t parsed[NUM_LIMBS];
+    fromString(parsed, NUM_LIMBS, y);
+    return compare(x, parsed);
+}
+
+// Prints "name = " followed by every limb of x in hexadecimal.  Limbs are
+// printed in array order (index 0 first), each with its most significant
+// hex digit first and followed by a space.
+void printNumber(const char *name, const limb_t *x)
+{
+    static const char hexchars[] = "0123456789ABCDEF";
+    Serial.print(name);
+    Serial.print(" = ");
+    for (uint8_t limb = 0; limb < NUM_LIMBS; ++limb) {
+        const limb_t value = x[limb];
+        for (int8_t shift = LIMB_BITS - 4; shift >= 0; shift -= 4)
+            Serial.print(hexchars[(value >> shift) & 0x0F]);
+        Serial.print(' ');
+    }
+    Serial.println();
+}
+
+// Decimal constants, stored in program memory, that make useful operands
+// for the field operation tests.
+char const num_0[] PROGMEM = "0";
+char const num_1[] PROGMEM = "1";
+char const num_2[] PROGMEM = "2";
+char const num_4[] PROGMEM = "4";
+char const num_5[] PROGMEM = "5";
+char const num_128[] PROGMEM = "128";
+char const num_256[] PROGMEM = "256";
+char const num_2_64_m7[] PROGMEM = "18446744073709551609"; // 2^64 - 7
+char const num_2_129_m5[] PROGMEM = "680564733841876926926749214863536422907"; // 2^129 - 5
+char const num_pi[] PROGMEM = "31415926535897932384626433832795028841971693993751058209749445923078164062862"; // 77 digits of pi
+char const num_2_255_m253[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819715"; // 2^255 - 253
+char const num_2_255_m20[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819948"; // 2^255 - 20
+char const num_2_255_m19[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819949"; // 2^255 - 19
+char const num_2_255_m19_x2[] PROGMEM = "115792089237316195423570985008687907853269984665640564039457584007913129639898"; // (2^255 - 19) * 2
+char const num_a24[] PROGMEM = "121665";
+
+// Table of useful numbers less than 2^255 - 19.
+const char * const numbers[] = {
+    num_0,
+    num_1,
+    num_2,
+    num_4,
+    num_5,
+    num_128,
+    num_256,
+    num_2_64_m7,
+    num_2_129_m5,
+    num_pi,
+    num_2_255_m253,
+    num_2_255_m20,
+    0
+};
+// Number of real entries; the terminating 0 is not counted.
+#define numbers_count ((sizeof(numbers) / sizeof(numbers[0])) - 1)
+
+// Iterates "var" over every entry of numbers[].  The increment step reads
+// numbers[numbers_count] after the last entry, which is the terminating 0.
+// The macro expands to a declaration followed by a for loop, so it must be
+// used where a declaration is allowed.
+#define foreach_number(var) \
+    const char *var = numbers[0]; \
+    for (unsigned index##var = 0; index##var < numbers_count; \
+            ++index##var, var = numbers[index##var])
+
+// Prints a NUL-terminated string that is stored in program memory.
+void printProgMem(const char *str)
+{
+    for (;;) {
+        uint8_t ch = pgm_read_byte((uint8_t *)str);
+        if (ch == '\0')
+            return;
+        Serial.print((char)ch);
+        ++str;
+    }
+}
+
+// Reference modular addition used to cross-check the library:
+// result = (x + y) mod (2^255 - 19).
+void simpleAdd(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    // Plain multi-limb addition with carry propagation.
+    dlimb_t carry = 0;
+    for (uint8_t limb = 0; limb < NUM_LIMBS; ++limb) {
+        carry += x[limb];
+        carry += y[limb];
+        result[limb] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+
+    // Nothing more to do when the sum is already below the modulus.
+    if (compare(result, num_2_255_m19) < 0)
+        return;
+
+    // Subtract 2^255 - 19 by adding 19 and then taking away 2^255, which
+    // is the top bit of the most significant limb.
+    carry = 19;
+    for (uint8_t limb = 0; limb < NUM_LIMBS; ++limb) {
+        carry += result[limb];
+        result[limb] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+    result[NUM_LIMBS - 1] -= ((limb_t)1) << (LIMB_BITS - 1);
+}
+
+// Reference subtraction used to cross-check the library: result = x - y.
+// The result is not reduced modulo 2^255 - 19 and the caller must make
+// sure that x is greater than or equal to y.
+void simpleSub(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    dlimb_t borrow = 0;
+    for (uint8_t limb = 0; limb < NUM_LIMBS; ++limb) {
+        const dlimb_t diff = ((dlimb_t)x[limb]) - y[limb] - borrow;
+        result[limb] = (limb_t)diff;
+        // Any bits above the limb mean that the subtraction wrapped.
+        borrow = (diff >> LIMB_BITS) != 0;
+    }
+}
+
+// Simple implementation of multiplication to cross-check the library.
+// Note: this does not reduce the result modulo 2^255 - 19.
+// The "result" buffer must contain at least NUM_LIMBS * 2 limbs.
+void simpleMul(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    memset(result, 0, NUM_LIMBS * 2 * sizeof(limb_t));
+    for (uint8_t i = 0; i < NUM_LIMBS; ++i) {
+        for (uint8_t j = 0; j < NUM_LIMBS; ++j) {
+            // Add the partial product x[i] * y[j] into limb i + j and then
+            // ripple the carry upwards until it dies out.
+            uint8_t n = i + j;
+            dlimb_t carry =
+                ((dlimb_t)x[i]) * y[j] + result[n];
+            result[n] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+            ++n;
+            while (carry != 0 && n < (NUM_LIMBS * 2)) {
+                carry += result[n];
+                result[n] = (limb_t)carry;
+                carry >>= LIMB_BITS;
+                ++n;
+            }
+        }
+    }
+}
+
+#if defined(CURVE25519_NO_QLIMB)
+
+// Quick check to correct the estimate on a quotient word.
+//
+// q   - estimated quotient limb.
+// y1  - most significant limb of the divisor.
+// y2  - second most significant limb of the divisor.
+// x01 - top two limbs of the current dividend window as a double limb.
+// x2  - third limb of the current dividend window.
+//
+// Returns q, q - 1, or q - 2.
+static inline limb_t correctEstimate
+    (limb_t q, limb_t y1, limb_t y2, dlimb_t x01, limb_t x2)
+{
+    // Algorithm D from section 4.3.1 of "The Art Of Computer Programming",
+    // D. Knuth, Volume 2, "Seminumerical Algorithms", Second Edition, 1981.
+    //
+    // We want to check if (y2 * q) > ((x01 - y1 * q) * b + x2) where
+    // b is (1 << LIMB_BITS). If it is, then q must be reduced by 1.
+    //
+    // One wrinkle that isn't obvious from Knuth's description is that it
+    // is possible for (x01 - y1 * q) >= b, especially in the case where
+    // x0 = y1 and q = b - 1. This will cause an overflow of the intermediate
+    // double-word result ((x01 - y1 * q) * b).
+    //
+    // In assembly language, we could use the carry flag to detect when
+    // (x01 - y1 * q) * b overflows, but we can't access the carry flag
+    // in C++. So we have to account for the carry in a different way here.
+
+    // Calculate the remainder using the estimated quotient.
+    dlimb_t r = x01 - ((dlimb_t)y1) * q;
+
+    // If there will be a double-word carry when we calculate (r * b),
+    // then (y2 * q) is obviously going to be less than (r * b), so we
+    // can stop here. The estimated quotient is correct.
+    if (r & (((dlimb_t)INVERSE_LIMB) << LIMB_BITS))
+        return q;
+
+    // Bail out if (y2 * q) <= (r * b + x2). The estimate is correct.
+    dlimb_t y2q = ((dlimb_t)y2) * q;
+    if (y2q <= ((r << LIMB_BITS) + x2))
+        return q;
+
+    // Correct for the estimated quotient being off by 1.
+    --q;
+
+    // Now repeat the check to correct for q values that are off by 2.
+    r += y1;    // r' = (x01 - y1 * (q - 1)) = (x01 - y1 * q + y1) = r + y1
+    if (r & (((dlimb_t)INVERSE_LIMB) << LIMB_BITS))
+        return q;
+    // y2q' = (y2 * (q - 1)) = (y2 * q - y2) = y2q - y2
+    if ((y2q - y2) <= ((r << LIMB_BITS) + x2))
+        return q;
+
+    // Perform the final correction for q values that are off by 2.
+    return q - 1;
+}
+
+#endif
+
+// Simple implementation of modular division to cross-check the library.
+// Calling this "simple" is a bit of a misnomer. It is a full implementation
+// of Algorithm D from section 4.3.1 of "The Art Of Computer Programming",
+// D. Knuth, Volume 2, "Seminumerical Algorithms", Second Edition, 1981.
+// This is quite slow on embedded platforms, but it should be correct.
+// Note: "x" is assumed to be (NUM_LIMBS * 2 + 1) limbs in size because
+// we need a limb for the extra leading zero word added by step D1.
+// On return the low NUM_LIMBS limbs of "x" hold x mod (2^255 - 19).
+void simpleMod(limb_t *x)
+{
+    limb_t divisor[NUM_LIMBS];
+    uint8_t j, k;
+
+    // Step D1. Normalize.
+    // The divisor (2^255 - 19) and "x" need to be shifted left until
+    // the top-most bit of the divisor is 1. Since we know that the
+    // next-to-top-most bit of (2^255 - 19) is already 1 and the top-most
+    // bit of "x" is zero, shifting everything into place is pretty easy.
+    fromString(divisor, NUM_LIMBS, num_2_255_m19_x2);
+    for (j = (NUM_LIMBS * 2); j > 1; --j) {
+        x[j - 1] = (x[j - 1] << 1) | (x[j - 2] >> (LIMB_BITS - 1));
+    }
+    x[0] <<= 1;
+    x[NUM_LIMBS * 2] = 0;   // Extra leading word.
+
+    // Step D2/D7. Loop on j
+    for (j = 0; j <= NUM_LIMBS; ++j) {
+        // Step D3. Calculate an estimate of the top-most quotient word.
+        // v[1] is the most significant divisor limb and v[0] the next one;
+        // u[2], u[1], u[0] are the top three limbs of the current window.
+        limb_t *u = x + NUM_LIMBS * 2 - 2 - j;
+        limb_t *v = divisor + NUM_LIMBS - 2;
+        limb_t q;
+        dlimb_t uword = ((((dlimb_t)u[2]) << LIMB_BITS) + u[1]);
+        if (u[2] == v[1])
+            q = ~((limb_t)0);
+        else
+            q = (limb_t)(uword / v[1]);
+
+        // Step D3, part 2. Correct the estimate downwards by 1 or 2.
+        // One subtlety of Knuth's algorithm is that it looks like the test
+        // is working with double-word quantities but it is actually using
+        // double-word plus a carry bit. So we need to use qlimb_t for this.
+#if !defined(CURVE25519_NO_QLIMB)
+        qlimb_t test = ((((qlimb_t)uword) - ((dlimb_t)q) * v[1]) << LIMB_BITS) + u[0];
+        if ((((dlimb_t)q) * v[0]) > test) {
+            --q;
+            test = ((((qlimb_t)uword) - ((dlimb_t)q) * v[1]) << LIMB_BITS) + u[0];
+            if ((((dlimb_t)q) * v[0]) > test)
+                --q;
+        }
+#else
+        // 32-bit platform - we don't have a 128-bit numeric type so we have
+        // to calculate the estimate in another way to preserve the carry bit.
+        // correctEstimate() takes the most significant divisor limb first,
+        // matching the use of v[1] and v[0] in the branch above.
+        q = correctEstimate(q, v[1], v[0], uword, u[0]);
+#endif
+
+        // Step D4. Multiply and subtract.
+        u = x + (NUM_LIMBS - j);
+        v = divisor;
+        dlimb_t carry = 0;
+        dlimb_t borrow = 0;
+        for (k = 0; k < NUM_LIMBS; ++k) {
+            carry += ((dlimb_t)v[k]) * q;
+            borrow = ((dlimb_t)u[k]) - ((limb_t)carry) - borrow;
+            u[k] = (limb_t)borrow;
+            carry >>= LIMB_BITS;
+            borrow = ((borrow >> LIMB_BITS) != 0);
+        }
+        borrow = ((dlimb_t)u[k]) - ((limb_t)carry) - borrow;
+        u[k] = (limb_t)borrow;
+
+        // Step D5. Test remainder. Nothing further to do if no borrow.
+        if ((borrow >> LIMB_BITS) == 0)
+            continue;
+
+        // Step D6. Borrow occurred: add back.
+        carry = 0;
+        for (k = 0; k < NUM_LIMBS; ++k) {
+            carry += u[k];
+            carry += v[k];
+            u[k] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+        u[k] += (limb_t)carry;
+    }
+
+    // Step D8. Unnormalize.
+    // Shift the remainder right by 1 bit to undo the earlier left shift.
+    for (j = 0; j < (NUM_LIMBS - 1); ++j) {
+        x[j] = (x[j] >> 1) | (x[j + 1] << (LIMB_BITS - 1));
+    }
+    x[NUM_LIMBS - 1] >>= 1;
+}
+
+// Prints the "x op y: " label that starts each binary test line and flushes
+// the serial port.  Both operands are program memory strings.
+static void printTestLabel(const char *x, const char *op, const char *y)
+{
+    printProgMem(x);
+    Serial.print(op);
+    printProgMem(y);
+    Serial.print(": ");
+    Serial.flush();
+}
+
+// Prints "ok" when the two NUM_LIMBS-long numbers match; otherwise prints
+// "failed" followed by both values.
+static void reportResult(const limb_t *actual, const limb_t *expected)
+{
+    if (compare(actual, expected) == 0) {
+        Serial.println("ok");
+    } else {
+        Serial.println("failed");
+        printNumber("actual ", actual);
+        printNumber("expected", expected);
+    }
+}
+
+// Checks Curve25519::add() against simpleAdd() for one pair of operands.
+void testAdd(const char *x, const char *y)
+{
+    printTestLabel(x, " + ", y);
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+    Curve25519::add(result, arg1, arg2);
+
+    simpleAdd(result2, arg1, arg2);
+
+    reportResult(result, result2);
+}
+
+void testAdd()
+{
+    Serial.println("Addition:");
+    foreach_number (x) {
+        foreach_number (y) {
+            testAdd(x, y);
+        }
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::sub() against the reference routines for one pair of
+// operands.
+void testSub(const char *x, const char *y)
+{
+    printTestLabel(x, " - ", y);
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+    Curve25519::sub(result, arg1, arg2);
+
+    if (compare(arg1, arg2) >= 0) {
+        // First argument is larger than the second.
+        simpleSub(result2, arg1, arg2);
+    } else {
+        // First argument is smaller than the second.
+        // Compute arg1 + (2^255 - 19 - arg2).
+        fromString(temp, NUM_LIMBS, num_2_255_m19);
+        simpleSub(result2, temp, arg2);
+        simpleAdd(result2, arg1, result2);
+    }
+
+    reportResult(result, result2);
+}
+
+void testSub()
+{
+    Serial.println("Subtraction:");
+    foreach_number (x) {
+        foreach_number (y) {
+            testSub(x, y);
+        }
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::mul(), or Curve25519::square() when both operands are
+// equal, against simpleMul() followed by simpleMod().
+void testMul(const char *x, const char *y)
+{
+    printTestLabel(x, " * ", y);
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+
+    if (compare(arg1, arg2) != 0)
+        Curve25519::mul(result, arg1, arg2);
+    else
+        Curve25519::square(result, arg1);
+
+    simpleMul(result2, arg1, arg2);
+    simpleMod(result2);
+
+    reportResult(result, result2);
+}
+
+void testMul()
+{
+    Serial.println("Multiplication:");
+    foreach_number (x) {
+        foreach_number (y) {
+            testMul(x, y);
+        }
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::mulA24() against a reference multiplication by the
+// constant num_a24.
+void testMulA24(const char *x)
+{
+    printTestLabel(x, " * ", num_a24);
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, num_a24);
+    Curve25519::mulA24(result, arg1);
+
+    simpleMul(result2, arg1, arg2);
+    simpleMod(result2);
+
+    reportResult(result, result2);
+}
+
+void testMulA24()
+{
+    Serial.println("Multiplication by a24:");
+    foreach_number (x) {
+        testMulA24(x);
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::cswap(): a non-zero "select" must exchange the two
+// values and a zero select must leave them alone.
+void testSwap(const char *x, const char *y, uint8_t select)
+{
+    printTestLabel(x, " <-> ", y);
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+
+    memcpy(result, arg1, NUM_LIMBS * sizeof(limb_t));
+    memcpy(result2, arg2, NUM_LIMBS * sizeof(limb_t));
+
+    // Swap the values using the selection bit.
+    Curve25519::cswap(select, result, result2);
+    bool ok = compare(result, arg2) == 0 && compare(result2, arg1) == 0;
+
+    // Don't swap the values back yet.
+    Curve25519::cswap(0, result, result2);
+    if (ok)
+        ok = compare(result, arg2) == 0 && compare(result2, arg1) == 0;
+
+    // Swap the values back.
+    Curve25519::cswap(select, result, result2);
+    if (ok)
+        ok = compare(result, arg1) == 0 && compare(result2, arg2) == 0;
+
+    // No swap.
+    Curve25519::cswap(0, result, result2);
+    if (ok)
+        ok = compare(result, arg1) == 0 && compare(result2, arg2) == 0;
+
+    if (ok) {
+        Serial.println("ok");
+    } else {
+        Serial.println("failed");
+    }
+}
+
+void testSwap()
+{
+    Serial.println("Swap:");
+    // Cycle through all eight single-bit select values.
+    uint8_t bit = 0;
+    foreach_number (x) {
+        foreach_number (y) {
+            testSwap(x, y, ((uint8_t)1) << bit);
+            bit = (bit + 1) % 8;
+        }
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::recip() by multiplying the result with the input.
+void testRecip(const char *x)
+{
+    printProgMem(x);
+    Serial.print("^-1");
+    Serial.print(": ");
+    Serial.flush();
+
+    fromString(arg1, NUM_LIMBS, x);
+    Curve25519::recip(result, arg1);
+
+    bool ok;
+    if (compare(arg1, num_0) == 0) {
+        // 0^-1 = 0
+        ok = (compare(result, num_0) == 0);
+    } else {
+        // Multiply the result with arg1 - we expect 1 as the result.
+        Curve25519::mul(result2, result, arg1);
+        ok = (compare(result2, num_1) == 0);
+    }
+
+    if (ok) {
+        Serial.println("ok");
+    } else {
+        Serial.println("failed");
+        printNumber("actual", result);
+    }
+}
+
+void testRecip()
+{
+    Serial.println("Reciprocal:");
+    foreach_number (x) {
+        testRecip(x);
+    }
+    Serial.println();
+}
+
+void setup()
+{
+    Serial.begin(9600);
+
+    testAdd();
+    testSub();
+    testMul();
+    testMulA24();
+    testSwap();
+    testRecip();
+}
+
+// Nothing to do repeatedly: every test runs once from setup().
+void loop()
+{
+}
diff --git a/libraries/Crypto/keywords.txt b/libraries/Crypto/keywords.txt index 39c75b65..e638495b 100644 --- a/libraries/Crypto/keywords.txt +++ b/libraries/Crypto/keywords.txt @@ -7,6 +7,8 @@ BLAKE2s KEYWORD1 SHA1 KEYWORD1 SHA256 KEYWORD1 +Curve25519 KEYWORD1 + CBC KEYWORD1 CFB KEYWORD1 CTR KEYWORD1 @@ -38,3 +40,7 @@ save KEYWORD2 loop KEYWORD2 destroy KEYWORD2 calibrating KEYWORD2 + +eval KEYWORD2 +dh1 KEYWORD2 +dh2 KEYWORD2