An implementation of Curve25519

2025-01-18 04:33:12 -08:00 · 2015-03-12 18:59:55 +10:00
parent ccffa1ec87
commit 0f975de733
7 changed files with 1784 additions and 4 deletions
--- a/doc/crypto.dox
+++ b/doc/crypto.dox
@@ -30,6 +30,7 @@
 \li Block cipher modes: CTR, CFB, CBC, OFB
 \li Stream ciphers: ChaCha
 \li Hash algorithms: SHA1, SHA256, BLAKE2s
+\li Public key algorithms: Curve25519
 \li Random number generation: \link RNGClass RNG\endlink, TransistorNoiseSource

 All cryptographic algorithms have been optimized for 8-bit Arduino platforms
@@ -49,10 +50,6 @@ with a 256-bit hash output.  It is intended as a high performance drop-in
 replacement for SHA256 for when speed is critical but exact SHA256
 compatibility is not.

-\section crypto_examples Examples
-
-TBD
-
 \section crypto_performance Performance

 All figures are for the Arduino Uno running at 16 MHz.  Figures for the
@@ -75,4 +72,13 @@ Where a cipher supports more than one key size (such as ChaCha), the values
 are typically almost identical for 128-bit and 256-bit keys so only the
 maximum is shown above.

+Public key algorithms have the following results on an Arduino Uno:
+
+<table>
+<tr><td>Algorithm</td><td>Operation</td><td>Time</td><td>Comment</td></tr>
+<tr><td>Curve25519</td><td>\link Curve25519::eval() eval()\endlink</td><td>3738 ms</td><td>Raw curve evaluation</td></tr>
+<tr><td>Curve25519</td><td>\link Curve25519::dh1() dh1()\endlink</td><td>3740 ms</td><td>First half of Diffie-Hellman key agreement</td></tr>
+<tr><td>Curve25519</td><td>\link Curve25519::dh2() dh2()\endlink</td><td>3738 ms</td><td>Second half of Diffie-Hellman key agreement</td></tr>
+</table>
+
 */
--- a/doc/mainpage.dox
+++ b/doc/mainpage.dox
@@ -94,6 +94,7 @@ realtime clock and the LCD library to implement an alarm clock.
 \li Block cipher modes: CTR, CFB, CBC, OFB
 \li Stream ciphers: ChaCha
 \li Hash algorithms: SHA1, SHA256, BLAKE2s
+\li Public key algorithms: Curve25519
 \li Random number generation: \link RNGClass RNG\endlink, TransistorNoiseSource

 More information can be found on the \ref crypto "Cryptographic Library" page.
--- a/libraries/Crypto/Curve25519.cpp
+++ b/libraries/Crypto/Curve25519.cpp
@@ -0,0 +1,831 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "Curve25519.h"
+#include "Crypto.h"
+#include "RNG.h"
+#include "utility/ProgMemUtil.h"
+#include <string.h>
+
+/**
+ * \class Curve25519 Curve25519.h <Curve25519.h>
+ * \brief Diffie-Hellman key agreement based on the elliptic curve
+ * modulo 2^255 - 19.
+ *
+ * \note The public functions in this class need a substantial amount of
+ * stack space to store intermediate results while the curve function is
+ * being evaluated.  About 1k of free stack space is recommended for safety.
+ *
+ * References: http://cr.yp.to/ecdh.html
+ * https://tools.ietf.org/html/draft-irtf-cfrg-curves-02
+ */
+
+// Number of limbs in a value from the field modulo 2^255 - 19.
+// We assume that sizeof(limb_t) is a power of 2: 1, 2, 4, etc.
+#define NUM_LIMBS   (32 / sizeof(limb_t))
+
+// Number of bits in limb_t.
+#define LIMB_BITS   (8 * sizeof(limb_t))
+
+/**
+ * \brief Evaluates the raw Curve25519 function.
+ *
+ * \param result The result of evaluating the curve function.
+ * \param s The S parameter to the curve function.
+ * \param x The X(Q) parameter to the curve function.  If this pointer is
+ * NULL then the value 9 is used for \a x.
+ *
+ * This function is provided to assist with implementing other
+ * algorithms with the curve.  Normally applications should use dh1()
+ * and dh2() directly instead.
+ *
+ * \return Returns true if \a x was in range; false if \a x is not a proper
+ * member of the field modulo (2^255 - 19) (\a result is still computed).
+ *
+ * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-curves-02
+ *
+ * \sa dh1(), dh2()
+ */
+bool Curve25519::eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32])
+{
+    limb_t x_1[NUM_LIMBS];
+    limb_t x_2[NUM_LIMBS];
+    limb_t x_3[NUM_LIMBS];
+    limb_t z_2[NUM_LIMBS];
+    limb_t z_3[NUM_LIMBS];
+    limb_t A[NUM_LIMBS];
+    limb_t B[NUM_LIMBS];
+    limb_t C[NUM_LIMBS];
+    limb_t D[NUM_LIMBS];
+    limb_t E[NUM_LIMBS];
+    limb_t AA[NUM_LIMBS];
+    limb_t BB[NUM_LIMBS];
+    limb_t DA[NUM_LIMBS];
+    limb_t CB[NUM_LIMBS];
+    uint8_t mask;                       // Bit within s[sposn] being processed
+    uint8_t sposn;                      // Index of the current byte of "s"
+    uint8_t select;                     // Current bit of "s", still in place
+    uint8_t swap;                       // Non-zero if a swap is pending
+    bool retval;
+
+    // Unpack the "x" argument into the limb representation
+    // which also masks off the high bit.  NULL means 9.
+    if (x) {
+        unpack(x_1, x);                 // x_1 = x
+    } else {
+        memset(x_1, 0, sizeof(x_1));    // x_1 = 9
+        x_1[0] = 9;
+    }
+
+    // Check that "x" is within the range of the modulo field.
+    // We can do this with a reduction - if there was no borrow
+    // then the value of "x" was out of range.  Timing is sensitive
+    // here so that we don't reveal anything about the value of "x".
+    // If there was a reduction, then continue executing the rest
+    // of this function with the (now) in-range "x" value and
+    // report the failure at the end.
+    retval = (bool)(reduceQuick(x_1) & 0x01);   // true if x was in range
+
+    // Initialize the other temporary variables.
+    memset(x_2, 0, sizeof(x_2));        // x_2 = 1
+    x_2[0] = 1;
+    memset(z_2, 0, sizeof(z_2));        // z_2 = 0
+    memcpy(x_3, x_1, sizeof(x_1));      // x_3 = x
+    memcpy(z_3, x_2, sizeof(x_2));      // z_3 = 1
+
+    // Iterate over all 255 bits of "s" from the highest to the lowest.
+    // We ignore the high bit of the 256-bit representation of "s".
+    mask = 0x40;                        // Start at bit 6 of s[31]
+    sposn = 31;
+    swap = 0;
+    for (uint8_t t = 255; t > 0; --t) {
+        // Conditional swaps on entry to this bit but only if we
+        // didn't swap on the previous bit.
+        select = s[sposn] & mask;
+        swap ^= select;
+        cswap(swap, x_2, x_3);
+        cswap(swap, z_2, z_3);
+
+        // Evaluate the curve.
+        add(A, x_2, z_2);               // A = x_2 + z_2
+        square(AA, A);                  // AA = A^2
+        sub(B, x_2, z_2);               // B = x_2 - z_2
+        square(BB, B);                  // BB = B^2
+        sub(E, AA, BB);                 // E = AA - BB
+        add(C, x_3, z_3);               // C = x_3 + z_3
+        sub(D, x_3, z_3);               // D = x_3 - z_3
+        mul(DA, D, A);                  // DA = D * A
+        mul(CB, C, B);                  // CB = C * B
+        add(x_3, DA, CB);               // x_3 = (DA + CB)^2
+        square(x_3, x_3);
+        sub(z_3, DA, CB);               // z_3 = x_1 * (DA - CB)^2
+        square(z_3, z_3);
+        mul(z_3, z_3, x_1);
+        mul(x_2, AA, BB);               // x_2 = AA * BB
+        mulA24(z_2, E);                 // z_2 = E * (AA + a24 * E)
+        add(z_2, z_2, AA);
+        mul(z_2, z_2, E);
+
+        // Move onto the next lower bit of "s".
+        mask >>= 1;
+        if (!mask) {
+            --sposn;
+            mask = 0x80;
+            swap = select << 7;         // Realign bit 0 with the new 0x80 mask
+        } else {
+            swap = select >> 1;         // Realign with the next lower mask bit
+        }
+    }
+
+    // Final conditional swaps.
+    cswap(swap, x_2, x_3);
+    cswap(swap, z_2, z_3);
+
+    // Compute x_2 * (z_2 ^ (p - 2)) where p = 2^255 - 19.
+    recip(z_3, z_2);
+    mul(x_2, x_2, z_3);
+
+    // Pack the result into the return array.
+    pack(result, x_2);
+
+    // Clean up and exit.
+    clean(x_1);
+    clean(x_2);
+    clean(x_3);
+    clean(z_2);
+    clean(z_3);
+    clean(A);
+    clean(B);
+    clean(C);
+    clean(D);
+    clean(E);
+    clean(AA);
+    clean(BB);
+    clean(DA);
+    clean(CB);
+    return retval;
+}
+
+/**
+ * \brief Performs phase 1 of a Diffie-Hellman key exchange using Curve25519.
+ *
+ * \param k The key value to send to the other party as part of the exchange.
+ * \param f The generated secret value for this party.  This must not be
+ * transmitted to any party or stored in permanent storage.  It only needs
+ * to be kept in memory until dh2() is called.
+ *
+ * The \a f value is generated with \link RNGClass::rand() RNG.rand()\endlink.
+ * It is the caller's responsibility to ensure that the global random number
+ * pool has sufficient entropy to generate the 32 bytes of \a f safely
+ * before calling this function.
+ *
+ * The following example demonstrates how to perform a full Diffie-Hellman
+ * key exchange using dh1() and dh2():
+ *
+ * \code
+ * uint8_t f[32];
+ * uint8_t k[32];
+ *
+ * // Generate the secret value "f" and the public value "k".
+ * Curve25519::dh1(k, f);
+ *
+ * // Send "k" to the other party.
+ * ...
+ *
+ * // Read the "k" value that the other party sent to us.
+ * ...
+ *
+ * // Generate the shared secret in "k" using the previous secret value "f".
+ * if (!Curve25519::dh2(k, f)) {
+ *     // The received "k" value was invalid - abort the session.
+ *     ...
+ * }
+ *
+ * // The "k" value can now be used to generate session keys for encryption.
+ * ...
+ * \endcode
+ *
+ * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-curves-02
+ *
+ * \sa dh2()
+ */
+void Curve25519::dh1(uint8_t k[32], uint8_t f[32])
+{
+    do {
+        // Generate a random "f" value and then adjust the value to make
+        // it valid as an "s" value for eval().  According to the specification
+        // we need to mask off the 3 right-most bits of f[0], mask off the
+        // left-most bit of f[31], and set the second to left-most bit of f[31].
+        RNG.rand(f, 32);
+        f[0] &= 0xF8;
+        f[31] = (f[31] & 0x7F) | 0x40;
+
+        // Evaluate the curve function: k = Curve25519::eval(f, 9).
+        // We pass NULL to eval() to indicate the value 9.  There is no
+        // need to check the return value from eval() because we know
+        // that 9 is a valid field element.
+        eval(k, f, 0);                  // 0 is the NULL "x" pointer, i.e. 9
+
+        // If "k" is weak for contributory behaviour then reject it,
+        // generate another "f" value, and try again.  This case is
+        // highly unlikely but we still perform the check just in case.
+    } while (isWeakPoint(k));
+}
+
+/**
+ * \brief Performs phase 2 of a Diffie-Hellman key exchange using Curve25519.
+ *
+ * \param k On entry, this is the key value that was received from the other
+ * party as part of the exchange.  On exit, this will be the shared secret.
+ * \param f The secret value for this party that was generated by dh1().
+ * The \a f value will be destroyed by this function.
+ *
+ * \return Returns true if the key exchange was successful, or false if
+ * the \a k value is invalid.
+ *
+ * Reference: https://tools.ietf.org/html/draft-irtf-cfrg-curves-02
+ *
+ * \sa dh1()
+ */
+bool Curve25519::dh2(uint8_t k[32], uint8_t f[32])
+{
+    uint8_t weak;                       // Accumulates 0/1 failure flags
+
+    // Evaluate the curve function: k = Curve25519::eval(f, k).
+    // If "k" is weak for contributory behaviour before or after
+    // the curve evaluation, then fail the exchange.  For safety
+    // we perform every phase of the weak checks even if we could
+    // bail out earlier so that the execution takes the same
+    // amount of time for weak and non-weak "k" values.
+    weak  = isWeakPoint(k);                     // Is "k" weak before?
+    weak |= ((eval(k, f, k) ^ 0x01) & 0x01);    // Is "k" weak during?
+    weak |= isWeakPoint(k);                     // Is "k" weak after?
+    clean(f, 32);                               // Destroy the secret "f"
+    return (bool)((weak ^ 0x01) & 0x01);        // true only if weak == 0
+}
+
+/**
+ * \brief Determines if a Curve25519 point is weak for contributory behaviour.
+ *
+ * \param k The point to check.
+ * \return Returns 1 if \a k is weak for contributory behaviour or
+ * returns zero if \a k is not weak.
+ */
+uint8_t Curve25519::isWeakPoint(const uint8_t k[32])
+{
+    // List of weak points from http://cr.yp.to/ecdh.html
+    // That page lists some others but they are variants on these
+    // of the form "point + i * (2^255 - 19)" for i = 0, 1, 2.
+    // Here we mask off the high bit and eval() catches the rest.
+    static const uint8_t points[5][32] PROGMEM = {
+        {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+        {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+        {0xE0, 0xEB, 0x7A, 0x7C, 0x3B, 0x41, 0xB8, 0xAE,
+         0x16, 0x56, 0xE3, 0xFA, 0xF1, 0x9F, 0xC4, 0x6A,
+         0xDA, 0x09, 0x8D, 0xEB, 0x9C, 0x32, 0xB1, 0xFD,
+         0x86, 0x62, 0x05, 0x16, 0x5F, 0x49, 0xB8, 0x00},
+        {0x5F, 0x9C, 0x95, 0xBC, 0xA3, 0x50, 0x8C, 0x24,
+         0xB1, 0xD0, 0xB1, 0x55, 0x9C, 0x83, 0xEF, 0x5B,
+         0x04, 0x44, 0x5C, 0xC4, 0x58, 0x1C, 0x8E, 0x86,
+         0xD8, 0x22, 0x4E, 0xDD, 0xD0, 0x9F, 0x11, 0x57},
+        {0xEC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F}
+    };
+
+    // Check each of the weak points in turn.  We perform the
+    // comparisons carefully so as not to reveal the value of "k"
+    // in the instruction timing.  If "k" is indeed weak then
+    // we still check everything so as not to reveal which
+    // weak point it is.
+    uint8_t result = 0;
+    for (uint8_t posn = 0; posn < 5; ++posn) {
+        const uint8_t *point = points[posn];
+        uint8_t check = (pgm_read_byte(point + 31) ^ k[31]) & 0x7F; // Ignore bit 255
+        for (uint8_t index = 31; index > 0; --index)
+            check |= (pgm_read_byte(point + index - 1) ^ k[index - 1]);
+        result |= (uint8_t)((((uint16_t)0x0100) - check) >> 8); // 1 iff check == 0
+    }
+
+    // The "result" variable will be non-zero if there was a match.
+    return result;
+}
+
+/**
+ * \brief Reduces a number modulo 2^255 - 19.
+ *
+ * \param result The array that will contain the result when the
+ * function exits.  Must be NUM_LIMBS limbs in size.
+ * \param x The number to be reduced, which must be NUM_LIMBS * 2 limbs in
+ * size and less than or equal to square(2^255 - 19 - 1).  This array will
+ * be modified by the reduction process.
+ * \param size The number of limbs that are in use in the high order half
+ * of \a x (the limbs from index NUM_LIMBS upwards).  If it is shorter than
+ * NUM_LIMBS then the reduction can be performed quicker.
+ */
+void Curve25519::reduce(limb_t *result, limb_t *x, uint8_t size)
+{
+    /*
+    Note: This explanation is best viewed with a UTF-8 text viewer.
+
+    To help explain what this function is doing, the following describes
+    how to efficiently compute reductions modulo a base of the form (2ⁿ - b)
+    where b is greater than zero and (b + 1)² <= 2ⁿ.
+
+    Here we are interested in reducing the result of multiplying two
+    numbers that are less than or equal to (2ⁿ - b - 1).  That is,
+    multiplying numbers that have already been reduced.
+
+    Given some x less than or equal to (2ⁿ - b - 1)², we want to find a
+    y less than (2ⁿ - b) such that:
+
+        y ≡ x mod (2ⁿ - b)
+
+    We know that for all integer values of k >= 0:
+
+        y ≡ x - k * (2ⁿ - b)
+          ≡ x - k * 2ⁿ + k * b
+
+    In our case we choose k = ⌊x / 2ⁿ⌋ and then let:
+
+        w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b
+
+    The value w will either be the answer y or y can be obtained by
+    repeatedly subtracting (2ⁿ - b) from w until it is less than (2ⁿ - b).
+    At most b subtractions will be required.
+
+    In our case b is 19 which is more subtractions than we would like to do,
+    but we can handle that by performing the above reduction twice and then
+    performing a single trial subtraction:
+
+        w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b
+        y = (w mod 2ⁿ) + ⌊w / 2ⁿ⌋ * b
+        if y >= (2ⁿ - b)
+            y -= (2ⁿ - b)
+
+    The value y is the answer we want for reducing x modulo (2ⁿ - b).
+    */
+
+    dlimb_t carry;
+    uint8_t posn;
+
+    // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will
+    // either produce the answer we want or it will produce a
+    // value of the form "answer + j * (2^255 - 19)".
+    carry = ((dlimb_t)(x[NUM_LIMBS - 1] >> (LIMB_BITS - 1))) * 19U; // Bit 255 counts as 19
+    x[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
+    for (posn = 0; posn < size; ++posn) {
+        carry += ((dlimb_t)(x[posn + NUM_LIMBS])) * 38U;    // 2^256 = 2 * 19 = 38 (mod p)
+        carry += x[posn];
+        x[posn] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+    if (size < NUM_LIMBS) {
+        // The high order half of the number is short; e.g. for mulA24().
+        // Propagate the carry through the rest of the low order part.
+        for (posn = size; posn < NUM_LIMBS; ++posn) {
+            carry += x[posn];
+            x[posn] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+    }
+
+    // The "j" value may still be too large due to the final carry-out.
+    // We must repeat the reduction.  If we already have the answer,
+    // then this won't do any harm but we must still do the calculation
+    // to preserve the overall timing.
+    carry *= 38U;
+    carry += ((dlimb_t)(x[NUM_LIMBS - 1] >> (LIMB_BITS - 1))) * 19U;
+    x[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        carry += x[posn];
+        x[posn] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+
+    // At this point "x" will either be the answer or it will be the
+    // answer plus (2^255 - 19).  Perform a trial subtraction which
+    // is equivalent to adding 19 and subtracting 2^255.  We put the
+    // trial answer into the top-most limbs of the original "x" array.
+    // We add 19 here; the subtraction of 2^255 occurs in the next step.
+    carry = 19U;
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        carry += x[posn];
+        x[posn + NUM_LIMBS] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+
+    // If there was a borrow, then the bottom-most limbs of "x" are the
+    // correct answer.  If there was no borrow, then the top-most limbs
+    // of "x" are the correct answer.  Select the correct answer but do
+    // it in a way that instruction timing will not reveal which value
+    // was selected.  Borrow will occur if the high bit of the previous
+    // result is 0: turn the high bit into a selection mask.
+    limb_t mask = (limb_t)(((slimb_t)(x[NUM_LIMBS * 2 - 1])) >> (LIMB_BITS - 1));
+    limb_t nmask = ~mask;
+    x[NUM_LIMBS * 2 - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);    // This subtracts 2^255
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        result[posn] = (x[posn] & nmask) | (x[posn + NUM_LIMBS] & mask);
+    }
+}
+
+/**
+ * \brief Quickly reduces a number modulo 2^255 - 19.
+ *
+ * \param x The number to be reduced, which must be NUM_LIMBS limbs in size
+ * and less than or equal to 2 * (2^255 - 19 - 1).
+ * \return Zero if \a x was greater than or equal to (2^255 - 19).
+ *
+ * The answer is also put into \a x and will consist of NUM_LIMBS limbs.
+ *
+ * This function is intended for reducing the result of additions where
+ * the caller knows that \a x is within the described range.  A single
+ * trial subtraction is all that is needed to reduce the number.
+ */
+Curve25519::limb_t Curve25519::reduceQuick(limb_t *x)
+{
+    limb_t temp[NUM_LIMBS];
+    dlimb_t carry;
+    uint8_t posn;
+    
+    // Perform a trial subtraction of (2^255 - 19) from "x" which is
+    // equivalent to adding 19 and subtracting 2^255.  We add 19 here;
+    // the subtraction of 2^255 occurs in the next step.
+    carry = 19U;
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        carry += x[posn];
+        temp[posn] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+
+    // If there was a borrow, then the original "x" is the correct answer.
+    // If there was no borrow, then "temp" is the correct answer.  Select the
+    // correct answer but do it in a way that instruction timing will not
+    // reveal which value was selected.  Borrow will occur if the high bit
+    // of "temp" is 0: turn the high bit into a selection mask.
+    limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS - 1])) >> (LIMB_BITS - 1));
+    limb_t nmask = ~mask;
+    temp[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);  // This subtracts 2^255
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        x[posn] = (x[posn] & nmask) | (temp[posn] & mask);
+    }
+
+    // Clean up "temp".
+    clean(temp);
+
+    // Return a zero value if we actually subtracted (2^255 - 19) from "x".
+    return nmask;   // All ones when "x" was already in range and left as-is
+}
+
+/**
+ * \brief Multiplies two values and then reduces the result modulo 2^255 - 19.
+ *
+ * \param result The result, which must be NUM_LIMBS limbs in size and can
+ * be the same array as \a x or \a y.
+ * \param x The first value to multiply, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ * \param y The second value to multiply, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.  This can be the same array as \a x.
+ */
+void Curve25519::mul(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    limb_t temp[NUM_LIMBS * 2];         // Double-width product before reduction
+    uint8_t i, j;
+    dlimb_t carry;
+    limb_t word;
+
+    // Multiply the lowest word of x by y.
+    carry = 0;
+    word = x[0];
+    for (i = 0; i < NUM_LIMBS; ++i) {
+        carry += ((dlimb_t)(y[i])) * word;
+        temp[i] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+    temp[NUM_LIMBS] = (limb_t)carry;    // Carry-out of the first row
+
+    // Multiply and add the remaining words of x by y.
+    for (i = 1; i < NUM_LIMBS; ++i) {
+        word = x[i];
+        carry = 0;
+        for (j = 0; j < NUM_LIMBS; ++j) {
+            carry += ((dlimb_t)(y[j])) * word;
+            carry += temp[i + j];
+            temp[i + j] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+        temp[i + NUM_LIMBS] = (limb_t)carry;    // First write to this limb
+    }
+
+    // Reduce the intermediate result modulo 2^255 - 19.
+    reduce(result, temp, NUM_LIMBS);
+    clean(temp);
+}
+
+/**
+ * \fn void Curve25519::square(limb_t *result, const limb_t *x)
+ * \brief Squares a value and then reduces it modulo 2^255 - 19.
+ *
+ * \param result The result, which must be NUM_LIMBS limbs in size and
+ * can be the same array as \a x.
+ * \param x The value to square, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ */
+
+/**
+ * \brief Multiplies a value by the a24 constant and then reduces the result
+ * modulo 2^255 - 19.
+ *
+ * \param result The result, which must be NUM_LIMBS limbs in size and can
+ * be the same array as \a x.
+ * \param x The value to multiply by a24, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ */
+void Curve25519::mulA24(limb_t *result, const limb_t *x)
+{
+    // The constant a24 = 121665 (0x1DB41) as a limb array.
+#if CURVE25519_LIMB_8BIT
+    static limb_t const a24[3] PROGMEM = {0x41, 0xDB, 0x01};
+    #define pgm_read_a24(index) (pgm_read_byte(&(a24[(index)])))
+#elif CURVE25519_LIMB_16BIT
+    static limb_t const a24[2] PROGMEM = {0xDB41, 0x0001};
+    #define pgm_read_a24(index) (pgm_read_word(&(a24[(index)])))
+#elif CURVE25519_LIMB_32BIT
+    static limb_t const a24[1] PROGMEM = {0x0001DB41};
+    #define pgm_read_a24(index) (pgm_read_dword(&(a24[(index)])))
+#else
+    #error "limb_t must be 8, 16, or 32 bits in size"
+#endif
+    #define NUM_A24_LIMBS   (sizeof(a24) / sizeof(limb_t))
+
+    // Multiply the lowest limb of a24 by x and zero-extend into the result.
+    limb_t temp[NUM_LIMBS * 2];
+    uint8_t i, j;
+    dlimb_t carry = 0;
+    limb_t word = pgm_read_a24(0);
+    for (i = 0; i < NUM_LIMBS; ++i) {
+        carry += ((dlimb_t)(x[i])) * word;
+        temp[i] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+    temp[NUM_LIMBS] = (limb_t)carry;
+
+    // Multiply and add the remaining limbs of a24.
+    for (i = 1; i < NUM_A24_LIMBS; ++i) {
+        word = pgm_read_a24(i);
+        carry = 0;
+        for (j = 0; j < NUM_LIMBS; ++j) {
+            carry += ((dlimb_t)(x[j])) * word;
+            carry += temp[i + j];
+            temp[i + j] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+        temp[i + NUM_LIMBS] = (limb_t)carry;
+    }
+
+    // Reduce the intermediate result modulo 2^255 - 19.
+    reduce(result, temp, NUM_A24_LIMBS);    // Only this many high limbs were written
+    clean(temp);
+}
+
+/**
+ * \brief Adds two values and then reduces the result modulo 2^255 - 19.
+ *
+ * \param result The result, which must be NUM_LIMBS limbs in size and can
+ * be the same array as \a x or \a y.
+ * \param x The first value to add, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ * \param y The second value to add, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ */
+void Curve25519::add(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    dlimb_t carry = 0;
+    uint8_t posn;
+
+    // Add the two arrays to obtain the intermediate result.
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        carry += x[posn];
+        carry += y[posn];
+        result[posn] = (limb_t)carry;
+        carry >>= LIMB_BITS;
+    }
+
+    // Reduce the result using the quick trial subtraction method.
+    reduceQuick(result);
+}
+
+/**
+ * \brief Subtracts two values and then reduces the result modulo 2^255 - 19.
+ *
+ * \param result The result, which must be NUM_LIMBS limbs in size and can
+ * be the same array as \a x or \a y.
+ * \param x The value to subtract from, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ * \param y The value to subtract, which must be NUM_LIMBS limbs in size
+ * and less than 2^255 - 19.
+ */
+void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    dlimb_t borrow;
+    uint8_t posn;
+
+    // Subtract y from x to generate the intermediate result.
+    borrow = 0;
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        borrow = ((dlimb_t)x[posn]) - y[posn] - ((borrow >> LIMB_BITS) & 0x01);
+        result[posn] = (limb_t)borrow;
+    }
+
+    // If we had a borrow, then the result has gone negative and we
+    // have to add 2^255 - 19 to the result to make it positive again.
+    // The top bits of "borrow" will be all 1's if there is a borrow
+    // or it will be all 0's if there was no borrow.  Easiest is to
+    // conditionally subtract 19 and then mask off the high bit.
+    borrow = (borrow >> LIMB_BITS) & 19U;   // 19 if we borrowed, otherwise 0
+    borrow = ((dlimb_t)result[0]) - borrow;
+    result[0] = (limb_t)borrow;
+    for (posn = 1; posn < NUM_LIMBS; ++posn) {
+        borrow = ((dlimb_t)result[posn]) - ((borrow >> LIMB_BITS) & 0x01);
+        result[posn] = (limb_t)borrow;
+    }
+    result[NUM_LIMBS - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
+}
+
+/**
+ * \brief Conditionally swaps two values if a selection value is non-zero.
+ *
+ * \param select Non-zero to swap \a x and \a y, zero to leave them unchanged.
+ * \param x The first value to conditionally swap.
+ * \param y The second value to conditionally swap.
+ *
+ * The swap is performed in a way that it should take the same amount of
+ * time irrespective of the value of \a select.
+ */
+void Curve25519::cswap(uint8_t select, limb_t *x, limb_t *y)
+{
+    uint8_t posn;
+    limb_t dummy;
+    limb_t sel;
+
+    // Turn "select" into an all-zeroes or all-ones mask.  We don't care
+    // which bit or bits is set in the original "select" value.
+    sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); // 1 iff select == 0
+    --sel;                              // 1 becomes all zeroes, 0 becomes all ones
+
+    // Swap the two values based on "select".  Algorithm from:
+    // https://tools.ietf.org/html/draft-irtf-cfrg-curves-02
+    for (posn = 0; posn < NUM_LIMBS; ++posn) {
+        dummy = sel & (x[posn] ^ y[posn]);
+        x[posn] ^= dummy;
+        y[posn] ^= dummy;
+    }
+}
+
+/**
+ * \brief Computes the reciprocal of a number modulo 2^255 - 19.
+ *
+ * \param result The result as an array of NUM_LIMBS limbs in size.  This can
+ * be the same array as \a x.
+ * \param x The number to compute the reciprocal for.
+ */
+void Curve25519::recip(limb_t *result, const limb_t *x)
+{
+    limb_t t1[NUM_LIMBS];
+    uint8_t i, j;
+
+    // The reciprocal is the same as x ^ (p - 2) where p = 2^255 - 19.
+    // The big-endian hexadecimal expansion of (p - 2) is:
+    // 7FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFEB
+    //
+    // The naive implementation needs to do 2 multiplications per 1 bit and
+    // 1 multiplication per 0 bit.  We can improve upon this by creating a
+    // pattern 0000000001 ... 0000000001.  If we square and multiply the
+    // pattern by itself we can turn the pattern into the partial results
+    // 0000000011 ... 0000000011, 0000000111 ... 0000000111, etc.
+    // This averages out to about 1.1 multiplications per 1 bit instead of 2.
+
+    // Build a pattern of 250 bits in length of repeated copies of 0000000001.
+    #define RECIP_GROUP_SIZE 10
+    #define RECIP_GROUP_BITS 250    // Must be a multiple of RECIP_GROUP_SIZE.
+    square(t1, x);
+    for (j = 0; j < (RECIP_GROUP_SIZE - 1); ++j)
+        square(t1, t1);                 // After the loop: t1 = x ^ (2^10)
+    mul(result, t1, x);                 // result = x ^ (2^10 + 1): two groups
+    for (i = 0; i < ((RECIP_GROUP_BITS / RECIP_GROUP_SIZE) - 2); ++i) {
+        for (j = 0; j < RECIP_GROUP_SIZE; ++j)
+            square(t1, t1);
+        mul(result, result, t1);        // Appends one more 0000000001 group
+    }
+
+    // Multiply bit-shifted versions of the 0000000001 pattern into
+    // the result to "fill in" the gaps in the pattern.
+    square(t1, result);
+    mul(result, result, t1);
+    for (j = 0; j < (RECIP_GROUP_SIZE - 2); ++j) {
+        square(t1, t1);
+        mul(result, result, t1);
+    }                                   // Now result = x ^ (2^250 - 1)
+
+    // Deal with the 5 lowest bits of (p - 2), 01011, from highest to lowest.
+    square(result, result);             // Bit 0
+    square(result, result);             // Bit 1
+    mul(result, result, x);
+    square(result, result);             // Bit 0
+    square(result, result);             // Bit 1
+    mul(result, result, x);
+    square(result, result);             // Bit 1
+    mul(result, result, x);
+
+    // Clean up and exit.
+    clean(t1);
+}
+
+/**
+ * \brief Unpacks the little-endian byte representation of a field element
+ * into a limb array.
+ *
+ * \param result The limb array.
+ * \param x The byte representation.
+ *
+ * The top-most bit of \a result will be set to zero so that the value
+ * is guaranteed to be 255 bits rather than 256.
+ *
+ * \sa pack()
+ */
+void Curve25519::unpack(limb_t *result, const uint8_t *x)
+{
+#if CURVE25519_LIMB_8BIT
+    memcpy(result, x, 32);              // One byte per limb: copy directly
+    result[31] &= 0x7F;                 // Clear bit 255
+#elif CURVE25519_LIMB_16BIT
+    for (uint8_t posn = 0; posn < 16; ++posn) {
+        result[posn] = ((limb_t)x[posn * 2]) | (((limb_t)x[posn * 2 + 1]) << 8);
+    }
+    result[15] &= 0x7FFF;               // Clear bit 255
+#elif CURVE25519_LIMB_32BIT
+    for (uint8_t posn = 0; posn < 8; ++posn) {
+        result[posn] = ((limb_t)x[posn * 4]) |
+                      (((limb_t)x[posn * 4 + 1]) << 8) |
+                      (((limb_t)x[posn * 4 + 2]) << 16) |
+                      (((limb_t)x[posn * 4 + 3]) << 24);
+    }
+    result[7] &= 0x7FFFFFFF;            // Clear bit 255
+#endif
+}
+
+/**
+ * \brief Packs the limb array representation of a field element into a
+ * little-endian byte array.
+ *
+ * \param result The byte array, which receives 32 bytes.
+ * \param x The limb representation.
+ *
+ * \sa unpack()
+ */
+void Curve25519::pack(uint8_t *result, const limb_t *x)
+{
+#if CURVE25519_LIMB_8BIT
+    memcpy(result, x, 32);              // One byte per limb: copy directly
+#elif CURVE25519_LIMB_16BIT
+    for (uint8_t posn = 0; posn < 16; ++posn) {
+        limb_t value = x[posn];
+        result[posn * 2]     = (uint8_t)value;          // Low byte first
+        result[posn * 2 + 1] = (uint8_t)(value >> 8);
+    }
+#elif CURVE25519_LIMB_32BIT
+    for (uint8_t posn = 0; posn < 8; ++posn) {
+        limb_t value = x[posn];
+        result[posn * 4]     = (uint8_t)value;          // Low byte first
+        result[posn * 4 + 1] = (uint8_t)(value >> 8);
+        result[posn * 4 + 2] = (uint8_t)(value >> 16);
+        result[posn * 4 + 3] = (uint8_t)(value >> 24);
+    }
+#endif
+}
--- a/libraries/Crypto/Curve25519.h
+++ b/libraries/Crypto/Curve25519.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+// Include guard for Curve25519.h.
+#ifndef CRYPTO_CURVE25519_h
+#define CRYPTO_CURVE25519_h
+
+#include <inttypes.h>
+#include <stddef.h>
+
+// Define exactly one of these to 1 to set the size of the basic limb type.
+// 16-bit limbs seem to give the best performance on 8-bit AVR micros.
+// The #if/#elif chains that consume these test the 8-bit option first,
+// then 16-bit, then 32-bit, so the first one set to 1 is the one used.
+#define CURVE25519_LIMB_8BIT  0
+#define CURVE25519_LIMB_16BIT 1
+#define CURVE25519_LIMB_32BIT 0
+
+// Curve25519 operations exposed purely as static member functions.
+// The class declares no data members.
+class Curve25519
+{
+public:
+    // Evaluates the curve function for the 32-byte scalar s and 32-byte
+    // point x, writing 32 bytes to result.  The TestCurve25519 sketch
+    // passes a null x when deriving a public key from a private key.
+    // NOTE(review): the meaning of the bool result is defined by the
+    // implementation, which is not part of this header - confirm there.
+    static bool eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32]);
+
+    // The two halves of a Diffie-Hellman exchange on 32-byte buffers.
+    // In the TestCurve25519 sketch each party calls dh1(k, f) first, then
+    // dh2() with the peer's k and its own f; afterwards both k buffers
+    // are expected to be equal.
+    // NOTE(review): the meaning of dh2()'s bool result is not visible
+    // from this header - confirm against the implementation.
+    static void dh1(uint8_t k[32], uint8_t f[32]);
+    static bool dh2(uint8_t k[32], uint8_t f[32]);
+
+// Everything below is private unless TEST_CURVE25519_FIELD_OPS is defined
+// before this header is included, in which case it is public so that the
+// field operations can be called directly from test code.
+#if defined(TEST_CURVE25519_FIELD_OPS)
+public:
+#else
+private:
+#endif
+    // Define the limb types to use on this platform.
+    // limb_t is the unsigned limb, slimb_t is the signed type of the same
+    // width, and dlimb_t is an unsigned type twice as wide as a limb.
+    #if CURVE25519_LIMB_8BIT
+    typedef uint8_t limb_t;
+    typedef int8_t slimb_t;
+    typedef uint16_t dlimb_t;
+    #elif CURVE25519_LIMB_16BIT
+    typedef uint16_t limb_t;
+    typedef int16_t slimb_t;
+    typedef uint32_t dlimb_t;
+    #elif CURVE25519_LIMB_32BIT
+    typedef uint32_t limb_t;
+    typedef int32_t slimb_t;
+    typedef uint64_t dlimb_t;
+    #else
+    #error "limb_t must be 8, 16, or 32 bits in size"
+    #endif
+
+    // Internal helpers.  Only their signatures are visible in this header;
+    // see the implementation file for the exact contracts.
+    static uint8_t isWeakPoint(const uint8_t k[32]);
+
+    static void reduce(limb_t *result, limb_t *x, uint8_t size);
+    static limb_t reduceQuick(limb_t *x);
+
+    static void mul(limb_t *result, const limb_t *x, const limb_t *y);
+    // Squares x by multiplying it with itself.
+    static void square(limb_t *result, const limb_t *x)
+    {
+        mul(result, x, x);
+    }
+
+    static void mulA24(limb_t *result, const limb_t *x);
+
+    static void add(limb_t *result, const limb_t *x, const limb_t *y);
+    static void sub(limb_t *result, const limb_t *x, const limb_t *y);
+
+    static void cswap(uint8_t select, limb_t *x, limb_t *y);
+
+    static void recip(limb_t *result, const limb_t *x);
+
+    // Conversions between 32-byte little-endian buffers and limb arrays.
+    static void unpack(limb_t *result, const uint8_t *x);
+    static void pack(uint8_t *result, const limb_t *x);
+
+    // Constructor and destructor are private - cannot instantiate this class.
+    // (They become public along with the rest of this section when
+    // TEST_CURVE25519_FIELD_OPS is defined.)
+    Curve25519() {}
+    ~Curve25519() {}
+};
+
+#endif
--- a/libraries/Crypto/examples/TestCurve25519/TestCurve25519.ino
+++ b/libraries/Crypto/examples/TestCurve25519/TestCurve25519.ino
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+This example runs tests on the Curve25519 algorithm.
+*/
+
+#include <Crypto.h>
+#include <Curve25519.h>
+#include <RNG.h>
+#include <string.h>
+
+// Prints "<name> = " followed by the 32 bytes at x as upper-case hex,
+// in buffer order, and ends the line.
+void printNumber(const char *name, const uint8_t *x)
+{
+    static const char digits[] = "0123456789ABCDEF";
+    Serial.print(name);
+    Serial.print(" = ");
+    const uint8_t *end = x + 32;
+    for (const uint8_t *ptr = x; ptr != end; ++ptr) {
+        uint8_t value = *ptr;
+        Serial.print(digits[(value >> 4) & 0x0F]);
+        Serial.print(digits[value & 0x0F]);
+    }
+    Serial.println();
+}
+
+// Runs a single Curve25519::eval() call on scalar "s" and point "x"
+// ("x" may be null), times only the eval() call itself with micros(),
+// and reports on the serial port whether the 32-byte output equals
+// "expected".  On a mismatch both values are dumped in hex.
+static void checkEval(const char *title, const uint8_t *s,
+                      const uint8_t *x, const uint8_t *expected)
+{
+    uint8_t result[32] = {0};
+    Serial.print(title);
+    Serial.flush();
+    unsigned long start = micros();
+    Curve25519::eval(result, s, x);
+    unsigned long elapsed = micros() - start;
+    if (memcmp(result, expected, 32) == 0) {
+        Serial.print("ok");
+    } else {
+        Serial.println("failed");
+        printNumber("actual  ", result);
+        printNumber("expected", expected);
+    }
+    Serial.print(" (elapsed ");
+    Serial.print(elapsed);
+    Serial.println(" us)");
+}
+
+// Check the eval() function using the test vectors from:
+// https://tools.ietf.org/html/draft-turner-thecurve25519function-01
+//
+// Both public keys are derived from the private keys (null point argument)
+// and then the shared secret is computed from each side.
+void testEval()
+{
+    static uint8_t alice_private[32] = {
+        0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
+        0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
+        0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
+        0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a
+    };
+    static uint8_t const alice_public[32] = {
+        0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
+        0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
+        0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
+        0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a
+    };
+    static uint8_t bob_private[32] = {
+        0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
+        0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
+        0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
+        0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb
+    };
+    static uint8_t const bob_public[32] = {
+        0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
+        0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
+        0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
+        0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f
+    };
+    static uint8_t const shared_secret[32] = {
+        0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
+        0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
+        0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
+        0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42
+    };
+
+    // Fix up the private keys by applying the standard masks.
+    alice_private[0] &= 0xF8;
+    alice_private[31] = (alice_private[31] & 0x7F) | 0x40;
+    bob_private[0] &= 0xF8;
+    bob_private[31] = (bob_private[31] & 0x7F) | 0x40;
+
+    Serial.println("Fixed test vectors:");
+
+    // Evaluate the curve function and check the public keys.
+    checkEval("Computing Alice's public key ... ",
+              alice_private, 0, alice_public);
+    checkEval("Computing Bob's public key ... ",
+              bob_private, 0, bob_public);
+
+    // Compute the shared secret from each side.
+    checkEval("Computing Alice's shared secret ... ",
+              alice_private, bob_public, shared_secret);
+    checkEval("Computing Bob's shared secret ... ",
+              bob_private, alice_public, shared_secret);
+}
+
+// Runs a complete Diffie-Hellman exchange between two simulated parties,
+// timing each step, and checks that both end up with the same 32 bytes.
+void testDH()
+{
+    static uint8_t alice_k[32];
+    static uint8_t alice_f[32];
+    static uint8_t bob_k[32];
+    static uint8_t bob_f[32];
+    unsigned long began;
+    unsigned long took;
+
+    Serial.println("Diffie-Hellman key exchange:");
+
+    // First half for each party.
+    Serial.print("Generate random k/f for Alice ... ");
+    Serial.flush();
+    began = micros();
+    Curve25519::dh1(alice_k, alice_f);
+    took = micros() - began;
+    Serial.print("elapsed ");
+    Serial.print(took);
+    Serial.println(" us");
+
+    Serial.print("Generate random k/f for Bob ... ");
+    Serial.flush();
+    began = micros();
+    Curve25519::dh1(bob_k, bob_f);
+    took = micros() - began;
+    Serial.print("elapsed ");
+    Serial.print(took);
+    Serial.println(" us");
+
+    // Second half: each party combines the peer's k with its own f.
+    Serial.print("Generate shared secret for Alice ... ");
+    Serial.flush();
+    began = micros();
+    Curve25519::dh2(bob_k, alice_f);
+    took = micros() - began;
+    Serial.print("elapsed ");
+    Serial.print(took);
+    Serial.println(" us");
+
+    Serial.print("Generate shared secret for Bob ... ");
+    Serial.flush();
+    began = micros();
+    Curve25519::dh2(alice_k, bob_f);
+    took = micros() - began;
+    Serial.print("elapsed ");
+    Serial.print(took);
+    Serial.println(" us");
+
+    // Both k buffers should now hold the same value.
+    Serial.print("Check that the shared secrets match ... ");
+    bool same = (memcmp(alice_k, bob_k, 32) == 0);
+    Serial.println(same ? "ok" : "failed");
+}
+
+// Sketch entry point: opens the serial port, starts the random number
+// generator and runs both test groups once.
+void setup()
+{
+    Serial.begin(9600);
+
+    // The random number generator is started without a noise source,
+    // which is good enough for exercising the code paths here.  A real
+    // DH application should attach a proper noise source.
+    RNG.begin("TestCurve25519 1.0", 500);
+
+    // Fixed vectors first, then the randomised exchange.
+    testEval();
+    Serial.println();
+
+    testDH();
+    Serial.println();
+}
+
+// Nothing to do repeatedly: all tests run once from setup().
+void loop()
+{
+}
--- a/libraries/Crypto/examples/TestCurve25519Math/TestCurve25519Math.ino
+++ b/libraries/Crypto/examples/TestCurve25519Math/TestCurve25519Math.ino
@@ -0,0 +1,622 @@
+/*
+ * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+This example runs tests on the Curve25519 field mathematics independent
+of the full curve operation itself.
+*/
+
+// Enable access to the internals of Curve25519 to test the raw field ops.
+#define TEST_CURVE25519_FIELD_OPS 1
+
+#include <Crypto.h>
+#include <Curve25519.h>
+#include <utility/ProgMemUtil.h>
+#include <string.h>
+
+// Copy some definitions from the Curve25519 class for convenience.
+// NUM_LIMBS is the number of limbs in a 32-byte value and LIMB_BITS is
+// the width of one limb in bits.
+#define NUM_LIMBS   (32 / sizeof(Curve25519::limb_t))
+#define LIMB_BITS   (8 * sizeof(Curve25519::limb_t))
+// Shorthand for the class-scoped limb types.
+#define limb_t Curve25519::limb_t
+#define dlimb_t Curve25519::dlimb_t
+// Bitwise complement of a zero limb.
+#define INVERSE_LIMB (~((limb_t)0))
+
+// For simpleMod() below we need a type that is 4 times the size of limb_t.
+// No such type is selected for 32-bit limbs; CURVE25519_NO_QLIMB is defined
+// instead and simpleMod() falls back to correctEstimate().
+#if CURVE25519_LIMB_8BIT
+#define qlimb_t uint32_t
+#elif CURVE25519_LIMB_16BIT
+#define qlimb_t uint64_t
+#else
+#define CURVE25519_NO_QLIMB 1
+#endif
+
+// Scratch buffers shared by all of the tests.  result2 is larger than the
+// others because simpleMul() writes NUM_LIMBS * 2 limbs and simpleMod()
+// needs one further limb on top of that.
+limb_t arg1[NUM_LIMBS];
+limb_t arg2[NUM_LIMBS];
+limb_t result[NUM_LIMBS];
+limb_t result2[NUM_LIMBS * 2 + 1];
+limb_t temp[NUM_LIMBS];
+
+// Parses a NUL-terminated decimal string stored in program memory into
+// the "size"-limb number x (least significant limb first).  Characters
+// other than '0'..'9' are skipped, and any overflow beyond "size" limbs
+// is discarded.
+void fromString(limb_t *x, uint8_t size, const char *str)
+{
+    memset(x, 0, sizeof(limb_t) * size);
+    for (;;) {
+        uint8_t ch = pgm_read_byte((uint8_t *)str);
+        if (ch == '\0')
+            break;
+        ++str;
+        if (ch < '0' || ch > '9')
+            continue;
+
+        // x = x * 10 + digit, rippling the carry up through the limbs.
+        dlimb_t carry = ch - '0';
+        for (uint8_t index = 0; index < size; ++index) {
+            carry += ((dlimb_t)x[index]) * 10U;
+            x[index] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+    }
+}
+
+// Three-way comparison of two NUM_LIMBS-long numbers, scanning from the
+// most significant limb down.  Returns -1 if x < y, 1 if x > y, else 0.
+int compare(const limb_t *x, const limb_t *y)
+{
+    uint8_t index = NUM_LIMBS;
+    while (index-- > 0) {
+        if (x[index] != y[index])
+            return (x[index] < y[index]) ? -1 : 1;
+    }
+    return 0;
+}
+
+// Three-way comparison of a limb array against a decimal string: the
+// string is parsed with fromString() and the limb comparison is reused.
+// Returns -1, 0, or 1.
+int compare(const limb_t *x, const char *y)
+{
+    limb_t parsed[NUM_LIMBS];
+    fromString(parsed, NUM_LIMBS, y);
+    return compare(x, parsed);
+}
+
+// Prints "<name> = " followed by every limb of x as upper-case hex.
+// Limbs appear in array order separated by spaces; within a limb the
+// most significant nibble is printed first.
+void printNumber(const char *name, const limb_t *x)
+{
+    static const char digits[] = "0123456789ABCDEF";
+    Serial.print(name);
+    Serial.print(" = ");
+    for (uint8_t index = 0; index < NUM_LIMBS; ++index) {
+        limb_t value = x[index];
+        for (uint8_t shift = LIMB_BITS; shift != 0; shift -= 4)
+            Serial.print(digits[(value >> (shift - 4)) & 0x0F]);
+        Serial.print(' ');
+    }
+    Serial.println();
+}
+
+// Standard numbers that are useful in field operation tests.
+// Each one is a decimal string stored in program memory, so it must be
+// read with pgm_read_byte() (see fromString() and printProgMem()).
+char const num_0[] PROGMEM = "0";
+char const num_1[] PROGMEM = "1";
+char const num_2[] PROGMEM = "2";
+char const num_4[] PROGMEM = "4";
+char const num_5[] PROGMEM = "5";
+char const num_128[] PROGMEM = "128";
+char const num_256[] PROGMEM = "256";
+char const num_2_64_m7[] PROGMEM = "18446744073709551609"; // 2^64 - 7
+char const num_2_129_m5[] PROGMEM = "680564733841876926926749214863536422907"; // 2^129 - 5
+char const num_pi[] PROGMEM = "31415926535897932384626433832795028841971693993751058209749445923078164062862"; // 77 digits of pi
+char const num_2_255_m253[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819715"; // 2^255 - 253
+char const num_2_255_m20[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819948"; // 2^255 - 20
+char const num_2_255_m19[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819949"; // 2^255 - 19
+char const num_2_255_m19_x2[] PROGMEM = "115792089237316195423570985008687907853269984665640564039457584007913129639898"; // (2^255 - 19) * 2
+char const num_a24[] PROGMEM = "121665";
+
+// Table of useful numbers less than 2^255 - 19.
+// The trailing 0 entry is a terminator and is not counted by numbers_count.
+const char * const numbers[] = {
+    num_0,
+    num_1,
+    num_2,
+    num_4,
+    num_5,
+    num_128,
+    num_256,
+    num_2_64_m7,
+    num_2_129_m5,
+    num_pi,
+    num_2_255_m253,
+    num_2_255_m20,
+    0
+};
+#define numbers_count   ((sizeof(numbers) / sizeof(numbers[0])) - 1)
+
+// Iterates "var" over every entry of the numbers table.
+// The macro expands to a declaration of "var" followed by a for statement,
+// so it declares "var" in the enclosing scope and must not be used as the
+// sole statement of an unbraced if/else.  After the final iteration "var"
+// is left holding the terminating 0 entry.
+#define foreach_number(var) \
+    const char *var = numbers[0]; \
+    for (unsigned index##var = 0; index##var < numbers_count; \
+         ++index##var, var = numbers[index##var])
+
+// Writes a NUL-terminated string stored in program memory to the serial
+// port, one character at a time.
+void printProgMem(const char *str)
+{
+    for (uint8_t ch = pgm_read_byte((uint8_t *)str); ch != '\0';
+         ch = pgm_read_byte((uint8_t *)(++str))) {
+        Serial.print((char)ch);
+    }
+}
+
+// Reference implementation of addition modulo 2^255 - 19, used to
+// cross-check the library.  The plain sum is formed first; if it is not
+// below the modulus, the modulus is subtracted once.  "result" may be the
+// same array as "x" or "y".
+void simpleAdd(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    uint8_t index;
+    dlimb_t acc = 0;
+    for (index = 0; index < NUM_LIMBS; ++index) {
+        acc += x[index];
+        acc += y[index];
+        result[index] = (limb_t)acc;
+        acc >>= LIMB_BITS;
+    }
+
+    // Already reduced?  Then there is nothing more to do.
+    if (compare(result, num_2_255_m19) < 0)
+        return;
+
+    // Subtracting 2^255 - 19 is the same as adding 19 and then taking
+    // 2^255 away again from the top limb.
+    acc = 19;
+    for (index = 0; index < NUM_LIMBS; ++index) {
+        acc += result[index];
+        result[index] = (limb_t)acc;
+        acc >>= LIMB_BITS;
+    }
+    result[NUM_LIMBS - 1] -= ((limb_t)1) << (LIMB_BITS - 1);
+}
+
+// Reference implementation of plain multi-limb subtraction, used to
+// cross-check the library.  No reduction modulo 2^255 - 19 is performed
+// and the caller must ensure that x >= y.
+void simpleSub(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    uint8_t borrow = 0;
+    for (uint8_t index = 0; index < NUM_LIMBS; ++index) {
+        dlimb_t diff = ((dlimb_t)x[index]) - y[index] - borrow;
+        result[index] = (limb_t)diff;
+        // Any bits above the limb mean the subtraction wrapped around.
+        borrow = ((diff >> LIMB_BITS) != 0) ? 1 : 0;
+    }
+}
+
+// Reference schoolbook multiplication, used to cross-check the library.
+// The product is not reduced modulo 2^255 - 19, so "result" must have
+// room for at least NUM_LIMBS * 2 limbs.
+void simpleMul(limb_t *result, const limb_t *x, const limb_t *y)
+{
+    memset(result, 0, NUM_LIMBS * 2 * sizeof(limb_t));
+    for (uint8_t i = 0; i < NUM_LIMBS; ++i) {
+        for (uint8_t j = 0; j < NUM_LIMBS; ++j) {
+            // Add the partial product in at limb i + j and keep rippling
+            // the carry upwards until it dies out or the buffer ends.
+            dlimb_t acc = ((dlimb_t)x[i]) * y[j];
+            for (uint8_t k = i + j; k < (NUM_LIMBS * 2); ++k) {
+                acc += result[k];
+                result[k] = (limb_t)acc;
+                acc >>= LIMB_BITS;
+                if (acc == 0)
+                    break;
+            }
+        }
+    }
+}
+
+#if defined(CURVE25519_NO_QLIMB)
+
+// Quick check to correct the estimate on a quotient word.
+//
+// q   - estimated quotient word.
+// y1  - divisor word that is multiplied by q and subtracted from x01.
+// y2  - divisor word that is multiplied by q for the comparison.
+// x01 - two-word prefix of the dividend.
+// x2  - dividend word that follows x01.
+//
+// Returns q, q - 1, or q - 2.
+static inline limb_t correctEstimate
+    (limb_t q, limb_t y1, limb_t y2, dlimb_t x01, limb_t x2)
+{
+    // Algorithm D from section 4.3.1 of "The Art Of Computer Programming",
+    // D. Knuth, Volume 2, "Seminumerical Algorithms", Second Edition, 1981.
+    //
+    // We want to check if (y2 * q) > ((x01 - y1 * q) * b + x2) where
+    // b is (1 << LIMB_BITS).  If it is, then q must be reduced by 1.
+    //
+    // One wrinkle that isn't obvious from Knuth's description is that it
+    // is possible for (x01 - y1 * q) >= b, especially in the case where
+    // x0 = y1 and q = b - 1.  This will cause an overflow of the intermediate
+    // double-word result ((x01 - y1 * q) * b).
+    //
+    // In assembly language, we could use the carry flag to detect when
+    // (x01 - y1 * q) * b overflows, but we can't access the carry flag
+    // in C++.  So we have to account for the carry in a different way here.
+
+    // Calculate the remainder using the estimated quotient.
+    dlimb_t r = x01 - ((dlimb_t)y1) * q;
+
+    // If there will be a double-word carry when we calculate (r * b),
+    // then (y2 * q) is obviously going to be less than (r * b), so we
+    // can stop here.  The estimated quotient is correct.
+    // (The mask selects the upper limb of the double-width value r.)
+    if (r & (((dlimb_t)INVERSE_LIMB) << LIMB_BITS))
+        return q;
+
+    // Bail out if (y2 * q) <= (r * b + x2).  The estimate is correct.
+    dlimb_t y2q = ((dlimb_t)y2) * q;
+    if (y2q <= ((r << LIMB_BITS) + x2))
+        return q;
+
+    // Correct for the estimated quotient being off by 1.
+    --q;
+
+    // Now repeat the check to correct for q values that are off by 2.
+    r += y1;    // r' = (x01 - y1 * (q - 1)) = (x01 - y1 * q + y1) = r + y1
+    if (r & (((dlimb_t)INVERSE_LIMB) << LIMB_BITS))
+        return q;
+    // y2q' = (y2 * (q - 1)) = (y2 * q - y2) = y2q - y2
+    if ((y2q - y2) <= ((r << LIMB_BITS) + x2))
+        return q;
+
+    // Perform the final correction for q values that are off by 2.
+    return q - 1;
+}
+
+#endif
+
+// Simple implementation of modular division to cross-check the library.
+// Calling this "simple" is a bit of a misnomer.  It is a full implementation
+// of Algorithm D from section 4.3.1 of "The Art Of Computer Programming",
+// D. Knuth, Volume 2, "Seminumerical Algorithms", Second Edition, 1981.
+// This is quite slow on embedded platforms, but it should be correct.
+// Note: "x" is assumed to be (NUM_LIMBS * 2 + 1) limbs in size because
+// we need a limb for the extra leading zero word added by step D1.
+//
+// On entry the low NUM_LIMBS * 2 limbs of "x" hold the value to reduce.
+// On exit the low NUM_LIMBS limbs of "x" hold the remainder modulo
+// 2^255 - 19; the quotient is not kept.
+void simpleMod(limb_t *x)
+{
+    limb_t divisor[NUM_LIMBS];
+    uint8_t j, k;
+
+    // Step D1. Normalize.
+    // The divisor (2^255 - 19) and "x" need to be shifted left until
+    // the top-most bit of the divisor is 1.  Since we know that the
+    // next-to-top-most bit of (2^255 - 19) is already 1 and the top-most
+    // bit of "x" is zero, shifting everything into place is pretty easy.
+    fromString(divisor, NUM_LIMBS, num_2_255_m19_x2);
+    for (j = (NUM_LIMBS * 2); j > 1; --j) {
+        x[j - 1] = (x[j - 1] << 1) | (x[j - 2] >> (LIMB_BITS - 1));
+    }
+    x[0] <<= 1;
+    x[NUM_LIMBS * 2] = 0;   // Extra leading word.
+
+    // Step D2/D7. Loop on j
+    for (j = 0; j <= NUM_LIMBS; ++j) {
+        // Step D3. Calculate an estimate of the top-most quotient word.
+        // u[2], u[1], u[0] are the three leading dividend words for this
+        // step; v[1] is the top divisor word and v[0] the one below it.
+        limb_t *u = x + NUM_LIMBS * 2 - 2 - j;
+        limb_t *v = divisor + NUM_LIMBS - 2;
+        limb_t q;
+        dlimb_t uword = ((((dlimb_t)u[2]) << LIMB_BITS) + u[1]);
+        if (u[2] == v[1])
+            q = ~((limb_t)0);
+        else
+            q = (limb_t)(uword / v[1]);
+
+        // Step D3, part 2.  Correct the estimate downwards by 1 or 2.
+        // One subtlety of Knuth's algorithm is that it looks like the test
+        // is working with double-word quantities but it is actually using
+        // double-word plus a carry bit.  So we need to use qlimb_t for this.
+#if !defined(CURVE25519_NO_QLIMB)
+        qlimb_t test = ((((qlimb_t)uword) - ((dlimb_t)q) * v[1]) << LIMB_BITS) + u[0];
+        if ((((dlimb_t)q) * v[0]) > test) {
+            --q;
+            test = ((((qlimb_t)uword) - ((dlimb_t)q) * v[1]) << LIMB_BITS) + u[0];
+            if ((((dlimb_t)q) * v[0]) > test)
+                --q;
+        }
+#else
+        // 32-bit platform - we don't have a 128-bit numeric type so we have
+        // to calculate the estimate in another way to preserve the carry bit.
+        // The top divisor word v[1] pairs with uword, exactly as in the
+        // qlimb_t branch above; v[0] is the second divisor word.
+        q = correctEstimate(q, v[1], v[0], uword, u[0]);
+#endif
+
+        // Step D4. Multiply and subtract.
+        u = x + (NUM_LIMBS - j);
+        v = divisor;
+        dlimb_t carry = 0;
+        dlimb_t borrow = 0;
+        for (k = 0; k < NUM_LIMBS; ++k) {
+            carry += ((dlimb_t)v[k]) * q;
+            borrow = ((dlimb_t)u[k]) - ((limb_t)carry) - borrow;
+            u[k] = (limb_t)borrow;
+            carry >>= LIMB_BITS;
+            borrow = ((borrow >> LIMB_BITS) != 0);
+        }
+        borrow = ((dlimb_t)u[k]) - ((limb_t)carry) - borrow;
+        u[k] = (limb_t)borrow;
+
+        // Step D5. Test remainder.  Nothing further to do if no borrow.
+        if ((borrow >> LIMB_BITS) == 0)
+            continue;
+
+        // Step D6. Borrow occurred: add back.
+        carry = 0;
+        for (k = 0; k < NUM_LIMBS; ++k) {
+            carry += u[k];
+            carry += v[k];
+            u[k] = (limb_t)carry;
+            carry >>= LIMB_BITS;
+        }
+        u[k] += (limb_t)carry;
+    }
+
+    // Step D8. Unnormalize.
+    // Shift the remainder right by 1 bit to undo the earlier left shift.
+    for (j = 0; j < (NUM_LIMBS - 1); ++j) {
+        x[j] = (x[j] >> 1) | (x[j + 1] << (LIMB_BITS - 1));
+    }
+    x[NUM_LIMBS - 1] >>= 1;
+}
+
+// Checks Curve25519::add() on one pair of decimal strings (both stored in
+// program memory) against simpleAdd() and reports the outcome.
+void testAdd(const char *x, const char *y)
+{
+    // Announce the case being run.
+    printProgMem(x);
+    Serial.print(" + ");
+    printProgMem(y);
+    Serial.print(": ");
+    Serial.flush();
+
+    // Library result first, then the reference result.
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+    Curve25519::add(result, arg1, arg2);
+
+    simpleAdd(result2, arg1, arg2);
+
+    if (compare(result, result2) != 0) {
+        Serial.println("failed");
+        printNumber("actual  ", result);
+        printNumber("expected", result2);
+        return;
+    }
+    Serial.println("ok");
+}
+
+// Runs the addition check on every ordered pair from the numbers table.
+void testAdd()
+{
+    Serial.println("Addition:");
+    for (unsigned i = 0; i < numbers_count; ++i) {
+        for (unsigned j = 0; j < numbers_count; ++j)
+            testAdd(numbers[i], numbers[j]);
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::sub() on one pair of decimal strings (both stored in
+// program memory) against the reference routines and reports the outcome.
+void testSub(const char *x, const char *y)
+{
+    // Announce the case being run.
+    printProgMem(x);
+    Serial.print(" - ");
+    printProgMem(y);
+    Serial.print(": ");
+    Serial.flush();
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+    Curve25519::sub(result, arg1, arg2);
+
+    if (compare(arg1, arg2) < 0) {
+        // x < y: the expected value is arg1 + (2^255 - 19 - arg2).
+        fromString(temp, NUM_LIMBS, num_2_255_m19);
+        simpleSub(result2, temp, arg2);
+        simpleAdd(result2, arg1, result2);
+    } else {
+        // x >= y: a plain subtraction gives the expected value.
+        simpleSub(result2, arg1, arg2);
+    }
+
+    if (compare(result, result2) != 0) {
+        Serial.println("failed");
+        printNumber("actual  ", result);
+        printNumber("expected", result2);
+        return;
+    }
+    Serial.println("ok");
+}
+
+// Runs the subtraction check on every ordered pair from the numbers table.
+void testSub()
+{
+    Serial.println("Subtraction:");
+    for (unsigned i = 0; i < numbers_count; ++i) {
+        for (unsigned j = 0; j < numbers_count; ++j)
+            testSub(numbers[i], numbers[j]);
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::mul() - or Curve25519::square() when both operands
+// are equal - against simpleMul() followed by simpleMod().
+void testMul(const char *x, const char *y)
+{
+    // Announce the case being run.
+    printProgMem(x);
+    Serial.print(" * ");
+    printProgMem(y);
+    Serial.print(": ");
+    Serial.flush();
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+
+    // Equal operands exercise the squaring entry point instead.
+    if (compare(arg1, arg2) == 0)
+        Curve25519::square(result, arg1);
+    else
+        Curve25519::mul(result, arg1, arg2);
+
+    // Reference value: full product, then reduction.
+    simpleMul(result2, arg1, arg2);
+    simpleMod(result2);
+
+    if (compare(result, result2) != 0) {
+        Serial.println("failed");
+        printNumber("actual  ", result);
+        printNumber("expected", result2);
+        return;
+    }
+    Serial.println("ok");
+}
+
+// Runs the multiplication check on every ordered pair from the numbers
+// table.
+void testMul()
+{
+    Serial.println("Multiplication:");
+    for (unsigned i = 0; i < numbers_count; ++i) {
+        for (unsigned j = 0; j < numbers_count; ++j)
+            testMul(numbers[i], numbers[j]);
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::mulA24() on one decimal string (stored in program
+// memory) against a reference multiplication by num_a24 and reduction.
+void testMulA24(const char *x)
+{
+    // Announce the case being run.
+    printProgMem(x);
+    Serial.print(" * ");
+    printProgMem(num_a24);
+    Serial.print(": ");
+    Serial.flush();
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, num_a24);
+    Curve25519::mulA24(result, arg1);
+
+    // Reference value: full product, then reduction.
+    simpleMul(result2, arg1, arg2);
+    simpleMod(result2);
+
+    if (compare(result, result2) != 0) {
+        Serial.println("failed");
+        printNumber("actual  ", result);
+        printNumber("expected", result2);
+        return;
+    }
+    Serial.println("ok");
+}
+
+// Runs the a24 multiplication check on every entry of the numbers table.
+void testMulA24()
+{
+    Serial.println("Multiplication by a24:");
+    for (unsigned i = 0; i < numbers_count; ++i)
+        testMulA24(numbers[i]);
+    Serial.println();
+}
+
+// Checks Curve25519::cswap() on one pair of values: a call with "select"
+// must exchange them, a call with 0 must leave them alone, and the same
+// again must restore and then preserve the original order.
+void testSwap(const char *x, const char *y, uint8_t select)
+{
+    // Announce the case being run.
+    printProgMem(x);
+    Serial.print(" <-> ");
+    printProgMem(y);
+    Serial.print(": ");
+    Serial.flush();
+
+    fromString(arg1, NUM_LIMBS, x);
+    fromString(arg2, NUM_LIMBS, y);
+
+    // Work on copies so the originals stay available for comparison.
+    const size_t bytes = NUM_LIMBS * sizeof(limb_t);
+    memcpy(result, arg1, bytes);
+    memcpy(result2, arg2, bytes);
+
+    // Selected swap: the values must have changed places.
+    Curve25519::cswap(select, result, result2);
+    bool ok = (compare(result, arg2) == 0) && (compare(result2, arg1) == 0);
+
+    // Unselected swap: they must still be exchanged.
+    Curve25519::cswap(0, result, result2);
+    ok = ok && (compare(result, arg2) == 0) && (compare(result2, arg1) == 0);
+
+    // Selected swap again: back to the original order.
+    Curve25519::cswap(select, result, result2);
+    ok = ok && (compare(result, arg1) == 0) && (compare(result2, arg2) == 0);
+
+    // Unselected swap: the original order must be kept.
+    Curve25519::cswap(0, result, result2);
+    ok = ok && (compare(result, arg1) == 0) && (compare(result2, arg2) == 0);
+
+    Serial.println(ok ? "ok" : "failed");
+}
+
+// Runs the swap check on every ordered pair from the numbers table,
+// cycling the selection value through each of the eight single-bit masks.
+void testSwap()
+{
+    Serial.println("Swap:");
+    uint8_t bit = 0;
+    for (unsigned i = 0; i < numbers_count; ++i) {
+        for (unsigned j = 0; j < numbers_count; ++j) {
+            testSwap(numbers[i], numbers[j], ((uint8_t)1) << bit);
+            bit = (bit + 1) & 7;
+        }
+    }
+    Serial.println();
+}
+
+// Checks Curve25519::recip() on one decimal string (stored in program
+// memory).  For a non-zero input the reciprocal times the input must be
+// 1; for zero the test expects zero back.
+void testRecip(const char *x)
+{
+    // Announce the case being run.
+    printProgMem(x);
+    Serial.print("^-1");
+    Serial.print(": ");
+    Serial.flush();
+
+    fromString(arg1, NUM_LIMBS, x);
+    Curve25519::recip(result, arg1);
+
+    bool ok;
+    if (compare(arg1, num_0) != 0) {
+        // Multiply the reciprocal by the input; the product should be 1.
+        Curve25519::mul(result2, result, arg1);
+        ok = (compare(result2, num_1) == 0);
+    } else {
+        // 0^-1 = 0
+        ok = (compare(result, num_0) == 0);
+    }
+
+    if (!ok) {
+        Serial.println("failed");
+        printNumber("actual", result);
+        return;
+    }
+    Serial.println("ok");
+}
+
+// Runs the reciprocal check on every entry of the numbers table.
+void testRecip()
+{
+    Serial.println("Reciprocal:");
+    for (unsigned i = 0; i < numbers_count; ++i)
+        testRecip(numbers[i]);
+    Serial.println();
+}
+
+// Sketch entry point: opens the serial port and runs every group of
+// field-operation tests once, in a fixed order.
+void setup()
+{
+    Serial.begin(9600);
+
+    // Additive operations.
+    testAdd();
+    testSub();
+
+    // Multiplicative operations.
+    testMul();
+    testMulA24();
+
+    // Conditional swap and reciprocal.
+    testSwap();
+    testRecip();
+}
+
+// Nothing to do repeatedly: all tests run once from setup().
+void loop()
+{
+}
--- a/libraries/Crypto/keywords.txt
+++ b/libraries/Crypto/keywords.txt
@@ -7,6 +7,8 @@ BLAKE2s	KEYWORD1
 SHA1	KEYWORD1
 SHA256	KEYWORD1

+Curve25519	KEYWORD1
+
 CBC	KEYWORD1
 CFB	KEYWORD1
 CTR	KEYWORD1
@@ -38,3 +40,7 @@ save	KEYWORD2
 loop	KEYWORD2
 destroy	KEYWORD2
 calibrating	KEYWORD2
+
+eval	KEYWORD2
+dh1	KEYWORD2
+dh2	KEYWORD2