From 9ff24b0ddff4329e89f9cfc387dcf19970c98f64 Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Sat, 26 Mar 2016 19:03:31 +1000 Subject: [PATCH] Implementation of the NIST P-521 curve --- doc/crypto.dox | 14 +- doc/mainpage.dox | 2 +- host/Crypto/Makefile | 3 + libraries/Crypto/BigNumberUtil.cpp | 19 + libraries/Crypto/BigNumberUtil.h | 2 + libraries/Crypto/P521.cpp | 1642 +++++++++++++++++ libraries/Crypto/P521.h | 112 ++ .../Crypto/examples/TestP521/TestP521.ino | 519 ++++++ .../examples/TestP521Math/TestP521Math.ino | 612 ++++++ libraries/Crypto/utility/LimbUtil.h | 8 +- 10 files changed, 2928 insertions(+), 5 deletions(-) create mode 100644 libraries/Crypto/P521.cpp create mode 100644 libraries/Crypto/P521.h create mode 100644 libraries/Crypto/examples/TestP521/TestP521.ino create mode 100644 libraries/Crypto/examples/TestP521Math/TestP521Math.ino diff --git a/doc/crypto.dox b/doc/crypto.dox index 07af41c1..0529741f 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -33,7 +33,7 @@ \li Hash algorithms: SHA256, SHA512, SHA3_256, SHA3_512, BLAKE2s, BLAKE2b (regular and HMAC modes) \li Extendable output functions (XOF's): SHAKE128, SHAKE256 \li Message authenticators: Poly1305, GHASH, OMAC -\li Public key algorithms: Curve25519, Ed25519 +\li Public key algorithms: Curve25519, Ed25519, P521 \li Random number generation: \link RNGClass RNG\endlink, TransistorNoiseSource, RingOscillatorNoiseSource All cryptographic algorithms have been optimized for 8-bit Arduino platforms @@ -129,6 +129,12 @@ Ardunino Mega 2560 running at 16 MHz are similar: Ed25519::sign()5148msDigital signature generation Ed25519::verify()8196msDigital signature verification Ed25519::derivePublicKey()5102msDerive a public key from a private key +P521::eval()46290msRaw curve evaluation +P521::dh1()46293msFirst half of Diffie-Hellman key agreement +P521::dh2()46304msSecond half of Diffie-Hellman key agreement +P521::sign()60514msDigital signature generation +P521::verify()109078msDigital signature verification +P521::derivePublicKey()46290msDerive a public key from a private key Where a cipher supports more than one key size (such as ChaCha), the values @@ -196,5 +202,11 @@ All figures are for the Arduino Due running at 84 MHz: Ed25519::sign()195msDigital signature generation Ed25519::verify()306msDigital signature verification Ed25519::derivePublicKey()194msDerive a public key from a private key +P521::eval()1503msRaw curve evaluation +P521::dh1()1503msFirst half of Diffie-Hellman key agreement +P521::dh2()1503msSecond half of Diffie-Hellman key agreement +P521::sign()1860msDigital signature generation +P521::verify()3423msDigital signature verification +P521::derivePublicKey()1503msDerive a public key from a private key */ diff --git a/doc/mainpage.dox b/doc/mainpage.dox index c8ed795f..3aa6b707 100644 --- a/doc/mainpage.dox +++ b/doc/mainpage.dox @@ -98,7 +98,7 @@ realtime clock and the LCD library to implement an alarm clock. \li Hash algorithms: SHA256, SHA512, SHA3_256, SHA3_512, BLAKE2s, BLAKE2b (regular and HMAC modes) \li Extendable output functions (XOF's): SHAKE128, SHAKE256 \li Message authenticators: Poly1305, GHASH, OMAC -\li Public key algorithms: Curve25519, Ed25519 +\li Public key algorithms: Curve25519, Ed25519, P521 \li Random number generation: \link RNGClass RNG\endlink, TransistorNoiseSource, RingOscillatorNoiseSource More information can be found on the \ref crypto "Cryptographic Library" page. diff --git a/host/Crypto/Makefile b/host/Crypto/Makefile index bbffdf64..f3e28bd3 100644 --- a/host/Crypto/Makefile +++ b/host/Crypto/Makefile @@ -47,6 +47,7 @@ SOURCES = \ NoiseSource.cpp \ OFB.cpp \ OMAC.cpp \ + P521.cpp \ Poly1305.cpp \ RNG_host.cpp \ SHA256.cpp \ @@ -76,6 +77,8 @@ SKETCHES = \ TestGCM/TestGCM.ino \ TestGHASH/TestGHASH.ino \ TestOFB/TestOFB.ino \ + TestP521/TestP521.ino \ + TestP521Math/TestP521Math.ino \ TestPoly1305/TestPoly1305.ino \ TestSHA256/TestSHA256.ino \ TestSHA3_256/TestSHA3_256.ino \ diff --git a/libraries/Crypto/BigNumberUtil.cpp b/libraries/Crypto/BigNumberUtil.cpp index 1c413cb4..d942ab22 100644 --- a/libraries/Crypto/BigNumberUtil.cpp +++ b/libraries/Crypto/BigNumberUtil.cpp @@ -634,3 +634,22 @@ void BigNumberUtil::reduceQuick_P(limb_t *result, const limb_t *x, --size; } } + +/** + * \brief Determine if a big number is zero. + * + * \param x Points to the number to test. + * \param size The number of limbs in \a x. + * \return Returns 1 if \a x is zero or 0 otherwise. + * + * This function attempts to make the determination in constant time. + */ +limb_t BigNumberUtil::isZero(const limb_t *x, size_t size) +{ + limb_t word = 0; + while (size > 0) { + word |= *x++; + --size; + } + return (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS); +} diff --git a/libraries/Crypto/BigNumberUtil.h b/libraries/Crypto/BigNumberUtil.h index 83f6d63f..2bf2724e 100644 --- a/libraries/Crypto/BigNumberUtil.h +++ b/libraries/Crypto/BigNumberUtil.h @@ -86,6 +86,8 @@ public: static void reduceQuick_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size); + static limb_t isZero(const limb_t *x, size_t size); + private: // Constructor and destructor are private - cannot instantiate this class. BigNumberUtil() {} diff --git a/libraries/Crypto/P521.cpp b/libraries/Crypto/P521.cpp new file mode 100644 index 00000000..8ebeb2d0 --- /dev/null +++ b/libraries/Crypto/P521.cpp @@ -0,0 +1,1642 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "P521.h" +#include "Crypto.h" +#include "RNG.h" +#include "SHA512.h" +#include "utility/LimbUtil.h" +#include +#include + +/** + * \class P521 P521.h + * \brief Elliptic curve operations with the NIST P-521 curve. + * + * This class supports both ECDH key exchange and ECDSA signatures. + * + * \note The public functions in this class need a substantial amount of + * stack space to store intermediate results while the curve function is + * being evaluated. About 2k of free stack space is recommended for safety. + * + * References: NIST FIPS 186-4, + * RFC 6090, + * RFC 6979, + * RFC 5903 + * + * \sa Curve25519 + */ + +// Number of limbs that are needed to represent a 521-bit number. +#define NUM_LIMBS_521BIT NUM_LIMBS_BITS(521) + +// Number of limbs that are needed to represent a 1042-bit number. +// To simply things we also require that this be twice the size of +// NUM_LIMB_521BIT which involves a little wastage at the high end +// of one extra limb for 8-bit and 32-bit limbs. There is no +// wastage for 16-bit limbs. +#define NUM_LIMBS_1042BIT (NUM_LIMBS_BITS(521) * 2) + +// The overhead of clean() calls in mul(), etc can add up to a lot of +// processing time. Only do such cleanups if strict mode has been enabled. +#if defined(P521_STRICT_CLEAN) +#define strict_clean(x) clean(x) +#else +#define strict_clean(x) do { ; } while (0) +#endif + +// Expand the partial 9-bit left over limb at the top of a 521-bit number. +#if BIGNUMBER_LIMB_8BIT +#define LIMB_PARTIAL(value) ((uint8_t)(value)), \ + ((uint8_t)((value) >> 8)) +#else +#define LIMB_PARTIAL(value) (value) +#endif + +/** @cond */ + +// The group order "q" value from RFC 4754 and RFC 5903. This is the +// same as the "n" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_q[NUM_LIMBS_521BIT] PROGMEM = { + LIMB(0x91386409), LIMB(0xbb6fb71e), LIMB(0x899c47ae), LIMB(0x3bb5c9b8), + LIMB(0xf709a5d0), LIMB(0x7fcc0148), LIMB(0xbf2f966b), LIMB(0x51868783), + LIMB(0xfffffffa), LIMB(0xffffffff), LIMB(0xffffffff), LIMB(0xffffffff), + LIMB(0xffffffff), LIMB(0xffffffff), LIMB(0xffffffff), LIMB(0xffffffff), + LIMB_PARTIAL(0x1ff) +}; + +// The "b" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_b[NUM_LIMBS_521BIT] PROGMEM = { + LIMB(0x6b503f00), LIMB(0xef451fd4), LIMB(0x3d2c34f1), LIMB(0x3573df88), + LIMB(0x3bb1bf07), LIMB(0x1652c0bd), LIMB(0xec7e937b), LIMB(0x56193951), + LIMB(0x8ef109e1), LIMB(0xb8b48991), LIMB(0x99b315f3), LIMB(0xa2da725b), + LIMB(0xb68540ee), LIMB(0x929a21a0), LIMB(0x8e1c9a1f), LIMB(0x953eb961), + LIMB_PARTIAL(0x051) +}; + +// The "Gx" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_Gx[NUM_LIMBS_521BIT] PROGMEM = { + LIMB(0xc2e5bd66), LIMB(0xf97e7e31), LIMB(0x856a429b), LIMB(0x3348b3c1), + LIMB(0xa2ffa8de), LIMB(0xfe1dc127), LIMB(0xefe75928), LIMB(0xa14b5e77), + LIMB(0x6b4d3dba), LIMB(0xf828af60), LIMB(0x053fb521), LIMB(0x9c648139), + LIMB(0x2395b442), LIMB(0x9e3ecb66), LIMB(0x0404e9cd), LIMB(0x858e06b7), + LIMB_PARTIAL(0x0c6) +}; + +// The "Gy" value from Appendix D.1.2.5 of NIST FIPS 186-4. +static limb_t const P521_Gy[NUM_LIMBS_521BIT] PROGMEM = { + LIMB(0x9fd16650), LIMB(0x88be9476), LIMB(0xa272c240), LIMB(0x353c7086), + LIMB(0x3fad0761), LIMB(0xc550b901), LIMB(0x5ef42640), LIMB(0x97ee7299), + LIMB(0x273e662c), LIMB(0x17afbd17), LIMB(0x579b4468), LIMB(0x98f54449), + LIMB(0x2c7d1bd9), LIMB(0x5c8a5fb4), LIMB(0x9a3bc004), LIMB(0x39296a78), + LIMB_PARTIAL(0x118) +}; + +/** @endcond */ + +/** + * \brief Evaluates the curve function. + * + * \param result The result of applying the curve function, which consists + * of the x and y values of the result point encoded in big-endian order. + * \param f The scalar value to multiply by \a point to create the \a result. + * This is assumed to be be a 521-bit number in big-endian order. + * \param point The curve point to multiply consisting of the x and y + * values encoded in big-endian order. If \a point is NULL, then the + * generator Gx and Gy values for the curve will be used instead. + * + * \return Returns true if \a f * \a point could be evaluated, or false if + * \a point is not a point on the curve. + * + * This function provides access to the raw curve operation for testing + * purposes. Normally an application would use a higher-level function + * like dh1(), dh2(), sign(), or verify(). + * + * \sa dh1(), sign() + */ +bool P521::eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132]) +{ + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + bool ok; + + // Unpack the curve point from the parameters and validate it. + if (point) { + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, point, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, point + 66, 66); + ok = validate(x, y); + } else { + memcpy_P(x, P521_Gx, sizeof(x)); + memcpy_P(y, P521_Gy, sizeof(y)); + ok = true; + } + + // Evaluate the curve function. + evaluate(x, y, f); + + // Pack the answer into the result array. + BigNumberUtil::packBE(result, 66, x, NUM_LIMBS_521BIT); + BigNumberUtil::packBE(result + 66, 66, y, NUM_LIMBS_521BIT); + + // Clean up. + clean(x); + clean(y); + return ok; +} + +/** + * \brief Performs phase 1 of an ECDH key exchange using P-521. + * + * \param k The key value to send to the other party as part of the exchange. + * \param f The generated secret value for this party. This must not be + * transmitted to any party or stored in permanent storage. It only needs + * to be kept in memory until dh2() is called. + * + * The \a f value is generated with \link RNGClass::rand() RNG.rand()\endlink. + * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 66 bytes of \a f safely + * before calling this function. + * + * The following example demonstrates how to perform a full ECDH + * key exchange using dh1() and dh2(): + * + * \code + * uint8_t f[66]; + * uint8_t k[132]; + * + * // Generate the secret value "f" and the public value "k". + * P521::dh1(k, f); + * + * // Send "k" to the other party. + * ... + * + * // Read the "k" value that the other party sent to us. + * ... + * + * // Generate the shared secret in "f". + * if (!P521::dh2(k, f)) { + * // The received "k" value was invalid - abort the session. + * ... + * } + * + * // The "f" value can now be used to generate session keys for encryption. + * ... + * \endcode + * + * Reference: RFC 6090 + * + * \sa dh2() + */ +void P521::dh1(uint8_t k[132], uint8_t f[66]) +{ + generatePrivateKey(f); + derivePublicKey(k, f); +} + +/** + * \brief Performs phase 2 of an ECDH key exchange using P-521. + * + * \param k The public key value that was received from the other + * party as part of the exchange. + * \param f On entry, this is the secret value for this party that was + * generated by dh1(). On exit, this will be the shared secret. + * + * \return Returns true if the key exchange was successful, or false if + * the \a k value is invalid. + * + * Reference: RFC 6090 + * + * \sa dh1() + */ +bool P521::dh2(const uint8_t k[132], uint8_t f[66]) +{ + // Unpack the (x, y) point from k. + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, k, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k + 66, 66); + + // Validate the curve point. We keep going to preserve the timing. + bool ok = validate(x, y); + + // Evaluate the curve function. + evaluate(x, y, f); + + // The secret key is the x component of the final value. + BigNumberUtil::packBE(f, 66, x, NUM_LIMBS_521BIT); + + // Clean up. + clean(x); + clean(y); + return ok; +} + +/** + * \brief Signs a message using a specific P-521 private key. + * + * \param signature The signature value. + * \param privateKey The private key to use to sign the message. + * \param message Points to the message to be signed. + * \param len The length of the \a message to be signed. + * \param hash The hash algorithm to use to hash the \a message before signing. + * If \a hash is NULL, then the \a message is assumed to already be a hash + * value from some previous process. + * + * This function generates deterministic ECDSA signatures according to + * RFC 6979. The \a hash function is used to generate the k value for + * the signature. If \a hash is NULL, then SHA512 is used. + * The \a hash object must be capable of HMAC mode. + * + * The length of the hashed message must be less than or equal to 64 + * bytes in size. Longer messages will be truncated to 64 bytes. + * + * References: RFC 6090, + * RFC 6979 + * + * \sa verify(), generatePrivateKey() + */ +void P521::sign(uint8_t signature[132], const uint8_t privateKey[66], + const void *message, size_t len, Hash *hash) +{ + uint8_t hm[66]; + uint8_t k[66]; + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + limb_t t[NUM_LIMBS_521BIT]; + uint64_t count = 0; + + // Format the incoming message, hashing it if necessary. + if (hash) { + // Hash the message. + hash->reset(); + hash->update(message, len); + len = hash->hashSize(); + if (len > 64) + len = 64; + memset(hm, 0, 66 - len); + hash->finalize(hm + 66 - len, len); + } else { + // The message is the hash. + if (len > 64) + len = 64; + memset(hm, 0, 66 - len); + memcpy(hm + 66 - len, message, len); + } + + // Keep generating k values until both r and s are non-zero. + for (;;) { + // Generate the k value deterministically according to RFC 6979. + if (hash) + generateK(k, hm, privateKey, hash, count); + else + generateK(k, hm, privateKey, count); + + // Generate r = kG.x mod q. + memcpy_P(x, P521_Gx, sizeof(x)); + memcpy_P(y, P521_Gy, sizeof(y)); + evaluate(x, y, k); + BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT); + BigNumberUtil::packBE(signature, 66, x, NUM_LIMBS_521BIT); + + // If r is zero, then we need to generate a new k value. + // This is utterly improbable, but let's be safe anyway. + if (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) { + ++count; + continue; + } + + // Generate s = (privateKey * r + hm) / k mod q. + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, privateKey, 66); + mulQ(y, y, x); + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, hm, 66); + BigNumberUtil::add(x, x, y, NUM_LIMBS_521BIT); + BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k, 66); + recipQ(t, y); + mulQ(x, x, t); + BigNumberUtil::packBE(signature + 66, 66, x, NUM_LIMBS_521BIT); + + // Exit the loop if s is non-zero. + if (!BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) + break; + + // We need to generate a new k value according to RFC 6979. + // This is utterly improbable, but let's be safe anyway. + ++count; + } + + // Clean up. + clean(hm); + clean(k); + clean(x); + clean(y); + clean(t); +} + +/** + * \brief Verifies a signature using a specific P-521 public key. + * + * \param signature The signature value to be verified. + * \param publicKey The public key to use to verify the signature. + * \param message The message whose signature is to be verified. + * \param len The length of the \a message to be verified. + * \param hash The hash algorithm to use to hash the \a message before + * verification. If \a hash is NULL, then the \a message is assumed to + * already be a hash value from some previous process. + * + * The length of the hashed message must be less than or equal to 64 + * bytes in size. Longer messages will be truncated to 64 bytes. + * + * \return Returns true if the \a signature is valid for \a message; + * or false if the \a publicKey or \a signature is not valid. + * + * \sa sign() + */ +bool P521::verify(const uint8_t signature[132], + const uint8_t publicKey[132], + const void *message, size_t len, Hash *hash) +{ + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + limb_t r[NUM_LIMBS_521BIT]; + limb_t s[NUM_LIMBS_521BIT]; + limb_t u1[NUM_LIMBS_521BIT]; + limb_t u2[NUM_LIMBS_521BIT]; + uint8_t t[66]; + bool ok = false; + + // Because we are operating on public values, we don't need to + // be as strict about constant time. Bail out early if there + // is a problem with the parameters. + + // Unpack the signature. The values must be between 1 and q - 1. + BigNumberUtil::unpackBE(r, NUM_LIMBS_521BIT, signature, 66); + BigNumberUtil::unpackBE(s, NUM_LIMBS_521BIT, signature + 66, 66); + if (BigNumberUtil::isZero(r, NUM_LIMBS_521BIT) || + BigNumberUtil::isZero(s, NUM_LIMBS_521BIT) || + !BigNumberUtil::sub_P(x, r, P521_q, NUM_LIMBS_521BIT) || + !BigNumberUtil::sub_P(x, s, P521_q, NUM_LIMBS_521BIT)) { + goto failed; + } + + // Unpack the public key and check that it is a valid curve point. + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66); + if (!validate(x, y)) { + goto failed; + } + + // Hash the message to generate hm, which we store into u1. + if (hash) { + // Hash the message. + hash->reset(); + hash->update(message, len); + len = hash->hashSize(); + if (len > 64) + len = 64; + hash->finalize(u2, len); + BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)u2, len); + } else { + // The message is the hash. + if (len > 64) + len = 64; + BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)message, len); + } + + // Compute u1 = hm * s^-1 mod q and u2 = r * s^-1 mod q. + recipQ(u2, s); + mulQ(u1, u1, u2); + mulQ(u2, r, u2); + + // Compute the curve point R = u2 * publicKey + u1 * G. + BigNumberUtil::packBE(t, 66, u2, NUM_LIMBS_521BIT); + evaluate(x, y, t); + memcpy_P(u2, P521_Gx, sizeof(x)); + memcpy_P(s, P521_Gy, sizeof(y)); + BigNumberUtil::packBE(t, 66, u1, NUM_LIMBS_521BIT); + evaluate(u2, s, t); + addAffine(u2, s, x, y); + + // If R.x = r mod q, then the signature is valid. + BigNumberUtil::reduceQuick_P(u1, u2, P521_q, NUM_LIMBS_521BIT); + ok = secure_compare(u1, r, NUM_LIMBS_521BIT * sizeof(limb_t)); + + // Clean up and exit. +failed: + clean(x); + clean(y); + clean(r); + clean(s); + clean(u1); + clean(u2); + clean(t); + return ok; +} + +/** + * \brief Generates a private key for P-521 signing operations. + * + * \param privateKey The resulting private key. + * + * The private key is generated with \link RNGClass::rand() RNG.rand()\endlink. + * It is the caller's responsibility to ensure that the global random number + * pool has sufficient entropy to generate the 521 bits of the key safely + * before calling this function. + * + * \sa derivePublicKey(), sign() + */ +void P521::generatePrivateKey(uint8_t privateKey[66]) +{ + // Generate a random 521-bit value for the private key. The value + // must be generated uniformly at random between 1 and q - 1 where q + // is the group order (RFC 6090). We use the recommended algorithm + // from Appendix B of RFC 6090: generate a random 521-bit value + // and discard it if it is not within the range 1 to q - 1. + limb_t x[NUM_LIMBS_521BIT]; + do { + RNG.rand((uint8_t *)x, sizeof(x)); +#if BIGNUMBER_LIMB_8BIT + x[NUM_LIMBS_521BIT - 1] &= 0x01; +#else + x[NUM_LIMBS_521BIT - 1] &= 0x1FF; +#endif + BigNumberUtil::packBE(privateKey, 66, x, NUM_LIMBS_521BIT); + } while (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT) || + !BigNumberUtil::sub_P(x, x, P521_q, NUM_LIMBS_521BIT)); + clean(x); +} + +/** + * \brief Derives the public key from a private key for P-521 + * signing operations. + * + * \param publicKey The public key. + * \param privateKey The private key, which is assumed to have been + * created by generatePrivateKey(). + * + * \sa generatePrivateKey(), verify() + */ +void P521::derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66]) +{ + // Evaluate the curve function starting with the generator. + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + memcpy_P(x, P521_Gx, sizeof(x)); + memcpy_P(y, P521_Gy, sizeof(y)); + evaluate(x, y, privateKey); + + // Pack the (x, y) point into the public key. + BigNumberUtil::packBE(publicKey, 66, x, NUM_LIMBS_521BIT); + BigNumberUtil::packBE(publicKey + 66, 66, y, NUM_LIMBS_521BIT); + + // Clean up. + clean(x); + clean(y); +} + +/** + * \brief Validates a private key value to ensure that it is + * between 1 and q - 1. + * + * \param privateKey The private key value to validate. + * \return Returns true if \a privateKey is valid, false if not. + * + * \sa isValidPublicKey() + */ +bool P521::isValidPrivateKey(const uint8_t privateKey[66]) +{ + // The value "q" as a byte array from most to least significant. + static uint8_t const P521_q_bytes[66] PROGMEM = { + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F, + 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09, + 0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, + 0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38, + 0x64, 0x09 + }; + uint8_t zeroTest = 0; + uint8_t posn = 66; + uint16_t borrow = 0; + while (posn > 0) { + --posn; + + // Check for zero. + zeroTest |= privateKey[posn]; + + // Subtract P521_q_bytes from the key. If there is no borrow, + // then the key value was greater than or equal to q. + borrow = ((uint16_t)(privateKey[posn])) - + pgm_read_byte(&(P521_q_bytes[posn])) - + ((borrow >> 8) & 0x01); + } + return zeroTest != 0 && borrow != 0; +} + +/** + * \brief Validates a public key to ensure that it is a valid curve point. + * + * \param publicKey The public key value to validate. + * \return Returns true if \a publicKey is valid, false if not. + * + * \sa isValidPrivateKey() + */ +bool P521::isValidPublicKey(const uint8_t publicKey[132]) +{ + limb_t x[NUM_LIMBS_521BIT]; + limb_t y[NUM_LIMBS_521BIT]; + BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66); + BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66); + bool ok = validate(x, y); + clean(x); + clean(y); + return ok; +} + +/** + * \fn bool P521::isValidCurvePoint(const uint8_t point[132]) + * \brief Validates a point to ensure that it is on the curve. + * + * \param point The point to validate. + * \return Returns true if \a point is valid and on the curve, false if not. + * + * This is a convenience function that calls isValidPublicKey() as the + * two operations are equivalent. + */ + +/** + * \brief Evaluates the curve function by multiplying (x, y) by f. + * + * \param x The X co-ordinate of the curve point. Replaced with the X + * co-ordinate of the result on exit. + * \param y The Y co-ordinate of the curve point. Replaced with the Y + * co-ordinate of the result on exit. + * \param f The 521-bit scalar to multiply (x, y) by, most significant + * bit first. + */ +void P521::evaluate(limb_t *x, limb_t *y, const uint8_t f[66]) +{ + limb_t x1[NUM_LIMBS_521BIT]; + limb_t y1[NUM_LIMBS_521BIT]; + limb_t z1[NUM_LIMBS_521BIT]; + limb_t x2[NUM_LIMBS_521BIT]; + limb_t y2[NUM_LIMBS_521BIT]; + limb_t z2[NUM_LIMBS_521BIT]; + + // We want the input in Jacobian co-ordinates. The point (x, y, z) + // corresponds to the affine point (x / z^2, y / z^3), so if we set z + // to 1 we end up with Jacobian co-ordinates. Remember that z is 1 + // and continue on. + + // Set the answer to the point-at-infinity initially (z = 0). + memset(x1, 0, sizeof(x1)); + memset(y1, 0, sizeof(y1)); + memset(z1, 0, sizeof(z1)); + + // Special handling for the highest bit. We can skip dblPoint()/addPoint() + // and simply conditionally move (x, y, z) into (x1, y1, z1). + uint8_t select = (f[0] & 0x01); + cmove(select, x1, x); + cmove(select, y1, y); + cmove1(select, z1); // z = 1 + + // Iterate over the remaining 520 bits of f from highest to lowest. + uint8_t mask = 0x80; + uint8_t fposn = 1; + for (uint16_t t = 520; t > 0; --t) { + // Double the answer. + dblPoint(x1, y1, z1, x1, y1, z1); + + // Add (x, y, z) to (x1, y1, z1) for the next 1 bit. + // We must always do this to preserve the overall timing. + // The z value is always 1 so we can omit that argument. + addPoint(x2, y2, z2, x1, y1, z1, x, y/*, z*/); + + // If the bit was 1, then move (x2, y2, z2) into (x1, y1, z1). + select = (f[fposn] & mask); + cmove(select, x1, x2); + cmove(select, y1, y2); + cmove(select, z1, z2); + + // Move onto the next bit. + mask >>= 1; + if (!mask) { + ++fposn; + mask = 0x80; + } + } + + // Convert from Jacobian co-ordinates back into affine co-ordinates. + // x = x1 * (z1^2)^-1, y = y1 * (z1^3)^-1. + recip(x2, z1); + square(y2, x2); + mul(x, x1, y2); + mul(y2, y2, x2); + mul(y, y1, y2); + + // Clean up. + clean(x1); + clean(y1); + clean(z1); + clean(x2); + clean(y2); + clean(z2); +} + +/** + * \brief Adds two affine points. + * + * \param x1 The X value for the first point to add, and the result. + * \param y1 The Y value for the first point to add, and the result. + * \param x2 The X value for the second point to add. + * \param y2 The Y value for the second point to add. + * + * The Z values for the two points are assumed to be 1. + */ +void P521::addAffine(limb_t *x1, limb_t *y1, const limb_t *x2, const limb_t *y2) +{ + limb_t xout[NUM_LIMBS_521BIT]; + limb_t yout[NUM_LIMBS_521BIT]; + limb_t zout[NUM_LIMBS_521BIT]; + limb_t z1[NUM_LIMBS_521BIT]; + + // z1 = 1 + z1[0] = 1; + memset(z1 + 1, 0, (NUM_LIMBS_521BIT - 1) * sizeof(limb_t)); + + // Add the two points. + addPoint(xout, yout, zout, x1, y1, z1, x2, y2/*, z2*/); + + // Convert from Jacobian co-ordinates back into affine co-ordinates. + // x1 = xout * (zout^2)^-1, y1 = yout * (zout^3)^-1. + recip(z1, zout); + square(zout, z1); + mul(x1, xout, zout); + mul(zout, zout, z1); + mul(y1, yout, zout); + + // Clean up. + clean(xout); + clean(yout); + clean(zout); + clean(z1); +} + +/** + * \brief Validates that (x, y) is actually a point on the curve. + * + * \param x The X co-ordinate of the point to test. + * \param y The Y co-ordinate of the point to test. + * \return Returns true if (x, y) is on the curve, or false if not. + * + * \sa inRange() + */ +bool P521::validate(const limb_t *x, const limb_t *y) +{ + bool result; + + // If x or y is greater than or equal to 2^521 - 1, then the + // point is definitely not on the curve. Preserve timing by + // delaying the reporting of the result until later. + result = inRange(x); + result &= inRange(y); + + // We need to check that y^2 = x^3 - 3 * x + b mod 2^521 - 1. + limb_t t1[NUM_LIMBS_521BIT]; + limb_t t2[NUM_LIMBS_521BIT]; + square(t1, x); + mul(t1, t1, x); + mulLiteral(t2, x, 3); + sub(t1, t1, t2); + memcpy_P(t2, P521_b, sizeof(t2)); + add(t1, t1, t2); + square(t2, y); + result &= secure_compare(t1, t2, sizeof(t1)); + clean(t1); + clean(t2); + return result; +} + +/** + * \brief Determines if a value is between 0 and 2^521 - 2. + * + * \param x The value to test. + * \return Returns true if \a x is in range, false if not. + * + * \sa validate() + */ +bool P521::inRange(const limb_t *x) +{ + // Do a trial subtraction of 2^521 - 1 from x, which is equivalent + // to adding 1 and subtracting 2^521. We only need the carry. + dlimb_t carry = 1; + limb_t word = 0; + for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += *x++; + word = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Determine the carry out from the low 521 bits. +#if BIGNUMBER_LIMB_8BIT + carry = (carry << 7) + (word >> 1); +#else + carry = (carry << (LIMB_BITS - 9)) + (word >> 9); +#endif + + // If the carry is zero, then x was in range. Otherwise it is out + // of range. Check for zero in a way that preserves constant timing. + word = (limb_t)(carry | (carry >> LIMB_BITS)); + word = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS); + return (bool)word; +} + +/** + * \brief Reduces a number modulo 2^521 - 1. + * + * \param result The array that will contain the result when the + * function exits. Must be NUM_LIMBS_521BIT limbs in size. + * \param x The number to be reduced, which must be NUM_LIMBS_1042BIT + * limbs in size and less than square(2^521 - 1). This array can be + * the same as \a result. + */ +void P521::reduce(limb_t *result, const limb_t *x) +{ +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT + // According to NIST FIPS 186-4, we add the high 521 bits to the + // low 521 bits and then do a trial subtraction of 2^521 - 1. + // We do both in a single step. Subtracting 2^521 - 1 is equivalent + // to adding 1 and subtracting 2^521. + uint8_t index; + const limb_t *xl = x; + const limb_t *xh = x + NUM_LIMBS_521BIT; + limb_t *rr = result; + dlimb_t carry; + limb_t word = x[NUM_LIMBS_521BIT - 1]; + carry = (word >> 9) + 1; + word &= 0x1FF; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *xl++; + carry += ((dlimb_t)(*xh++)) << (LIMB_BITS - 9); + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << (LIMB_BITS - 9); + word = (limb_t)carry; + *rr = word; + + // If the carry out was 1, then mask it off and we have the answer. + // If the carry out was 0, then we need to add 2^521 - 1 back again. + // To preserve the timing we perform a conditional subtract of 1 and + // then mask off the high bits. + carry = ((word >> 9) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x1FF; +#elif BIGNUMBER_LIMB_8BIT + // Same as above, but for 8-bit limbs. + uint8_t index; + const limb_t *xl = x; + const limb_t *xh = x + NUM_LIMBS_521BIT; + limb_t *rr = result; + dlimb_t carry; + limb_t word = x[NUM_LIMBS_521BIT - 1]; + carry = (word >> 1) + 1; + word &= 0x01; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *xl++; + carry += ((dlimb_t)(*xh++)) << 7; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << 1; + word = (limb_t)carry; + *rr = word; + carry = ((word >> 1) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x01; +#else + #error "Don't know how to reduce values mod 2^521 - 1" +#endif +} + +/** + * \brief Quickly reduces a number modulo 2^521 - 1. + * + * \param x The number to be reduced, which must be NUM_LIMBS_521BIT + * limbs in size and less than or equal to 2 * (2^521 - 2). + * + * The answer is also put into \a x and will consist of NUM_LIMBS_521BIT limbs. + * + * This function is intended for reducing the result of additions where + * the caller knows that \a x is within the described range. A single + * trial subtraction is all that is needed to reduce the number. + */ +void P521::reduceQuick(limb_t *x) +{ + // Perform a trial subtraction of 2^521 - 1 from x. This is + // equivalent to adding 1 and subtracting 2^521 - 1. + uint8_t index; + limb_t *xx = x; + dlimb_t carry = 1; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += *xx; + *xx++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // If the carry out was 1, then mask it off and we have the answer. + // If the carry out was 0, then we need to add 2^521 - 1 back again. + // To preserve the timing we perform a conditional subtract of 1 and + // then mask off the high bits. +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT + carry = ((x[NUM_LIMBS_521BIT - 1] >> 9) ^ 0x01) & 0x01; + xx = x; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*xx)) - carry; + *xx++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--xx) &= 0x1FF; +#elif BIGNUMBER_LIMB_8BIT + carry = ((x[NUM_LIMBS_521BIT - 1] >> 1) ^ 0x01) & 0x01; + xx = x; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*xx)) - carry; + *xx++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--xx) &= 0x01; +#endif +} + +/** + * \brief Multiplies two 521-bit values to produce a 1042-bit result. + * + * \param result The result, which must be NUM_LIMBS_1042BIT limbs in size + * and must not overlap with \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size. + * + * \sa mul() + */ +void P521::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y) +{ + uint8_t i, j; + dlimb_t carry; + limb_t word; + const limb_t *yy; + limb_t *rr; + + // Multiply the lowest word of x by y. + carry = 0; + word = x[0]; + yy = y; + rr = result; + for (i = 0; i < NUM_LIMBS_521BIT; ++i) { + carry += ((dlimb_t)(*yy++)) * word; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *rr = (limb_t)carry; + + // Multiply and add the remaining words of x by y. + for (i = 1; i < NUM_LIMBS_521BIT; ++i) { + word = x[i]; + carry = 0; + yy = y; + rr = result + i; + for (j = 0; j < NUM_LIMBS_521BIT; ++j) { + carry += ((dlimb_t)(*yy++)) * word; + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + *rr = (limb_t)carry; + } +} + +/** + * \brief Multiplies two values and then reduces the result modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than 2^521 - 1. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than 2^521 - 1. This can be the same array as \a x. + */ +void P521::mul(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS_1042BIT]; + mulNoReduce(temp, x, y); + reduce(result, temp); + strict_clean(temp); +} + +/** + * \fn void P521::square(limb_t *result, const limb_t *x) + * \brief Squares a value and then reduces it modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size and + * can be the same array as \a x. + * \param x The value to square, which must be NUM_LIMBS_521BIT limbs in size + * and less than 2^521 - 1. + */ + +/** + * \brief Multiply a value by a single-limb literal modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size and + * can be the same array as \a x. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than 2^521 - 1. + * \param y The second value to multiply, which must be less than 128. + */ +void P521::mulLiteral(limb_t *result, const limb_t *x, limb_t y) +{ + uint8_t index; + dlimb_t carry = 0; + const limb_t *xx = x; + limb_t *rr = result; + + // Multiply x by the literal and put it into the result array. + // We assume that y is small enough that overflow from the + // highest limb will not occur during this process. + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += ((dlimb_t)(*xx++)) * y; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + + // Reduce the value modulo 2^521 - 1. The high half is only a + // single limb, so we can short-cut some of reduce() here. +#if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT + limb_t word = result[NUM_LIMBS_521BIT - 1]; + carry = (word >> 9) + 1; + word &= 0x1FF; + rr = result; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + word = (limb_t)carry; + *rr = word; + + // If the carry out was 1, then mask it off and we have the answer. + // If the carry out was 0, then we need to add 2^521 - 1 back again. + // To preserve the timing we perform a conditional subtract of 1 and + // then mask off the high bits. + carry = ((word >> 9) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x1FF; +#elif BIGNUMBER_LIMB_8BIT + // Same as above, but for 8-bit limbs. + limb_t word = result[NUM_LIMBS_521BIT - 1]; + carry = (word >> 1) + 1; + word &= 0x01; + rr = result; + for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) { + carry += *rr; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + carry += word; + word = (limb_t)carry; + *rr = word; + carry = ((word >> 1) ^ 0x01) & 0x01; + rr = result; + for (index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry = ((dlimb_t)(*rr)) - carry; + *rr++ = (limb_t)carry; + carry = (carry >> LIMB_BITS) & 0x01; + } + *(--rr) &= 0x01; +#endif +} + +/** + * \brief Adds two values and then reduces the result modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + */ +void P521::add(limb_t *result, const limb_t *x, const limb_t *y) +{ + dlimb_t carry = 0; + limb_t *rr = result; + for (uint8_t posn = 0; posn < NUM_LIMBS_521BIT; ++posn) { + carry += *x++; + carry += *y++; + *rr++ = (limb_t)carry; + carry >>= LIMB_BITS; + } + reduceQuick(result); +} + +/** + * \brief Subtracts two values and then reduces the result modulo 2^521 - 1. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT + * limbs in size and less than 2^521 - 1. + */ +void P521::sub(limb_t *result, const limb_t *x, const limb_t *y) +{ + dlimb_t borrow; + uint8_t posn; + limb_t *rr = result; + + // Subtract y from x to generate the intermediate result. + borrow = 0; + for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) { + borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; + } + + // If we had a borrow, then the result has gone negative and we + // have to add 2^521 - 1 to the result to make it positive again. + // The top bits of "borrow" will be all 1's if there is a borrow + // or it will be all 0's if there was no borrow. Easiest is to + // conditionally subtract 1 and then mask off the high bits. + rr = result; + borrow = (borrow >> LIMB_BITS) & 1U; + borrow = ((dlimb_t)(*rr)) - borrow; + *rr++ = (limb_t)borrow; + for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) { + borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01); + *rr++ = (limb_t)borrow; + } +#if BIGNUMBER_LIMB_8BIT + *(--rr) &= 0x01; +#else + *(--rr) &= 0x1FF; +#endif +} + +/** + * \brief Doubles a point represented in Jacobian co-ordinates. + * + * \param xout The X value for the result. + * \param yout The Y value for the result. + * \param zout The Z value for the result. + * \param xin The X value for the point to be doubled. + * \param yin The Y value for the point to be doubled. + * \param zin The Z value for the point to be doubled. + * + * The output parameters can be the same as the input parameters + * to double in-place. + * + * Reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b + */ +void P521::dblPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *xin, const limb_t *yin, + const limb_t *zin) +{ + limb_t alpha[NUM_LIMBS_521BIT]; + limb_t beta[NUM_LIMBS_521BIT]; + limb_t gamma[NUM_LIMBS_521BIT]; + limb_t delta[NUM_LIMBS_521BIT]; + limb_t tmp[NUM_LIMBS_521BIT]; + + // Double the point. If it is the point at infinity (z = 0), + // then zout will still be zero at the end of this process so + // we don't need any special handling for that case. + square(delta, zin); // delta = z^2 + square(gamma, yin); // gamma = y^2 + mul(beta, xin, gamma); // beta = x * gamma + sub(tmp, xin, delta); // alpha = 3 * (x - delta) * (x + delta) + mulLiteral(alpha, tmp, 3); + add(tmp, xin, delta); + mul(alpha, alpha, tmp); + square(xout, alpha); // xout = alpha^2 - 8 * beta + mulLiteral(tmp, beta, 8); + sub(xout, xout, tmp); + add(zout, yin, zin); // zout = (y + z)^2 - gamma - delta + square(zout, zout); + sub(zout, zout, gamma); + sub(zout, zout, delta); + mulLiteral(yout, beta, 4);// yout = alpha * (4 * beta - xout) - 8 * gamma^2 + sub(yout, yout, xout); + mul(yout, alpha, yout); + square(gamma, gamma); + mulLiteral(gamma, gamma, 8); + sub(yout, yout, gamma); + + // Clean up. + strict_clean(alpha); + strict_clean(beta); + strict_clean(gamma); + strict_clean(delta); + strict_clean(tmp); +} + +/** + * \brief Adds two curve points, one represented in Jacobian co-ordinates, + * and the other represented in affine co-ordinates. + * + * \param xout The X value for the result. + * \param yout The Y value for the result. + * \param zout The Z value for the result. + * \param x1 The X value for the first point to add. + * \param y1 The Y value for the first point to add. + * \param z1 The Z value for the first point to add. + * \param x2 The X value for the second point to add. + * \param y2 The Y value for the second point to add. + * + * The output parameters must not overlap with either of the inputs. + * + * The Z value of the second point is implicitly assumed to be 1. + * + * Reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + */ +void P521::addPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *x1, const limb_t *y1, + const limb_t *z1, const limb_t *x2, + const limb_t *y2) +{ + limb_t z1z1[NUM_LIMBS_521BIT]; + limb_t u2[NUM_LIMBS_521BIT]; + limb_t s2[NUM_LIMBS_521BIT]; + limb_t h[NUM_LIMBS_521BIT]; + limb_t i[NUM_LIMBS_521BIT]; + limb_t j[NUM_LIMBS_521BIT]; + limb_t r[NUM_LIMBS_521BIT]; + limb_t v[NUM_LIMBS_521BIT]; + + // Determine if the first value is the point-at-infinity identity element. + // The second z value is always 1 so it cannot be the point-at-infinity. + limb_t p1IsIdentity = BigNumberUtil::isZero(z1, NUM_LIMBS_521BIT); + + // Multiply the points, assuming that z2 = 1. + square(z1z1, z1); // z1z1 = z1^2 + mul(u2, x2, z1z1); // u2 = x2 * z1z1 + mul(s2, y2, z1); // s2 = y2 * z1 * z1z1 + mul(s2, s2, z1z1); + sub(h, u2, x1); // h = u2 - x1 + mulLiteral(i, h, 2); // i = (2 * h)^2 + square(i, i); + sub(r, s2, y1); // r = 2 * (s2 - y1) + add(r, r, r); + mul(j, h, i); // j = h * i + mul(v, x1, i); // v = x1 * i + square(xout, r); // xout = r^2 - j - 2 * v + sub(xout, xout, j); + sub(xout, xout, v); + sub(xout, xout, v); + sub(yout, v, xout); // yout = r * (v - xout) - 2 * y1 * j + mul(yout, r, yout); + mul(j, y1, j); + sub(yout, yout, j); + sub(yout, yout, j); + mul(zout, z1, h); // zout = 2 * z1 * h + add(zout, zout, zout); + + // Select the answer to return. If (x1, y1, z1) was the identity, + // then the answer is (x2, y2, z2). Otherwise it is (xout, yout, zout). + // Conditionally move the second argument over the output if necessary. + cmove(p1IsIdentity, xout, x2); + cmove(p1IsIdentity, yout, y2); + cmove1(p1IsIdentity, zout); // z2 = 1 + + // Clean up. + strict_clean(z1z1); + strict_clean(u2); + strict_clean(s2); + strict_clean(h); + strict_clean(i); + strict_clean(j); + strict_clean(r); + strict_clean(v); +} + +/** + * \brief Conditionally moves \a y into \a x if a selection value is non-zero. + * + * \param select Non-zero to move \a y into \a x, zero to leave \a x unchanged. + * \param x The destination to move into. + * \param y The value to conditionally move. + * + * The move is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + * + * \sa cmove1() + */ +void P521::cmove(limb_t select, limb_t *x, const limb_t *y) +{ + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits is set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Move y into x based on "select". + for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) { + dummy = sel & (*x ^ *y++); + *x++ ^= dummy; + } +} + +/** + * \brief Conditionally moves 1 into \a x if a selection value is non-zero. + * + * \param select Non-zero to move 1 into \a x, zero to leave \a x unchanged. + * \param x The destination to move into. + * + * The move is performed in a way that it should take the same amount of + * time irrespective of the value of \a select. + * + * \sa cmove() + */ +void P521::cmove1(limb_t select, limb_t *x) +{ + uint8_t posn; + limb_t dummy; + limb_t sel; + + // Turn "select" into an all-zeroes or all-ones mask. We don't care + // which bit or bits is set in the original "select" value. + sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS); + --sel; + + // Move 1 into x based on "select". + dummy = sel & (*x ^ 1); + *x++ ^= dummy; + for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) { + dummy = sel & *x; + *x++ ^= dummy; + } +} + +/** + * \brief Computes the reciprocal of a number modulo 2^521 - 1. + * + * \param result The result as a array of NUM_LIMBS_521BIT limbs in size. + * This cannot be the same array as \a x. + * \param x The number to compute the reciprocal for, also NUM_LIMBS_521BIT + * limbs in size. + */ +void P521::recip(limb_t *result, const limb_t *x) +{ + limb_t t1[NUM_LIMBS_521BIT]; + + // The reciprocal is the same as x ^ (p - 2) where p = 2^521 - 1. + // The big-endian hexadecimal expansion of (p - 2) is: + // 01FF FFFFFFF FFFFFFFF ... FFFFFFFF FFFFFFFD + // + // The naive implementation needs to do 2 multiplications per 1 bit and + // 1 multiplication per 0 bit. We can improve upon this by creating a + // pattern 1111 and then shifting and multiplying to create 11111111, + // and then 1111111111111111, and so on for the top 512-bits. + + // Build a 4-bit pattern 1111 in the result. + square(result, x); + mul(result, result, x); + square(result, result); + mul(result, result, x); + square(result, result); + mul(result, result, x); + + // Shift and multiply by increasing powers of two. This turns + // 1111 into 11111111, and then 1111111111111111, and so on. + for (size_t power = 4; power <= 256; power <<= 1) { + square(t1, result); + for (size_t temp = 1; temp < power; ++temp) + square(t1, t1); + mul(result, result, t1); + } + + // Handle the 9 lowest bits of (p - 2), 111111101, from highest to lowest. + for (uint8_t index = 0; index < 7; ++index) { + square(result, result); + mul(result, result, x); + } + square(result, result); + square(result, result); + mul(result, result, x); + + // Clean up. + clean(t1); +} + +/** + * \brief Reduces a number modulo q. + * + * \param result The result array, which must be NUM_LIMBS_521BIT limbs in size. + * \param r The value to reduce, which must be NUM_LIMBS_1042BIT limbs in size. + * + * It is allowed for \a result to be the same as \a r. + */ +void P521::reduceQ(limb_t *result, const limb_t *r) +{ + // Algorithm from: http://en.wikipedia.org/wiki/Barrett_reduction + // + // We assume that r is less than or equal to (q - 1)^2. + // + // We want to compute result = r mod q. Find the smallest k such + // that 2^k > q. In our case, k = 521. Then set m = floor(4^k / q) + // and let r = r - q * floor(m * r / 4^k). This will be the result + // or it will be at most one subtraction of q away from the result. + // + // Note: m is a 522-bit number, which fits in the same number of limbs + // as a 521-bit number assuming that limbs are 8 bits or more in size. + static limb_t const numM[NUM_LIMBS_521BIT] PROGMEM = { + LIMB(0x6EC79BF7), LIMB(0x449048E1), LIMB(0x7663B851), LIMB(0xC44A3647), + LIMB(0x08F65A2F), LIMB(0x8033FEB7), LIMB(0x40D06994), LIMB(0xAE79787C), + LIMB(0x00000005), LIMB(0x00000000), LIMB(0x00000000), LIMB(0x00000000), + LIMB(0x00000000), LIMB(0x00000000), LIMB(0x00000000), LIMB(0x00000000), + LIMB_PARTIAL(0x200) + }; + limb_t temp[NUM_LIMBS_1042BIT + NUM_LIMBS_521BIT]; + limb_t temp2[NUM_LIMBS_521BIT]; + + // Multiply r by m. + BigNumberUtil::mul_P(temp, r, NUM_LIMBS_1042BIT, numM, NUM_LIMBS_521BIT); + + // Compute (m * r / 4^521) = (m * r / 2^1042). +#if BIGNUMBER_LIMB_8BIT || BIGNUMBER_LIMB_16BIT + dlimb_t carry = temp[NUM_LIMBS_BITS(1040)] >> 2; + for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1040) + index + 1])) << (LIMB_BITS - 2); + temp2[index] = (limb_t)carry; + carry >>= LIMB_BITS; + } +#elif BIGNUMBER_LIMB_32BIT + dlimb_t carry = temp[NUM_LIMBS_BITS(1024)] >> 18; + for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) { + carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1024) + index + 1])) << 14; + temp2[index] = (limb_t)carry; + carry >>= LIMB_BITS; + } +#endif + + // Multiply (m * r) / 2^1042 by q and subtract it from r. + // We can ignore the high words of the subtraction result + // because they will all turn into zero after the subtraction. + BigNumberUtil::mul_P(temp, temp2, NUM_LIMBS_521BIT, + P521_q, NUM_LIMBS_521BIT); + BigNumberUtil::sub(result, r, temp, NUM_LIMBS_521BIT); + + // Perform a trial subtraction of q from the result to reduce it. + BigNumberUtil::reduceQuick_P(result, result, P521_q, NUM_LIMBS_521BIT); + + // Clean up and exit. + clean(temp); + clean(temp2); +} + +/** + * \brief Multiplies two values and then reduces the result modulo q. + * + * \param result The result, which must be NUM_LIMBS_521BIT limbs in size + * and can be the same array as \a x or \a y. + * \param x The first value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than q. + * \param y The second value to multiply, which must be NUM_LIMBS_521BIT limbs + * in size and less than q. This can be the same array as \a x. + */ +void P521::mulQ(limb_t *result, const limb_t *x, const limb_t *y) +{ + limb_t temp[NUM_LIMBS_1042BIT]; + mulNoReduce(temp, x, y); + reduceQ(result, temp); + strict_clean(temp); +} + +/** + * \brief Computes the reciprocal of a number modulo q. + * + * \param result The result as a array of NUM_LIMBS_521BIT limbs in size. + * This cannot be the same array as \a x. + * \param x The number to compute the reciprocal for, also NUM_LIMBS_521BIT + * limbs in size. + */ +void P521::recipQ(limb_t *result, const limb_t *x) +{ + // Bottom 265 bits of q - 2. The top 256 bits are all-1's. + static limb_t const P521_q_m2[] PROGMEM = { + LIMB(0x91386407), LIMB(0xbb6fb71e), LIMB(0x899c47ae), LIMB(0x3bb5c9b8), + LIMB(0xf709a5d0), LIMB(0x7fcc0148), LIMB(0xbf2f966b), LIMB(0x51868783), + LIMB_PARTIAL(0x1fa) + }; + + // Raise x to the power of q - 2, mod q. We start with the top + // 256 bits which are all-1's, using a similar technique to recip(). + limb_t t1[NUM_LIMBS_521BIT]; + mulQ(result, x, x); + mulQ(result, result, x); + mulQ(result, result, result); + mulQ(result, result, x); + mulQ(result, result, result); + mulQ(result, result, x); + for (size_t power = 4; power <= 128; power <<= 1) { + mulQ(t1, result, result); + for (size_t temp = 1; temp < power; ++temp) + mulQ(t1, t1, t1); + mulQ(result, result, t1); + } + clean(t1); + + // Deal with the bottom 265 bits from highest to lowest. Square for + // each bit and multiply in x whenever there is a 1 bit. The timing + // is based on the publicly-known constant q - 2, not on the value of x. + size_t bit = 265; + while (bit > 0) { + --bit; + mulQ(result, result, result); + if (pgm_read_limb(&(P521_q_m2[bit / LIMB_BITS])) & + (((limb_t)1) << (bit % LIMB_BITS))) { + mulQ(result, result, x); + } + } +} + +/** + * \brief Generates a k value using the algorithm from RFC 6979. + * + * \param k The value to generate. + * \param hm The hashed message formatted ready to be signed. + * \param x The private key to sign with. + * \param hash The hash algorithm to use. + * \param count Iteration counter for generating new values of k when the + * previous one is rejected. + */ +void P521::generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], Hash *hash, uint64_t count) +{ + size_t hlen = hash->hashSize(); + uint8_t V[64]; + uint8_t K[64]; + uint8_t marker; + + // If for some reason a hash function was supplied with more than + // 512 bits of output, truncate hash values to the first 512 bits. + // We cannot support more than this yet. + if (hlen > 64) + hlen = 64; + + // RFC 6979, Section 3.2, Step a. Hash the message, reduce modulo q, + // and produce an octet string the same length as q, bits2octets(H(m)). + // We support hashes up to 512 bits and q is a 521-bit number, so "hm" + // is already the bits2octets(H(m)) value that we need. + + // Steps b and c. Set V to all-ones and K to all-zeroes. + memset(V, 0x01, hlen); + memset(K, 0x00, hlen); + + // Step d. K = HMAC_K(V || 0x00 || x || hm). We make a small + // modification here to append the count value if it is non-zero. + // We use this to generate a new k if we have to re-enter this + // function because the previous one was rejected by sign(). + // This is slightly different to RFC 6979 which says that the + // loop in step h below should be continued. That code path is + // difficult to access, so instead modify K and V in steps d and f. + // This alternative construction is compatible with the second + // variant described in section 3.6 of RFC 6979. + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + marker = 0x00; + hash->update(&marker, 1); + hash->update(x, 66); + hash->update(hm, 66); + if (count) + hash->update(&count, sizeof(count)); + hash->finalizeHMAC(K, hlen, K, hlen); + + // Step e. V = HMAC_K(V) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + + // Step f. K = HMAC_K(V || 0x01 || x || hm) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + marker = 0x01; + hash->update(&marker, 1); + hash->update(x, 66); + hash->update(hm, 66); + if (count) + hash->update(&count, sizeof(count)); + hash->finalizeHMAC(K, hlen, K, hlen); + + // Step g. V = HMAC_K(V) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + + // Step h. Generate candidate k values until we find what we want. + for (;;) { + // Step h.1 and h.2. Generate a string of 66 bytes in length. + // T = empty + // while (len(T) < 66) + // V = HMAC_K(V) + // T = T || V + size_t posn = 0; + while (posn < 66) { + size_t temp = 66 - posn; + if (temp > hlen) + temp = hlen; + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + memcpy(k + posn, V, temp); + posn += temp; + } + + // Step h.3. k = bits2int(T) and exit the loop if k is not in + // the range 1 to q - 1. Note: We have to extract the 521 most + // significant bits of T, which means shifting it right by seven + // bits to put it into the correct form. + for (posn = 65; posn > 0; --posn) + k[posn] = (k[posn - 1] << 1) | (k[posn] >> 7); + k[0] >>= 7; + if (isValidPrivateKey(k)) + break; + + // Generate new K and V values and try again. + // K = HMAC_K(V || 0x00) + // V = HMAC_K(V) + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + marker = 0x00; + hash->update(&marker, 1); + hash->finalizeHMAC(K, hlen, K, hlen); + hash->resetHMAC(K, hlen); + hash->update(V, hlen); + hash->finalizeHMAC(K, hlen, V, hlen); + } + + // Clean up. + clean(V); + clean(K); +} + +/** + * \brief Generates a k value using the algorithm from RFC 6979. + * + * \param k The value to generate. + * \param hm The hashed message formatted ready to be signed. + * \param x The private key to sign with. + * \param count Iteration counter for generating new values of k when the + * previous one is rejected. + * + * This override uses SHA512 to generate k values. It is used when + * sign() was not passed an explicit hash object by the application. + */ +void P521::generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], uint64_t count) +{ + SHA512 hash; + generateK(k, hm, x, &hash, count); +} diff --git a/libraries/Crypto/P521.h b/libraries/Crypto/P521.h new file mode 100644 index 00000000..09ac02fd --- /dev/null +++ b/libraries/Crypto/P521.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CRYPTO_P521_h +#define CRYPTO_P521_h + +#include "BigNumberUtil.h" + +class Hash; + +class P521 +{ +public: + + static bool eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132]); + + static void dh1(uint8_t k[132], uint8_t f[66]); + static bool dh2(const uint8_t k[132], uint8_t f[66]); + + static void sign(uint8_t signature[132], const uint8_t privateKey[66], + const void *message, size_t len, Hash *hash = 0); + static bool verify(const uint8_t signature[132], + const uint8_t publicKey[132], + const void *message, size_t len, Hash *hash = 0); + + static void generatePrivateKey(uint8_t privateKey[66]); + static void derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66]); + + static bool isValidPrivateKey(const uint8_t privateKey[66]); + static bool isValidPublicKey(const uint8_t publicKey[132]); + + static bool isValidCurvePoint(const uint8_t point[132]) + { + return isValidPublicKey(point); + } + +#if defined(TEST_P521_FIELD_OPS) +public: +#else +private: +#endif + static void evaluate(limb_t *x, limb_t *y, const uint8_t f[66]); + + static void addAffine(limb_t *x1, limb_t *y1, + const limb_t *x2, const limb_t *y2); + + static bool validate(const limb_t *x, const limb_t *y); + static bool inRange(const limb_t *x); + + static void reduce(limb_t *result, const limb_t *x); + static void reduceQuick(limb_t *x); + + static void mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y); + + static void mul(limb_t *result, const limb_t *x, const limb_t *y); + static void square(limb_t *result, const limb_t *x) + { + mul(result, x, x); + } + + static void mulLiteral(limb_t *result, const limb_t *x, limb_t y); + + static void add(limb_t *result, const limb_t *x, const limb_t *y); + static void sub(limb_t *result, const limb_t *x, const limb_t *y); + + static void dblPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *xin, const limb_t *yin, + const limb_t *zin); + static void addPoint(limb_t *xout, limb_t *yout, limb_t *zout, + const limb_t *x1, const limb_t *y1, + const limb_t *z1, const limb_t *x2, + const limb_t *y2); + + static void cmove(limb_t select, limb_t *x, const limb_t *y); + static void cmove1(limb_t select, limb_t *x); + + static void recip(limb_t *result, const limb_t *x); + + static void reduceQ(limb_t *result, const limb_t *r); + static void mulQ(limb_t *result, const limb_t *x, const limb_t *y); + static void recipQ(limb_t *result, const limb_t *x); + + static void generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], Hash *hash, uint64_t count); + static void generateK(uint8_t k[66], const uint8_t hm[66], + const uint8_t x[66], uint64_t count); + + // Constructor and destructor are private - cannot instantiate this class. + P521() {} + ~P521() {} +}; + +#endif diff --git a/libraries/Crypto/examples/TestP521/TestP521.ino b/libraries/Crypto/examples/TestP521/TestP521.ino new file mode 100644 index 00000000..50832c6a --- /dev/null +++ b/libraries/Crypto/examples/TestP521/TestP521.ino @@ -0,0 +1,519 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* +This example runs tests on the P521 algorithm. + +Note: This example is too large to run on the Arduino Uno or other +AVR platforms with 32K or less of flash memory. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +void printNumber(const char *name, const uint8_t *x, size_t len) +{ + static const char hexchars[] = "0123456789ABCDEF"; + Serial.print(name); + Serial.print(" = "); + for (size_t posn = 0; posn < len; ++posn) { + Serial.print(hexchars[(x[posn] >> 4) & 0x0F]); + Serial.print(hexchars[x[posn] & 0x0F]); + } + Serial.println(); +} + +static int P521_memcmp_P(const void *s1, const void *s2, size_t len) +{ + const uint8_t *u1 = (const uint8_t *)s1; + const uint8_t *u2 = (const uint8_t *)s2; + while (len > 0) { + int ch1 = *u1++; + int ch2 = pgm_read_byte(u2++); + if (ch1 != ch2) + return ch1 - ch2; + --len; + } + return 0; +} + +static uint8_t alice_k[132]; +static uint8_t alice_f[66]; +static uint8_t bob_k[132]; +static uint8_t bob_f[66]; + +// Check the eval() function using the test vectors from RFC 5903. +void testEval() +{ + static uint8_t const alice_private[66] PROGMEM = { + 0x00, 0x37, 0xAD, 0xE9, 0x31, 0x9A, 0x89, 0xF4, + 0xDA, 0xBD, 0xB3, 0xEF, 0x41, 0x1A, 0xAC, 0xCC, + 0xA5, 0x12, 0x3C, 0x61, 0xAC, 0xAB, 0x57, 0xB5, + 0x39, 0x3D, 0xCE, 0x47, 0x60, 0x81, 0x72, 0xA0, + 0x95, 0xAA, 0x85, 0xA3, 0x0F, 0xE1, 0xC2, 0x95, + 0x2C, 0x67, 0x71, 0xD9, 0x37, 0xBA, 0x97, 0x77, + 0xF5, 0x95, 0x7B, 0x26, 0x39, 0xBA, 0xB0, 0x72, + 0x46, 0x2F, 0x68, 0xC2, 0x7A, 0x57, 0x38, 0x2D, + 0x4A, 0x52 + }; + static uint8_t const alice_public[132] PROGMEM = { + 0x00, 0x15, 0x41, 0x7E, 0x84, 0xDB, 0xF2, 0x8C, + 0x0A, 0xD3, 0xC2, 0x78, 0x71, 0x33, 0x49, 0xDC, + 0x7D, 0xF1, 0x53, 0xC8, 0x97, 0xA1, 0x89, 0x1B, + 0xD9, 0x8B, 0xAB, 0x43, 0x57, 0xC9, 0xEC, 0xBE, + 0xE1, 0xE3, 0xBF, 0x42, 0xE0, 0x0B, 0x8E, 0x38, + 0x0A, 0xEA, 0xE5, 0x7C, 0x2D, 0x10, 0x75, 0x64, + 0x94, 0x18, 0x85, 0x94, 0x2A, 0xF5, 0xA7, 0xF4, + 0x60, 0x17, 0x23, 0xC4, 0x19, 0x5D, 0x17, 0x6C, + 0xED, 0x3E, 0x01, 0x7C, 0xAE, 0x20, 0xB6, 0x64, + 0x1D, 0x2E, 0xEB, 0x69, 0x57, 0x86, 0xD8, 0xC9, + 0x46, 0x14, 0x62, 0x39, 0xD0, 0x99, 0xE1, 0x8E, + 0x1D, 0x5A, 0x51, 0x4C, 0x73, 0x9D, 0x7C, 0xB4, + 0xA1, 0x0A, 0xD8, 0xA7, 0x88, 0x01, 0x5A, 0xC4, + 0x05, 0xD7, 0x79, 0x9D, 0xC7, 0x5E, 0x7B, 0x7D, + 0x5B, 0x6C, 0xF2, 0x26, 0x1A, 0x6A, 0x7F, 0x15, + 0x07, 0x43, 0x8B, 0xF0, 0x1B, 0xEB, 0x6C, 0xA3, + 0x92, 0x6F, 0x95, 0x82 + }; + static uint8_t const bob_private[66] PROGMEM = { + 0x01, 0x45, 0xBA, 0x99, 0xA8, 0x47, 0xAF, 0x43, + 0x79, 0x3F, 0xDD, 0x0E, 0x87, 0x2E, 0x7C, 0xDF, + 0xA1, 0x6B, 0xE3, 0x0F, 0xDC, 0x78, 0x0F, 0x97, + 0xBC, 0xCC, 0x3F, 0x07, 0x83, 0x80, 0x20, 0x1E, + 0x9C, 0x67, 0x7D, 0x60, 0x0B, 0x34, 0x37, 0x57, + 0xA3, 0xBD, 0xBF, 0x2A, 0x31, 0x63, 0xE4, 0xC2, + 0xF8, 0x69, 0xCC, 0xA7, 0x45, 0x8A, 0xA4, 0xA4, + 0xEF, 0xFC, 0x31, 0x1F, 0x5C, 0xB1, 0x51, 0x68, + 0x5E, 0xB9 + }; + static uint8_t const bob_public[132] PROGMEM = { + 0x00, 0xD0, 0xB3, 0x97, 0x5A, 0xC4, 0xB7, 0x99, + 0xF5, 0xBE, 0xA1, 0x6D, 0x5E, 0x13, 0xE9, 0xAF, + 0x97, 0x1D, 0x5E, 0x9B, 0x98, 0x4C, 0x9F, 0x39, + 0x72, 0x8B, 0x5E, 0x57, 0x39, 0x73, 0x5A, 0x21, + 0x9B, 0x97, 0xC3, 0x56, 0x43, 0x6A, 0xDC, 0x6E, + 0x95, 0xBB, 0x03, 0x52, 0xF6, 0xBE, 0x64, 0xA6, + 0xC2, 0x91, 0x2D, 0x4E, 0xF2, 0xD0, 0x43, 0x3C, + 0xED, 0x2B, 0x61, 0x71, 0x64, 0x00, 0x12, 0xD9, + 0x46, 0x0F, 0x01, 0x5C, 0x68, 0x22, 0x63, 0x83, + 0x95, 0x6E, 0x3B, 0xD0, 0x66, 0xE7, 0x97, 0xB6, + 0x23, 0xC2, 0x7C, 0xE0, 0xEA, 0xC2, 0xF5, 0x51, + 0xA1, 0x0C, 0x2C, 0x72, 0x4D, 0x98, 0x52, 0x07, + 0x7B, 0x87, 0x22, 0x0B, 0x65, 0x36, 0xC5, 0xC4, + 0x08, 0xA1, 0xD2, 0xAE, 0xBB, 0x8E, 0x86, 0xD6, + 0x78, 0xAE, 0x49, 0xCB, 0x57, 0x09, 0x1F, 0x47, + 0x32, 0x29, 0x65, 0x79, 0xAB, 0x44, 0xFC, 0xD1, + 0x7F, 0x0F, 0xC5, 0x6A + }; + static uint8_t const shared_secret[66] PROGMEM = { + 0x01, 0x14, 0x4C, 0x7D, 0x79, 0xAE, 0x69, 0x56, + 0xBC, 0x8E, 0xDB, 0x8E, 0x7C, 0x78, 0x7C, 0x45, + 0x21, 0xCB, 0x08, 0x6F, 0xA6, 0x44, 0x07, 0xF9, + 0x78, 0x94, 0xE5, 0xE6, 0xB2, 0xD7, 0x9B, 0x04, + 0xD1, 0x42, 0x7E, 0x73, 0xCA, 0x4B, 0xAA, 0x24, + 0x0A, 0x34, 0x78, 0x68, 0x59, 0x81, 0x0C, 0x06, + 0xB3, 0xC7, 0x15, 0xA3, 0xA8, 0xCC, 0x31, 0x51, + 0xF2, 0xBE, 0xE4, 0x17, 0x99, 0x6D, 0x19, 0xF3, + 0xDD, 0xEA + }; + + // Evaluate the curve function and check the public keys. + uint8_t result[132]; + Serial.println("Fixed test vectors:"); + Serial.print("Computing Alice's public key ... "); + Serial.flush(); + memcpy_P(alice_f, alice_private, 66); + unsigned long start = micros(); + P521::eval(result, alice_f, 0); + unsigned long elapsed = micros() - start; + if (P521_memcmp_P(result, alice_public, 132) == 0) { + Serial.print("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result, 132); + printNumber("expected", alice_f, 132); + } + Serial.print(" (elapsed "); + Serial.print(elapsed); + Serial.println(" us)"); + Serial.print("Computing Bob's public key ... "); + Serial.flush(); + memcpy_P(bob_f, bob_private, 66); + start = micros(); + P521::eval(result, bob_f, 0); + elapsed = micros() - start; + if (P521_memcmp_P(result, bob_public, 132) == 0) { + Serial.print("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result, 132); + printNumber("expected", bob_f, 132); + } + Serial.print(" (elapsed "); + Serial.print(elapsed); + Serial.println(" us)"); + + // Compute the shared secret from each side. + Serial.print("Computing Alice's shared secret ... "); + Serial.flush(); + memcpy_P(alice_f, alice_private, 66); + memcpy_P(bob_k, bob_public, 132); + memcpy_P(bob_f, shared_secret, 66); + start = micros(); + P521::eval(result, alice_f, bob_k); + elapsed = micros() - start; + if (P521_memcmp_P(result, shared_secret, 66) == 0) { + Serial.print("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result, 66); + printNumber("expected", bob_f, 66); + } + Serial.print(" (elapsed "); + Serial.print(elapsed); + Serial.println(" us)"); + Serial.print("Computing Bob's shared secret ... "); + Serial.flush(); + memcpy_P(bob_f, bob_private, 66); + memcpy_P(alice_k, alice_public, 132); + memcpy_P(alice_f, shared_secret, 66); + start = micros(); + P521::eval(result, bob_f, alice_k); + elapsed = micros() - start; + if (P521_memcmp_P(result, shared_secret, 66) == 0) { + Serial.print("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result, 66); + printNumber("expected", alice_f, 66); + } + Serial.print(" (elapsed "); + Serial.print(elapsed); + Serial.println(" us)"); +} + +void testDH() +{ + Serial.println("Diffie-Hellman key exchange:"); + Serial.print("Generate random k/f for Alice ... "); + Serial.flush(); + unsigned long start = micros(); + P521::dh1(alice_k, alice_f); + unsigned long elapsed = micros() - start; + Serial.print("elapsed "); + Serial.print(elapsed); + Serial.println(" us"); + + Serial.print("Generate random k/f for Bob ... "); + Serial.flush(); + start = micros(); + P521::dh1(bob_k, bob_f); + elapsed = micros() - start; + Serial.print("elapsed "); + Serial.print(elapsed); + Serial.println(" us"); + + Serial.print("Generate shared secret for Alice ... "); + Serial.flush(); + start = micros(); + P521::dh2(bob_k, alice_f); + elapsed = micros() - start; + Serial.print("elapsed "); + Serial.print(elapsed); + Serial.println(" us"); + + Serial.print("Generate shared secret for Bob ... "); + Serial.flush(); + start = micros(); + P521::dh2(alice_k, bob_f); + elapsed = micros() - start; + Serial.print("elapsed "); + Serial.print(elapsed); + Serial.println(" us"); + + Serial.print("Check that the shared secrets match ... "); + if (memcmp(alice_f, bob_f, 66) == 0) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", alice_f, 66); + printNumber("expected", bob_f, 66); + } +} + +struct TestSignKey +{ + uint8_t privateKey[66]; + uint8_t publicKey[132]; +}; + +// Test key from RFC 6979, Appendix A.2.7. +static TestSignKey const testKeyP521 PROGMEM = { + {0x00, 0xFA, 0xD0, 0x6D, 0xAA, 0x62, 0xBA, 0x3B, // x + 0x25, 0xD2, 0xFB, 0x40, 0x13, 0x3D, 0xA7, 0x57, + 0x20, 0x5D, 0xE6, 0x7F, 0x5B, 0xB0, 0x01, 0x8F, + 0xEE, 0x8C, 0x86, 0xE1, 0xB6, 0x8C, 0x7E, 0x75, + 0xCA, 0xA8, 0x96, 0xEB, 0x32, 0xF1, 0xF4, 0x7C, + 0x70, 0x85, 0x58, 0x36, 0xA6, 0xD1, 0x6F, 0xCC, + 0x14, 0x66, 0xF6, 0xD8, 0xFB, 0xEC, 0x67, 0xDB, + 0x89, 0xEC, 0x0C, 0x08, 0xB0, 0xE9, 0x96, 0xB8, + 0x35, 0x38}, + {0x01, 0x89, 0x45, 0x50, 0xD0, 0x78, 0x59, 0x32, // Ux + 0xE0, 0x0E, 0xAA, 0x23, 0xB6, 0x94, 0xF2, 0x13, + 0xF8, 0xC3, 0x12, 0x1F, 0x86, 0xDC, 0x97, 0xA0, + 0x4E, 0x5A, 0x71, 0x67, 0xDB, 0x4E, 0x5B, 0xCD, + 0x37, 0x11, 0x23, 0xD4, 0x6E, 0x45, 0xDB, 0x6B, + 0x5D, 0x53, 0x70, 0xA7, 0xF2, 0x0F, 0xB6, 0x33, + 0x15, 0x5D, 0x38, 0xFF, 0xA1, 0x6D, 0x2B, 0xD7, + 0x61, 0xDC, 0xAC, 0x47, 0x4B, 0x9A, 0x2F, 0x50, + 0x23, 0xA4, + 0x00, 0x49, 0x31, 0x01, 0xC9, 0x62, 0xCD, 0x4D, // Uy + 0x2F, 0xDD, 0xF7, 0x82, 0x28, 0x5E, 0x64, 0x58, + 0x41, 0x39, 0xC2, 0xF9, 0x1B, 0x47, 0xF8, 0x7F, + 0xF8, 0x23, 0x54, 0xD6, 0x63, 0x0F, 0x74, 0x6A, + 0x28, 0xA0, 0xDB, 0x25, 0x74, 0x1B, 0x5B, 0x34, + 0xA8, 0x28, 0x00, 0x8B, 0x22, 0xAC, 0xC2, 0x3F, + 0x92, 0x4F, 0xAA, 0xFB, 0xD4, 0xD3, 0x3F, 0x81, + 0xEA, 0x66, 0x95, 0x6D, 0xFE, 0xAA, 0x2B, 0xFD, + 0xFC, 0xF5} +}; + +struct TestSignVector +{ + const char *name; + const char *data; + uint8_t signature[132]; +}; + +// Test vectors from RFC 6979, Appendix A.2.7. +static TestSignVector const testVectorP521_1 PROGMEM = { + // P-521 test case with SHA-256 and message "sample". + "P-521 #1", + "sample", + {0x01, 0x51, 0x1B, 0xB4, 0xD6, 0x75, 0x11, 0x4F, // r + 0xE2, 0x66, 0xFC, 0x43, 0x72, 0xB8, 0x76, 0x82, + 0xBA, 0xEC, 0xC0, 0x1D, 0x3C, 0xC6, 0x2C, 0xF2, + 0x30, 0x3C, 0x92, 0xB3, 0x52, 0x60, 0x12, 0x65, + 0x9D, 0x16, 0x87, 0x6E, 0x25, 0xC7, 0xC1, 0xE5, + 0x76, 0x48, 0xF2, 0x3B, 0x73, 0x56, 0x4D, 0x67, + 0xF6, 0x1C, 0x6F, 0x14, 0xD5, 0x27, 0xD5, 0x49, + 0x72, 0x81, 0x04, 0x21, 0xE7, 0xD8, 0x75, 0x89, + 0xE1, 0xA7, + 0x00, 0x4A, 0x17, 0x11, 0x43, 0xA8, 0x31, 0x63, // s + 0xD6, 0xDF, 0x46, 0x0A, 0xAF, 0x61, 0x52, 0x26, + 0x95, 0xF2, 0x07, 0xA5, 0x8B, 0x95, 0xC0, 0x64, + 0x4D, 0x87, 0xE5, 0x2A, 0xA1, 0xA3, 0x47, 0x91, + 0x6E, 0x4F, 0x7A, 0x72, 0x93, 0x0B, 0x1B, 0xC0, + 0x6D, 0xBE, 0x22, 0xCE, 0x3F, 0x58, 0x26, 0x4A, + 0xFD, 0x23, 0x70, 0x4C, 0xBB, 0x63, 0xB2, 0x9B, + 0x93, 0x1F, 0x7D, 0xE6, 0xC9, 0xD9, 0x49, 0xA7, + 0xEC, 0xFC} +}; +static TestSignVector const testVectorP521_2 PROGMEM = { + // P-521 test case with SHA-512 and message "sample". + "P-521 #2", + "sample", + {0x00, 0xC3, 0x28, 0xFA, 0xFC, 0xBD, 0x79, 0xDD, // r + 0x77, 0x85, 0x03, 0x70, 0xC4, 0x63, 0x25, 0xD9, + 0x87, 0xCB, 0x52, 0x55, 0x69, 0xFB, 0x63, 0xC5, + 0xD3, 0xBC, 0x53, 0x95, 0x0E, 0x6D, 0x4C, 0x5F, + 0x17, 0x4E, 0x25, 0xA1, 0xEE, 0x90, 0x17, 0xB5, + 0xD4, 0x50, 0x60, 0x6A, 0xDD, 0x15, 0x2B, 0x53, + 0x49, 0x31, 0xD7, 0xD4, 0xE8, 0x45, 0x5C, 0xC9, + 0x1F, 0x9B, 0x15, 0xBF, 0x05, 0xEC, 0x36, 0xE3, + 0x77, 0xFA, + 0x00, 0x61, 0x7C, 0xCE, 0x7C, 0xF5, 0x06, 0x48, // s + 0x06, 0xC4, 0x67, 0xF6, 0x78, 0xD3, 0xB4, 0x08, + 0x0D, 0x6F, 0x1C, 0xC5, 0x0A, 0xF2, 0x6C, 0xA2, + 0x09, 0x41, 0x73, 0x08, 0x28, 0x1B, 0x68, 0xAF, + 0x28, 0x26, 0x23, 0xEA, 0xA6, 0x3E, 0x5B, 0x5C, + 0x07, 0x23, 0xD8, 0xB8, 0xC3, 0x7F, 0xF0, 0x77, + 0x7B, 0x1A, 0x20, 0xF8, 0xCC, 0xB1, 0xDC, 0xCC, + 0x43, 0x99, 0x7F, 0x1E, 0xE0, 0xE4, 0x4D, 0xA4, + 0xA6, 0x7A} +}; +static TestSignVector const testVectorP521_3 PROGMEM = { + // P-521 test case with SHA-256 and message "test". + "P-521 #3", + "test", + {0x00, 0x0E, 0x87, 0x1C, 0x4A, 0x14, 0xF9, 0x93, // r + 0xC6, 0xC7, 0x36, 0x95, 0x01, 0x90, 0x0C, 0x4B, + 0xC1, 0xE9, 0xC7, 0xB0, 0xB4, 0xBA, 0x44, 0xE0, + 0x48, 0x68, 0xB3, 0x0B, 0x41, 0xD8, 0x07, 0x10, + 0x42, 0xEB, 0x28, 0xC4, 0xC2, 0x50, 0x41, 0x1D, + 0x0C, 0xE0, 0x8C, 0xD1, 0x97, 0xE4, 0x18, 0x8E, + 0xA4, 0x87, 0x6F, 0x27, 0x9F, 0x90, 0xB3, 0xD8, + 0xD7, 0x4A, 0x3C, 0x76, 0xE6, 0xF1, 0xE4, 0x65, + 0x6A, 0xA8, + 0x00, 0xCD, 0x52, 0xDB, 0xAA, 0x33, 0xB0, 0x63, // s + 0xC3, 0xA6, 0xCD, 0x80, 0x58, 0xA1, 0xFB, 0x0A, + 0x46, 0xA4, 0x75, 0x4B, 0x03, 0x4F, 0xCC, 0x64, + 0x47, 0x66, 0xCA, 0x14, 0xDA, 0x8C, 0xA5, 0xCA, + 0x9F, 0xDE, 0x00, 0xE8, 0x8C, 0x1A, 0xD6, 0x0C, + 0xCB, 0xA7, 0x59, 0x02, 0x52, 0x99, 0x07, 0x9D, + 0x7A, 0x42, 0x7E, 0xC3, 0xCC, 0x5B, 0x61, 0x9B, + 0xFB, 0xC8, 0x28, 0xE7, 0x76, 0x9B, 0xCD, 0x69, + 0x4E, 0x86} +}; +static TestSignVector const testVectorP521_4 PROGMEM = { + // P-521 test case with SHA-512 and message "test". + "P-521 #4", + "test", + {0x01, 0x3E, 0x99, 0x02, 0x0A, 0xBF, 0x5C, 0xEE, // r + 0x75, 0x25, 0xD1, 0x6B, 0x69, 0xB2, 0x29, 0x65, + 0x2A, 0xB6, 0xBD, 0xF2, 0xAF, 0xFC, 0xAE, 0xF3, + 0x87, 0x73, 0xB4, 0xB7, 0xD0, 0x87, 0x25, 0xF1, + 0x0C, 0xDB, 0x93, 0x48, 0x2F, 0xDC, 0xC5, 0x4E, + 0xDC, 0xEE, 0x91, 0xEC, 0xA4, 0x16, 0x6B, 0x2A, + 0x7C, 0x62, 0x65, 0xEF, 0x0C, 0xE2, 0xBD, 0x70, + 0x51, 0xB7, 0xCE, 0xF9, 0x45, 0xBA, 0xBD, 0x47, + 0xEE, 0x6D, + 0x01, 0xFB, 0xD0, 0x01, 0x3C, 0x67, 0x4A, 0xA7, // s + 0x9C, 0xB3, 0x98, 0x49, 0x52, 0x79, 0x16, 0xCE, + 0x30, 0x1C, 0x66, 0xEA, 0x7C, 0xE8, 0xB8, 0x06, + 0x82, 0x78, 0x6A, 0xD6, 0x0F, 0x98, 0xF7, 0xE7, + 0x8A, 0x19, 0xCA, 0x69, 0xEF, 0xF5, 0xC5, 0x74, + 0x00, 0xE3, 0xB3, 0xA0, 0xAD, 0x66, 0xCE, 0x09, + 0x78, 0x21, 0x4D, 0x13, 0xBA, 0xF4, 0xE9, 0xAC, + 0x60, 0x75, 0x2F, 0x7B, 0x15, 0x5E, 0x2D, 0xE4, + 0xDC, 0xE3} +}; + +void testSignCommon(const struct TestSignVector *_test, Hash *hash) +{ + uint8_t *privateKey = alice_f; + uint8_t *publicKey = alice_k; + uint8_t *sig = bob_k; + static TestSignVector test; + + memcpy_P(&test, _test, sizeof(test)); + + Serial.print(test.name); + Serial.print(" Sign ... "); + Serial.flush(); + + memcpy_P(privateKey, testKeyP521.privateKey, 66); + + unsigned long start = micros(); + P521::sign(sig, privateKey, test.data, strlen(test.data), hash); + unsigned long elapsed = micros() - start; + Serial.print(elapsed); + Serial.print(" us ... "); + + bool ok = !memcmp(sig, test.signature, 132); + if (ok) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", sig, 132); + printNumber("expected", test.signature, 132); + } + + Serial.print(test.name); + Serial.print(" Verify ... "); + Serial.flush(); + + memcpy_P(publicKey, testKeyP521.publicKey, 132); + + start = micros(); + bool verified = P521::verify + (test.signature, publicKey, test.data, strlen(test.data), hash); + elapsed = micros() - start; + Serial.print(elapsed); + Serial.print(" us ... "); + + if (verified) + Serial.println("ok"); + else + Serial.println("failed"); + + Serial.print(test.name); + Serial.print(" Derive Public Key ... "); + Serial.flush(); + + memcpy_P(privateKey, testKeyP521.privateKey, 66); + + start = micros(); + P521::derivePublicKey(publicKey, privateKey); + elapsed = micros() - start; + Serial.print(elapsed); + Serial.print(" us ... "); + + ok = !P521_memcmp_P(publicKey, testKeyP521.publicKey, 132); + if (ok) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", publicKey, 132); + memcpy_P(publicKey, testKeyP521.publicKey, 132); + printNumber("expected", publicKey, 132); + } +} + +void testSignSHA256(const struct TestSignVector *test) +{ + SHA256 hash; + testSignCommon(test, &hash); +} + +void testSignSHA512(const struct TestSignVector *test) +{ + SHA512 hash; + testSignCommon(test, &hash); +} + +void testSign() +{ + Serial.println("Digital signatures:"); + testSignSHA256(&testVectorP521_1); + testSignSHA512(&testVectorP521_2); + testSignSHA256(&testVectorP521_3); + testSignSHA512(&testVectorP521_4); +} + +void setup() +{ + Serial.begin(9600); + + // Start the random number generator. We don't initialise a noise + // source here because we don't need one for testing purposes. + // Real DH applications should of course use a proper noise source. + RNG.begin("TestP521 1.0", 950); + + // Perform the tests. + testEval(); + Serial.println(); + testDH(); + Serial.println(); + testSign(); + Serial.println(); +} + +void loop() +{ +} diff --git a/libraries/Crypto/examples/TestP521Math/TestP521Math.ino b/libraries/Crypto/examples/TestP521Math/TestP521Math.ino new file mode 100644 index 00000000..6997191f --- /dev/null +++ b/libraries/Crypto/examples/TestP521Math/TestP521Math.ino @@ -0,0 +1,612 @@ +/* + * Copyright (C) 2016 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* +This example runs tests on the P521 field mathematics independent +of the full curve operation itself. +*/ + +// Enable access to the internals of P521 to test the raw field ops. +#define TEST_P521_FIELD_OPS 1 + +#include +#include +#include +#include + +// Copy some definitions from the P521 class for convenience. +#define NUM_LIMBS ((66 + sizeof(limb_t) - 1) / sizeof(limb_t)) +#define LIMB_BITS (8 * sizeof(limb_t)) +#define INVERSE_LIMB (~((limb_t)0)) + +// For simpleMod() below we need a type that is 4 times the size of limb_t. +#if BIGNUMBER_LIMB_8BIT +#define qlimb_t uint32_t +#elif BIGNUMBER_LIMB_16BIT +#define qlimb_t uint64_t +#else +#define BIGNUMBER_NO_QLIMB 1 +#endif + +limb_t arg1[NUM_LIMBS]; +limb_t arg2[NUM_LIMBS]; +limb_t result[NUM_LIMBS]; +limb_t result2[NUM_LIMBS * 2 + 1]; +limb_t temp[NUM_LIMBS]; + +// Convert a decimal string in program memory into a number. +void fromString(limb_t *x, uint8_t size, const char *str) +{ + uint8_t ch, posn; + memset(x, 0, sizeof(limb_t) * size); + while ((ch = pgm_read_byte((uint8_t *)str)) != '\0') { + if (ch >= '0' && ch <= '9') { + // Quick and simple method to multiply by 10 and add the new digit. + dlimb_t carry = ch - '0'; + for (posn = 0; posn < size; ++posn) { + carry += ((dlimb_t)x[posn]) * 10U; + x[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + } + ++str; + } +} + +// Compare two numbers of NUM_LIMBS in length. Returns -1, 0, or 1. +int compare(const limb_t *x, const limb_t *y) +{ + for (uint8_t posn = NUM_LIMBS; posn > 0; --posn) { + limb_t a = x[posn - 1]; + limb_t b = y[posn - 1]; + if (a < b) + return -1; + else if (a > b) + return 1; + } + return 0; +} + +// Compare two numbers where one is a decimal string. Returns -1, 0, or 1. +int compare(const limb_t *x, const char *y) +{ + limb_t val[NUM_LIMBS]; + fromString(val, NUM_LIMBS, y); + return compare(x, val); +} + +void printNumber(const char *name, const limb_t *x) +{ + static const char hexchars[] = "0123456789ABCDEF"; + Serial.print(name); + Serial.print(" = "); + for (uint8_t posn = NUM_LIMBS; posn > 0; --posn) { + for (uint8_t bit = LIMB_BITS; bit > 0; ) { + bit -= 4; + Serial.print(hexchars[(x[posn - 1] >> bit) & 0x0F]); + } + Serial.print(' '); + } + Serial.println(); +} + +// Standard numbers that are useful in field operation tests. +char const num_0[] PROGMEM = "0"; +char const num_1[] PROGMEM = "1"; +char const num_2[] PROGMEM = "2"; +char const num_4[] PROGMEM = "4"; +char const num_5[] PROGMEM = "5"; +char const num_128[] PROGMEM = "128"; +char const num_256[] PROGMEM = "256"; +char const num_2_64_m7[] PROGMEM = "18446744073709551609"; // 2^64 - 7 +char const num_2_129_m5[] PROGMEM = "680564733841876926926749214863536422907"; // 2^129 - 5 +char const num_pi[] PROGMEM = "31415926535897932384626433832795028841971693993751058209749445923078164062862"; // 77 digits of pi +char const num_2_255_m253[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819715"; // 2^255 - 253 +char const num_2_255_m20[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819948"; // 2^255 - 20 +char const num_2_255_m19[] PROGMEM = "57896044618658097711785492504343953926634992332820282019728792003956564819949"; // 2^255 - 19 +char const num_2_255_m19_x2[] PROGMEM = "115792089237316195423570985008687907853269984665640564039457584007913129639898"; // (2^255 - 19) * 2 +char const num_2_521_m1[] PROGMEM = "6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151"; // 2^521 - 1 +char const num_2_521_m2[] PROGMEM = "6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057150"; // 2^521 - 2 +char const num_2_521_m20[] PROGMEM = "6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057132"; // 2^521 - 20 +char const num_pi_big[] PROGMEM = "3141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117"; // 157 digits of pi + +// Table of useful numbers less than 2^521 - 1. +const char * const numbers[] = { + num_0, + num_1, + num_2, + num_4, + num_5, + num_128, + num_256, + num_2_64_m7, + num_2_129_m5, + num_pi, + num_2_255_m253, + num_2_255_m20, + num_2_255_m19_x2, + num_pi_big, + num_2_521_m2, + num_2_521_m20, + 0 +}; +#define numbers_count ((sizeof(numbers) / sizeof(numbers[0])) - 1) + +#define foreach_number(var) \ + const char *var = numbers[0]; \ + for (unsigned index##var = 0; index##var < numbers_count; \ + ++index##var, var = numbers[index##var]) + +void printProgMem(const char *str) +{ + uint8_t ch; + while ((ch = pgm_read_byte((uint8_t *)str)) != '\0') { + Serial.print((char)ch); + ++str; + } +} + +// Simple implementation of modular addition to cross-check the library. +void simpleAdd(limb_t *result, const limb_t *x, const limb_t *y) +{ + uint8_t posn; + dlimb_t carry = 0; + for (posn = 0; posn < NUM_LIMBS; ++posn) { + carry += x[posn]; + carry += y[posn]; + result[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } + if (compare(result, num_2_521_m1) >= 0) { + // Subtract 2^521 - 1 to get the final result. + // Same as add 1 and then subtract 2^521. + carry = 1; + for (posn = 0; posn < NUM_LIMBS; ++posn) { + carry += result[posn]; + result[posn] = (limb_t)carry; + carry >>= LIMB_BITS; + } +#if BIGNUMBER_LIMB_8BIT + result[NUM_LIMBS - 1] &= 0x01; +#else + result[NUM_LIMBS - 1] &= 0x1FF; +#endif + } +} + +// Simple implementation of subtraction to cross-check the library. +// Note: this does not reduce the result modulo 2^521 - 1 and we +// assume that x is greater than or equal to y. +void simpleSub(limb_t *result, const limb_t *x, const limb_t *y) +{ + uint8_t posn; + dlimb_t borrow = 0; + for (posn = 0; posn < NUM_LIMBS; ++posn) { + borrow = ((dlimb_t)x[posn]) - y[posn] - borrow; + result[posn] = (limb_t)borrow; + borrow = (borrow >> LIMB_BITS) != 0; + } +} + +// Simple implementation of multiplication to cross-check the library. +// Note: this does not reduce the result modulo 2^521 - 1. +// The "result" buffer must contain at least NUM_LIMBS * 2 limbs. +void simpleMul(limb_t *result, const limb_t *x, const limb_t *y) +{ + memset(result, 0, NUM_LIMBS * 2 * sizeof(limb_t)); + for (uint8_t i = 0; i < NUM_LIMBS; ++i) { + for (uint8_t j = 0; j < NUM_LIMBS; ++j) { + uint8_t n = i + j; + dlimb_t carry = + ((dlimb_t)x[i]) * y[j] + result[n]; + result[n] = (limb_t)carry; + carry >>= LIMB_BITS; + ++n; + while (carry != 0 && n < (NUM_LIMBS * 2)) { + carry += result[n]; + result[n] = (limb_t)carry; + carry >>= LIMB_BITS; + ++n; + } + } + } +} + +#if defined(BIGNUMBER_NO_QLIMB) + +// Quick check to correct the estimate on a quotient word. +static inline limb_t correctEstimate + (limb_t q, limb_t y1, limb_t y2, dlimb_t x01, limb_t x2) +{ + // Algorithm D from section 4.3.1 of "The Art Of Computer Programming", + // D. Knuth, Volume 2, "Seminumerical Algorithms", Second Edition, 1981. + // + // We want to check if (y2 * q) > ((x01 - y1 * q) * b + x2) where + // b is (1 << LIMB_BITS). If it is, then q must be reduced by 1. + // + // One wrinkle that isn't obvious from Knuth's description is that it + // is possible for (x01 - y1 * q) >= b, especially in the case where + // x0 = y1 and q = b - 1. This will cause an overflow of the intermediate + // double-word result ((x01 - y1 * q) * b). + // + // In assembly language, we could use the carry flag to detect when + // (x01 - y1 * q) * b overflows, but we can't access the carry flag + // in C++. So we have to account for the carry in a different way here. + + // Calculate the remainder using the estimated quotient. + dlimb_t r = x01 - ((dlimb_t)y1) * q; + + // If there will be a double-word carry when we calculate (r * b), + // then (y2 * q) is obviously going to be less than (r * b), so we + // can stop here. The estimated quotient is correct. + if (r & (((dlimb_t)INVERSE_LIMB) << LIMB_BITS)) + return q; + + // Bail out if (y2 * q) <= (r * b + x2). The estimate is correct. + dlimb_t y2q = ((dlimb_t)y2) * q; + if (y2q <= ((r << LIMB_BITS) + x2)) + return q; + + // Correct for the estimated quotient being off by 1. + --q; + + // Now repeat the check to correct for q values that are off by 2. + r += y1; // r' = (x01 - y1 * (q - 1)) = (x01 - y1 * q + y2) = r + y1 + if (r & (((dlimb_t)INVERSE_LIMB) << LIMB_BITS)) + return q; + // y2q' = (y2 * (q - 1)) = (y2 * q - y2) = y2q - y2 + if ((y2q - y2) <= ((r << LIMB_BITS) + x2)) + return q; + + // Perform the final correction for q values that are off by 2. + return q - 1; +} + +#endif + +// Shift a big number left by a number of bits. +void shiftLeft(limb_t *x, size_t size, uint8_t shift) +{ + dlimb_t carry = 0; + while (size > 0) { + carry += ((dlimb_t)(*x)) << shift; + *x++ = (limb_t)carry; + carry >>= LIMB_BITS; + --size; + } +} + +// Shift a big number right by a number of bits. +void shiftRight(limb_t *x, size_t size, uint8_t shift) +{ + limb_t carry = 0; + limb_t word; + while (size > 0) { + --size; + word = x[size]; + x[size] = (word >> shift) | carry; + carry = (word << (LIMB_BITS - shift)); + } +} + +// Simple implementation of modular division to cross-check the library. +// Calling this "simple" is a bit of a misnomer. It is a full implementation +// of Algorithm D from section 4.3.1 of "The Art Of Computer Programming", +// D. Knuth, Volume 2, "Seminumerical Algorithms", Second Edition, 1981. +// This is quite slow on embedded platforms, but it should be correct. +// Note: "x" is assumed to be (NUM_LIMBS * 2 + 1) limbs in size because +// we need a limb for the extra leading zero word added by step D1. +void simpleMod(limb_t *x) +{ + limb_t divisor[NUM_LIMBS]; + uint8_t j, k, shift; + + // Step D1. Normalize. + // The divisor (2^521 - 1) and "x" need to be shifted left until + // the top-most bit of the divisor is 1. +#if BIGNUMBER_LIMB_8BIT + shift = 7; +#else + shift = LIMB_BITS - 9; +#endif + fromString(divisor, NUM_LIMBS, num_2_521_m1); + shiftLeft(divisor, NUM_LIMBS, shift); + x[NUM_LIMBS * 2] = 0; + shiftLeft(x, NUM_LIMBS * 2 + 1, shift); + + // Step D2/D7. Loop on j + for (j = 0; j <= NUM_LIMBS; ++j) { + // Step D3. Calculate an estimate of the top-most quotient word. + limb_t *u = x + NUM_LIMBS * 2 - 2 - j; + limb_t *v = divisor + NUM_LIMBS - 2; + limb_t q; + dlimb_t uword = ((((dlimb_t)u[2]) << LIMB_BITS) + u[1]); + if (u[2] == v[1]) + q = ~((limb_t)0); + else + q = (limb_t)(uword / v[1]); + + // Step D3, part 2. Correct the estimate downwards by 1 or 2. + // One subtlety of Knuth's algorithm is that it looks like the test + // is working with double-word quantities but it is actually using + // double-word plus a carry bit. So we need to use qlimb_t for this. +#if !defined(BIGNUMBER_NO_QLIMB) + qlimb_t test = ((((qlimb_t)uword) - ((dlimb_t)q) * v[1]) << LIMB_BITS) + u[0]; + if ((((dlimb_t)q) * v[0]) > test) { + --q; + test = ((((qlimb_t)uword) - ((dlimb_t)q) * v[1]) << LIMB_BITS) + u[0]; + if ((((dlimb_t)q) * v[0]) > test) + --q; + } +#else + // 32-bit platform - we don't have a 128-bit numeric type so we have + // to calculate the estimate in another way to preserve the carry bit. + q = correctEstimate(q, v[0], v[1], uword, u[0]); +#endif + + // Step D4. Multiply and subtract. + u = x + (NUM_LIMBS - j); + v = divisor; + dlimb_t carry = 0; + dlimb_t borrow = 0; + for (k = 0; k < NUM_LIMBS; ++k) { + carry += ((dlimb_t)v[k]) * q; + borrow = ((dlimb_t)u[k]) - ((limb_t)carry) - borrow; + u[k] = (dlimb_t)borrow; + carry >>= LIMB_BITS; + borrow = ((borrow >> LIMB_BITS) != 0); + } + borrow = ((dlimb_t)u[k]) - ((limb_t)carry) - borrow; + u[k] = (dlimb_t)borrow; + + // Step D5. Test remainder. Nothing further to do if no borrow. + if ((borrow >> LIMB_BITS) == 0) + continue; + + // Step D6. Borrow occurred: add back. + carry = 0; + for (k = 0; k < NUM_LIMBS; ++k) { + carry += u[k]; + carry += v[k]; + u[k] = (limb_t)carry; + carry >>= LIMB_BITS; + } + u[k] += (limb_t)carry; + } + + // Step D8. Unnormalize. + // Shift the remainder right to undo the earlier left shift. + shiftRight(x, NUM_LIMBS, shift); +} + +void testAdd(const char *x, const char *y) +{ + printProgMem(x); + Serial.print(" + "); + printProgMem(y); + Serial.print(": "); + Serial.flush(); + + fromString(arg1, NUM_LIMBS, x); + fromString(arg2, NUM_LIMBS, y); + P521::add(result, arg1, arg2); + + simpleAdd(result2, arg1, arg2); + + if (compare(result, result2) == 0) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result); + printNumber("expected", result2); + } +} + +void testAdd() +{ + Serial.println("Addition:"); + foreach_number (x) { + foreach_number (y) { + testAdd(x, y); + } + } + Serial.println(); +} + +void testSub(const char *x, const char *y) +{ + printProgMem(x); + Serial.print(" - "); + printProgMem(y); + Serial.print(": "); + Serial.flush(); + + fromString(arg1, NUM_LIMBS, x); + fromString(arg2, NUM_LIMBS, y); + P521::sub(result, arg1, arg2); + + if (compare(arg1, arg2) >= 0) { + // First argument is larger than the second. + simpleSub(result2, arg1, arg2); + } else { + // First argument is smaller than the second. + // Compute arg1 + (2^521 - 1 - arg2). + fromString(temp, NUM_LIMBS, num_2_521_m1); + simpleSub(result2, temp, arg2); + simpleAdd(result2, arg1, result2); + } + + if (compare(result, result2) == 0) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result); + printNumber("expected", result2); + } +} + +void testSub() +{ + Serial.println("Subtraction:"); + foreach_number (x) { + foreach_number (y) { + testSub(x, y); + } + } + Serial.println(); +} + +void testMul(const char *x, const char *y) +{ + printProgMem(x); + Serial.print(" * "); + printProgMem(y); + Serial.print(": "); + Serial.flush(); + + fromString(arg1, NUM_LIMBS, x); + fromString(arg2, NUM_LIMBS, y); + + if (compare(arg1, arg2) != 0) + P521::mul(result, arg1, arg2); + else + P521::square(result, arg1); + + simpleMul(result2, arg1, arg2); + simpleMod(result2); + + if (compare(result, result2) == 0) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual ", result); + printNumber("expected", result2); + } +} + +void testMul() +{ + Serial.println("Multiplication:"); + foreach_number (x) { + foreach_number (y) { + testMul(x, y); + } + } + Serial.println(); +} + +void testMove(const char *x, const char *y, uint8_t select) +{ + printProgMem(x); + Serial.print(" <- "); + printProgMem(y); + Serial.print(": "); + Serial.flush(); + + fromString(arg1, NUM_LIMBS, x); + fromString(arg2, NUM_LIMBS, y); + + memcpy(result, arg1, NUM_LIMBS * sizeof(limb_t)); + memcpy(result2, arg2, NUM_LIMBS * sizeof(limb_t)); + + // Perform the move using the selection bit. + P521::cmove(select, result, result2); + bool ok = compare(result, arg2) == 0; + + // Reset and test not moving. + memcpy(result, arg1, NUM_LIMBS * sizeof(limb_t)); + memcpy(result2, arg2, NUM_LIMBS * sizeof(limb_t)); + P521::cmove(0, result, result2); + if (ok) + ok = compare(result, arg1) == 0; + + if (ok) { + Serial.println("ok"); + } else { + Serial.println("failed"); + } +} + +void testMove() +{ + Serial.println("Move:"); + uint8_t bit = 0; + foreach_number (x) { + foreach_number (y) { + testMove(x, y, ((uint8_t)1) << bit); + bit = (bit + 1) % 8; + } + } + Serial.println(); +} + +void testRecip(const char *x) +{ + printProgMem(x); + Serial.print("^-1"); + Serial.print(": "); + Serial.flush(); + + fromString(arg1, NUM_LIMBS, x); + P521::recip(result, arg1); + + bool ok; + if (compare(arg1, num_0) == 0) { + // 0^-1 = 0 + ok = (compare(result, num_0) == 0); + } else { + // Multiply the result with arg1 - we expect 1 as the result. + P521::mul(result2, result, arg1); + ok = (compare(result2, num_1) == 0); + } + + if (ok) { + Serial.println("ok"); + } else { + Serial.println("failed"); + printNumber("actual", result); + } +} + +void testRecip() +{ + Serial.println("Reciprocal:"); + foreach_number (x) { + testRecip(x); + } + Serial.println(); +} + +void setup() +{ + Serial.begin(9600); + + testAdd(); + testSub(); + testMul(); + testMove(); + testRecip(); +} + +void loop() +{ +} diff --git a/libraries/Crypto/utility/LimbUtil.h b/libraries/Crypto/utility/LimbUtil.h index 410b0f6b..bdbaca0a 100644 --- a/libraries/Crypto/utility/LimbUtil.h +++ b/libraries/Crypto/utility/LimbUtil.h @@ -26,9 +26,11 @@ #include "ProgMemUtil.h" // Number of limbs in a big number value of various sizes. -#define NUM_LIMBS_128BIT (16 / sizeof(limb_t)) -#define NUM_LIMBS_256BIT (32 / sizeof(limb_t)) -#define NUM_LIMBS_512BIT (64 / sizeof(limb_t)) +#define NUM_LIMBS_BITS(n) \ + (((n) + sizeof(limb_t) * 8 - 1) / (8 * sizeof(limb_t))) +#define NUM_LIMBS_128BIT NUM_LIMBS_BITS(128) +#define NUM_LIMBS_256BIT NUM_LIMBS_BITS(256) +#define NUM_LIMBS_512BIT NUM_LIMBS_BITS(512) // The number of bits in a limb. #define LIMB_BITS (8 * sizeof(limb_t))