commit/25eb9d2eb6357946e103e12533c8b8ec768e30bb/P521_8cpp_source.html

 /*

  * Copyright (C) 2016 Southern Storm Software, Pty Ltd.

  *

  * Permission is hereby granted, free of charge, to any person obtaining a

  * copy of this software and associated documentation files (the "Software"),

  * to deal in the Software without restriction, including without limitation

  * the rights to use, copy, modify, merge, publish, distribute, sublicense,

  * and/or sell copies of the Software, and to permit persons to whom the

  * Software is furnished to do so, subject to the following conditions:

  *

  * The above copyright notice and this permission notice shall be included

  * in all copies or substantial portions of the Software.

  *

  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

  * DEALINGS IN THE SOFTWARE.

  */


 #include "P521.h"

 #include "Crypto.h"

 #include "RNG.h"

 #include "SHA512.h"

 #include "utility/LimbUtil.h"

 #include <string.h>


 // Number of limbs that are needed to represent a 521-bit number.
 #define NUM_LIMBS_521BIT    NUM_LIMBS_BITS(521)

 // Number of limbs that are needed to represent a 1042-bit number
 // (the unreduced product of two 521-bit values).
 // To simplify things we also require that this be twice the size of
 // NUM_LIMBS_521BIT, which involves a little wastage at the high end
 // of one extra limb for 8-bit and 32-bit limbs.  There is no
 // wastage for 16-bit limbs.
 #define NUM_LIMBS_1042BIT   (NUM_LIMBS_BITS(521) * 2)


 // The overhead of clean() calls in mul(), etc can add up to a lot of
 // processing time.  Only do such cleanups if strict mode has been enabled.
 // When P521_STRICT_CLEAN is not defined, strict_clean(x) expands to an
 // empty statement, so the temporaries in the field arithmetic helpers
 // are left on the stack untouched.
 #if defined(P521_STRICT_CLEAN)
 #define strict_clean(x)     clean(x)
 #else
 #define strict_clean(x)     do { ; } while (0)
 #endif


 // Expand the partial 9-bit left over limb at the top of a 521-bit number.
 //
 // With 8-bit limbs the 9 bits do not fit into one limb, so the macro
 // expands to two comma-separated initializers: the low byte followed by
 // the remaining high bit.  For all other limb sizes a single limb is
 // enough and the value is used as-is.
 //
 // The 8-bit form is deliberately written on a single line so that the
 // definition does not depend on a backslash line continuation, which
 // silently breaks if anything (even a blank line) ends up after it.
 #if BIGNUMBER_LIMB_8BIT
 #define LIMB_PARTIAL(value) ((uint8_t)(value)), ((uint8_t)((value) >> 8))
 #else
 #define LIMB_PARTIAL(value) (value)
 #endif


 // The group order "q" value from RFC 4754 and RFC 5903.  This is the
 // same as the "n" value from Appendix D.1.2.5 of NIST FIPS 186-4.
 // The table is stored in program memory with the least significant
 // limbs first; the final entry holds the top 9 bits of the value.
 static limb_t const P521_q[NUM_LIMBS_521BIT] PROGMEM = {
     LIMB_PAIR(0x91386409, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8),
     LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783),
     LIMB_PAIR(0xfffffffa, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff),
     LIMB_PAIR(0xffffffff, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff),
     LIMB_PARTIAL(0x1ff)
 };


 // The "b" value from Appendix D.1.2.5 of NIST FIPS 186-4.
 // This is the constant term of the curve equation
 // y^2 = x^3 - 3 * x + b that validate() checks.  Stored in program
 // memory with the least significant limbs first.
 static limb_t const P521_b[NUM_LIMBS_521BIT] PROGMEM = {
     LIMB_PAIR(0x6b503f00, 0xef451fd4), LIMB_PAIR(0x3d2c34f1, 0x3573df88),
     LIMB_PAIR(0x3bb1bf07, 0x1652c0bd), LIMB_PAIR(0xec7e937b, 0x56193951),
     LIMB_PAIR(0x8ef109e1, 0xb8b48991), LIMB_PAIR(0x99b315f3, 0xa2da725b),
     LIMB_PAIR(0xb68540ee, 0x929a21a0), LIMB_PAIR(0x8e1c9a1f, 0x953eb961),
     LIMB_PARTIAL(0x051)
 };


 // The "Gx" value from Appendix D.1.2.5 of NIST FIPS 186-4.
 // This is the x co-ordinate of the generator point that is used when
 // no explicit point is supplied.  Stored in program memory with the
 // least significant limbs first.
 static limb_t const P521_Gx[NUM_LIMBS_521BIT] PROGMEM = {
     LIMB_PAIR(0xc2e5bd66, 0xf97e7e31), LIMB_PAIR(0x856a429b, 0x3348b3c1),
     LIMB_PAIR(0xa2ffa8de, 0xfe1dc127), LIMB_PAIR(0xefe75928, 0xa14b5e77),
     LIMB_PAIR(0x6b4d3dba, 0xf828af60), LIMB_PAIR(0x053fb521, 0x9c648139),
     LIMB_PAIR(0x2395b442, 0x9e3ecb66), LIMB_PAIR(0x0404e9cd, 0x858e06b7),
     LIMB_PARTIAL(0x0c6)
 };


 // The "Gy" value from Appendix D.1.2.5 of NIST FIPS 186-4.
 // This is the y co-ordinate of the generator point that is used when
 // no explicit point is supplied.  Stored in program memory with the
 // least significant limbs first.
 static limb_t const P521_Gy[NUM_LIMBS_521BIT] PROGMEM = {
     LIMB_PAIR(0x9fd16650, 0x88be9476), LIMB_PAIR(0xa272c240, 0x353c7086),
     LIMB_PAIR(0x3fad0761, 0xc550b901), LIMB_PAIR(0x5ef42640, 0x97ee7299),
     LIMB_PAIR(0x273e662c, 0x17afbd17), LIMB_PAIR(0x579b4468, 0x98f54449),
     LIMB_PAIR(0x2c7d1bd9, 0x5c8a5fb4), LIMB_PAIR(0x9a3bc004, 0x39296a78),
     LIMB_PARTIAL(0x118)
 };


 bool P521::eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])

 {

     limb_t x[NUM_LIMBS_521BIT];

     limb_t y[NUM_LIMBS_521BIT];

     bool ok;


     // Unpack the curve point from the parameters and validate it.

     if (point) {

         BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, point, 66);

         BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, point + 66, 66);

         ok = validate(x, y);

     } else {

         memcpy_P(x, P521_Gx, sizeof(x));

         memcpy_P(y, P521_Gy, sizeof(y));

         ok = true;

     }


     // Evaluate the curve function.

     evaluate(x, y, f);


     // Pack the answer into the result array.

     BigNumberUtil::packBE(result, 66, x, NUM_LIMBS_521BIT);

     BigNumberUtil::packBE(result + 66, 66, y, NUM_LIMBS_521BIT);


     // Clean up.

     clean(x);

     clean(y);

     return ok;

 }


 // First half of a Diffie-Hellman exchange: creates a fresh key pair.
 //
 // k - receives the 132-byte public value produced by derivePublicKey().
 // f - receives the 66-byte private value produced by generatePrivateKey().
 void P521::dh1(uint8_t k[132], uint8_t f[66])
 {
     generatePrivateKey(f);
     derivePublicKey(k, f);
 }


 bool P521::dh2(const uint8_t k[132], uint8_t f[66])

 {

     // Unpack the (x, y) point from k.

     limb_t x[NUM_LIMBS_521BIT];

     limb_t y[NUM_LIMBS_521BIT];

     BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, k, 66);

     BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k + 66, 66);


     // Validate the curve point.  We keep going to preserve the timing.

     bool ok = validate(x, y);


     // Evaluate the curve function.

     evaluate(x, y, f);


     // The secret key is the x component of the final value.

     BigNumberUtil::packBE(f, 66, x, NUM_LIMBS_521BIT);


     // Clean up.

     clean(x);

     clean(y);

     return ok;

 }


 void P521::sign(uint8_t signature[132], const uint8_t privateKey[66],

                 const void *message, size_t len, Hash *hash)

 {

     uint8_t hm[66];

     uint8_t k[66];

     limb_t x[NUM_LIMBS_521BIT];

     limb_t y[NUM_LIMBS_521BIT];

     limb_t t[NUM_LIMBS_521BIT];

     uint64_t count = 0;


     // Format the incoming message, hashing it if necessary.

     if (hash) {

         // Hash the message.

         hash->reset();

         hash->update(message, len);

         len = hash->hashSize();

         if (len > 64)

             len = 64;

         memset(hm, 0, 66 - len);

         hash->finalize(hm + 66 - len, len);

     } else {

         // The message is the hash.

         if (len > 64)

             len = 64;

         memset(hm, 0, 66 - len);

         memcpy(hm + 66 - len, message, len);

     }


     // Keep generating k values until both r and s are non-zero.

     for (;;) {

         // Generate the k value deterministically according to RFC 6979.

         if (hash)

             generateK(k, hm, privateKey, hash, count);

         else

             generateK(k, hm, privateKey, count);


         // Generate r = kG.x mod q.

         memcpy_P(x, P521_Gx, sizeof(x));

         memcpy_P(y, P521_Gy, sizeof(y));

         evaluate(x, y, k);

         BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);

         BigNumberUtil::packBE(signature, 66, x, NUM_LIMBS_521BIT);


         // If r is zero, then we need to generate a new k value.

         // This is utterly improbable, but let's be safe anyway.

         if (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) {

             ++count;

             continue;

         }


         // Generate s = (privateKey * r + hm) / k mod q.

         BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, privateKey, 66);

         mulQ(y, y, x);

         BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, hm, 66);

         BigNumberUtil::add(x, x, y, NUM_LIMBS_521BIT);

         BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);

         BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k, 66);

         recipQ(t, y);

         mulQ(x, x, t);

         BigNumberUtil::packBE(signature + 66, 66, x, NUM_LIMBS_521BIT);


         // Exit the loop if s is non-zero.

         if (!BigNumberUtil::isZero(x, NUM_LIMBS_521BIT))

             break;


         // We need to generate a new k value according to RFC 6979.

         // This is utterly improbable, but let's be safe anyway.

         ++count;

     }


     // Clean up.

     clean(hm);

     clean(k);

     clean(x);

     clean(y);

     clean(t);

 }


 bool P521::verify(const uint8_t signature[132],

                   const uint8_t publicKey[132],

                   const void *message, size_t len, Hash *hash)

 {

     limb_t x[NUM_LIMBS_521BIT];

     limb_t y[NUM_LIMBS_521BIT];

     limb_t r[NUM_LIMBS_521BIT];

     limb_t s[NUM_LIMBS_521BIT];

     limb_t u1[NUM_LIMBS_521BIT];

     limb_t u2[NUM_LIMBS_521BIT];

     uint8_t t[66];

     bool ok = false;


     // Because we are operating on public values, we don't need to

     // be as strict about constant time.  Bail out early if there

     // is a problem with the parameters.


     // Unpack the signature.  The values must be between 1 and q - 1.

     BigNumberUtil::unpackBE(r, NUM_LIMBS_521BIT, signature, 66);

     BigNumberUtil::unpackBE(s, NUM_LIMBS_521BIT, signature + 66, 66);

     if (BigNumberUtil::isZero(r, NUM_LIMBS_521BIT) ||

             BigNumberUtil::isZero(s, NUM_LIMBS_521BIT) ||

             !BigNumberUtil::sub_P(x, r, P521_q, NUM_LIMBS_521BIT) ||

             !BigNumberUtil::sub_P(x, s, P521_q, NUM_LIMBS_521BIT)) {

         goto failed;

     }


     // Unpack the public key and check that it is a valid curve point.

     BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);

     BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);

     if (!validate(x, y)) {

         goto failed;

     }


     // Hash the message to generate hm, which we store into u1.

     if (hash) {

         // Hash the message.

         hash->reset();

         hash->update(message, len);

         len = hash->hashSize();

         if (len > 64)

             len = 64;

         hash->finalize(u2, len);

         BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)u2, len);

     } else {

         // The message is the hash.

         if (len > 64)

             len = 64;

         BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)message, len);

     }


     // Compute u1 = hm * s^-1 mod q and u2 = r * s^-1 mod q.

     recipQ(u2, s);

     mulQ(u1, u1, u2);

     mulQ(u2, r, u2);


     // Compute the curve point R = u2 * publicKey + u1 * G.

     BigNumberUtil::packBE(t, 66, u2, NUM_LIMBS_521BIT);

     evaluate(x, y, t);

     memcpy_P(u2, P521_Gx, sizeof(x));

     memcpy_P(s, P521_Gy, sizeof(y));

     BigNumberUtil::packBE(t, 66, u1, NUM_LIMBS_521BIT);

     evaluate(u2, s, t);

     addAffine(u2, s, x, y);


     // If R.x = r mod q, then the signature is valid.

     BigNumberUtil::reduceQuick_P(u1, u2, P521_q, NUM_LIMBS_521BIT);

     ok = secure_compare(u1, r, NUM_LIMBS_521BIT * sizeof(limb_t));


     // Clean up and exit.

 failed:

     clean(x);

     clean(y);

     clean(r);

     clean(s);

     clean(u1);

     clean(u2);

     clean(t);

     return ok;

 }


 void P521::generatePrivateKey(uint8_t privateKey[66])

 {

     // Generate a random 521-bit value for the private key.  The value

     // must be generated uniformly at random between 1 and q - 1 where q

     // is the group order (RFC 6090).  We use the recommended algorithm

     // from Appendix B of RFC 6090: generate a random 521-bit value

     // and discard it if it is not within the range 1 to q - 1.

     limb_t x[NUM_LIMBS_521BIT];

     do {

         RNG.rand((uint8_t *)x, sizeof(x));

 #if BIGNUMBER_LIMB_8BIT

         x[NUM_LIMBS_521BIT - 1] &= 0x01;

 #else

         x[NUM_LIMBS_521BIT - 1] &= 0x1FF;

 #endif

         BigNumberUtil::packBE(privateKey, 66, x, NUM_LIMBS_521BIT);

     } while (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT) ||

              !BigNumberUtil::sub_P(x, x, P521_q, NUM_LIMBS_521BIT));

     clean(x);

 }


 void P521::derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])

 {

     // Evaluate the curve function starting with the generator.

     limb_t x[NUM_LIMBS_521BIT];

     limb_t y[NUM_LIMBS_521BIT];

     memcpy_P(x, P521_Gx, sizeof(x));

     memcpy_P(y, P521_Gy, sizeof(y));

     evaluate(x, y, privateKey);


     // Pack the (x, y) point into the public key.

     BigNumberUtil::packBE(publicKey, 66, x, NUM_LIMBS_521BIT);

     BigNumberUtil::packBE(publicKey + 66, 66, y, NUM_LIMBS_521BIT);


     // Clean up.

     clean(x);

     clean(y);

 }


 // Checks that a private key lies in the range 1 .. q - 1.
 //
 // privateKey - the key as 66 big-endian bytes.
 //
 // Returns true if the key is non-zero and strictly less than the group
 // order q; false otherwise (including values that use more than 521 bits).
 bool P521::isValidPrivateKey(const uint8_t privateKey[66])
 {
     // The value "q" as a byte array from most to least significant.
     static uint8_t const P521_q_bytes[66] PROGMEM = {
         0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
         0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
         0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
         0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
         0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
         0x64, 0x09
     };
     uint8_t zeroTest = 0;
     uint8_t posn = 66;
     uint16_t borrow = 0;

     // Walk from the least significant byte (index 65) to the most
     // significant byte (index 0).
     while (posn > 0) {
         --posn;

         // Check for zero: accumulate every bit that is set in the key.
         zeroTest |= privateKey[posn];

         // Subtract P521_q_bytes from the key, byte by byte.  The low 8
         // bits of "borrow" hold the difference for this byte and bit 8
         // is set when the subtraction borrowed from the next byte up.
         borrow = ((uint16_t)(privateKey[posn])) -
                  pgm_read_byte(&(P521_q_bytes[posn])) -
                  ((borrow >> 8) & 0x01);
     }

     // The key is less than q exactly when the final subtraction borrowed.
     // Only the borrow flag (bit 8) may be consulted here: the low byte is
     // merely the top byte of the difference and is non-zero for many keys
     // that are greater than q (for example a key of all 0xFF bytes).
     return zeroTest != 0 && (borrow & 0x0100) != 0;
 }


 bool P521::isValidPublicKey(const uint8_t publicKey[132])

 {

     limb_t x[NUM_LIMBS_521BIT];

     limb_t y[NUM_LIMBS_521BIT];

     BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);

     BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);

     bool ok = validate(x, y);

     clean(x);

     clean(y);

     return ok;

 }


 // Scalar multiplication: replaces the affine point (x, y) with f * (x, y).
 //
 // x, y - on entry the affine co-ordinates of the point to multiply; on
 //        exit the affine co-ordinates of the product.
 // f    - the scalar as 66 big-endian bytes.  Only the low 521 bits are
 //        used: bit 0 of f[0] followed by all of f[1] .. f[65].
 //
 // The working value is kept in Jacobian co-ordinates, where (X, Y, Z)
 // stands for the affine point (X / Z^2, Y / Z^3).  The input point has
 // an implied Z of 1.  Every scalar bit costs one doubling and one
 // addition; the addition's result is kept or discarded with cmove() so
 // that the sequence of operations does not depend on the scalar.
 void P521::evaluate(limb_t *x, limb_t *y, const uint8_t f[66])
 {
     limb_t accX[NUM_LIMBS_521BIT];
     limb_t accY[NUM_LIMBS_521BIT];
     limb_t accZ[NUM_LIMBS_521BIT];
     limb_t sumX[NUM_LIMBS_521BIT];
     limb_t sumY[NUM_LIMBS_521BIT];
     limb_t sumZ[NUM_LIMBS_521BIT];

     // The accumulator starts out as the point-at-infinity (Z = 0).
     memset(accX, 0, sizeof(accX));
     memset(accY, 0, sizeof(accY));
     memset(accZ, 0, sizeof(accZ));

     // Top bit of the scalar: nothing to double or add yet, so just load
     // (x, y, 1) into the accumulator if the bit is set.
     uint8_t select = (f[0] & 0x01);
     cmove(select, accX, x);
     cmove(select, accY, y);
     cmove1(select, accZ);

     // Remaining 520 bits, most significant first.  Bit number "bit"
     // lives in byte 1 + bit / 8, counting down from that byte's top bit.
     for (uint16_t bit = 0; bit < 520; ++bit) {
         // acc = 2 * acc
         dblPoint(accX, accY, accZ, accX, accY, accZ);

         // sum = acc + (x, y, 1), computed unconditionally.
         addPoint(sumX, sumY, sumZ, accX, accY, accZ, x, y);

         // Keep the sum only when the current scalar bit is 1.
         select = (f[1 + (bit >> 3)] & (uint8_t)(0x80 >> (bit & 7)));
         cmove(select, accX, sumX);
         cmove(select, accY, sumY);
         cmove(select, accZ, sumZ);
     }

     // Back to affine: x = X * (Z^2)^-1 and y = Y * (Z^3)^-1.
     // sumX and sumY are free now and serve as scratch space.
     recip(sumX, accZ);          // sumX = Z^-1
     square(sumY, sumX);         // sumY = Z^-2
     mul(x, accX, sumY);
     mul(sumY, sumY, sumX);      // sumY = Z^-3
     mul(y, accY, sumY);

     clean(accX);
     clean(accY);
     clean(accZ);
     clean(sumX);
     clean(sumY);
     clean(sumZ);
 }


 // Adds two affine points: (x1, y1) = (x1, y1) + (x2, y2).
 //
 // x1, y1 - first operand on entry, the affine sum on exit.
 // x2, y2 - second operand; not modified.
 //
 // The addition itself is delegated to addPoint() with the first point
 // lifted to Jacobian form (Z = 1); the Jacobian result is then converted
 // back to affine co-ordinates.
 void P521::addAffine(limb_t *x1, limb_t *y1, const limb_t *x2, const limb_t *y2)
 {
     limb_t sumX[NUM_LIMBS_521BIT];
     limb_t sumY[NUM_LIMBS_521BIT];
     limb_t sumZ[NUM_LIMBS_521BIT];
     limb_t one[NUM_LIMBS_521BIT];

     // Build the constant 1 to act as the Z co-ordinate of the first point.
     memset(one, 0, sizeof(one));
     one[0] = 1;

     // Jacobian sum of the two points.
     addPoint(sumX, sumY, sumZ, x1, y1, one, x2, y2);

     // Back to affine: x1 = X * (Z^2)^-1 and y1 = Y * (Z^3)^-1.
     // "one" is reused to hold Z^-1, and sumZ the powers of it.
     recip(one, sumZ);           // one  = Z^-1
     square(sumZ, one);          // sumZ = Z^-2
     mul(x1, sumX, sumZ);
     mul(sumZ, sumZ, one);       // sumZ = Z^-3
     mul(y1, sumY, sumZ);

     clean(sumX);
     clean(sumY);
     clean(sumZ);
     clean(one);
 }


 bool P521::validate(const limb_t *x, const limb_t *y)

 {

     bool result;


     // If x or y is greater than or equal to 2^521 - 1, then the

     // point is definitely not on the curve.  Preserve timing by

     // delaying the reporting of the result until later.

     result = inRange(x);

     result &= inRange(y);


     // We need to check that y^2 = x^3 - 3 * x + b mod 2^521 - 1.

     limb_t t1[NUM_LIMBS_521BIT];

     limb_t t2[NUM_LIMBS_521BIT];

     square(t1, x);

     mul(t1, t1, x);

     mulLiteral(t2, x, 3);

     sub(t1, t1, t2);

     memcpy_P(t2, P521_b, sizeof(t2));

     add(t1, t1, t2);

     square(t2, y);

     result &= secure_compare(t1, t2, sizeof(t1));

     clean(t1);

     clean(t2);

     return result;

 }


 // Tests whether a value is a fully reduced field element.
 //
 // x - the value to test, NUM_LIMBS_521BIT limbs, least significant first.
 //
 // Returns true if x is strictly less than 2^521 - 1 and false otherwise.
 // The function contains no data-dependent branches.
 bool P521::inRange(const limb_t *x)
 {
     // Do a trial subtraction of 2^521 - 1 from x, which is equivalent
     // to adding 1 and subtracting 2^521.  We only need the carry.
     // After the loop "word" is the top limb of x + 1 and "carry" is the
     // carry out of that top limb.
     dlimb_t carry = 1;
     limb_t word = 0;
     for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry += *x++;
         word = (limb_t)carry;
         carry >>= LIMB_BITS;
     }

     // Determine the carry out from the low 521 bits.  The top limb holds
     // 1 valid bit for 8-bit limbs and 9 valid bits otherwise, so anything
     // above that in "word", plus the limb carry, is the overflow.
 #if BIGNUMBER_LIMB_8BIT
     carry = (carry << 7) + (word >> 1);
 #else
     carry = (carry << (LIMB_BITS - 9)) + (word >> 9);
 #endif

     // If the carry is zero, then x was in range.  Otherwise it is out
     // of range.  Check for zero in a way that preserves constant timing:
     // (2^LIMB_BITS - word) >> LIMB_BITS is 1 only when word is zero.
     word = (limb_t)(carry | (carry >> LIMB_BITS));
     word = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS);
     return (bool)word;
 }


 // Reduces a double-width value modulo 2^521 - 1.
 //
 // result - receives the reduced value, NUM_LIMBS_521BIT limbs.
 // x      - the value to reduce, NUM_LIMBS_1042BIT limbs, least
 //          significant first (typically the output of mulNoReduce()).
 //
 // Because 2^521 = 1 mod 2^521 - 1, the high 521 bits of x can simply be
 // added to the low 521 bits.  The high part does not start on a limb
 // boundary, so each high limb is shifted left to line it up with the low
 // part: by LIMB_BITS - 9 bits in general and by 7 bits for 8-bit limbs.
 void P521::reduce(limb_t *result, const limb_t *x)
 {
 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
     // According to NIST FIPS 186-4, we add the high 521 bits to the
     // low 521 bits and then do a trial subtraction of 2^521 - 1.
     // We do both in a single step.  Subtracting 2^521 - 1 is equivalent
     // to adding 1 and subtracting 2^521.
     uint8_t index;
     const limb_t *xl = x;
     const limb_t *xh = x + NUM_LIMBS_521BIT;
     limb_t *rr = result;
     dlimb_t carry;

     // The limb that straddles bit 521: its low 9 bits belong to the low
     // half and the rest are the lowest bits of the high half.  The "+ 1"
     // is the trial subtraction's added 1.
     limb_t word = x[NUM_LIMBS_521BIT - 1];
     carry = (word >> 9) + 1;
     word &= 0x1FF;
     for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
         carry += *xl++;
         carry += ((dlimb_t)(*xh++)) << (LIMB_BITS - 9);
         *rr++ = (limb_t)carry;
         carry >>= LIMB_BITS;
     }
     carry += word;
     carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << (LIMB_BITS - 9);
     word = (limb_t)carry;
     *rr = word;

     // If the carry out was 1, then mask it off and we have the answer.
     // If the carry out was 0, then we need to add 2^521 - 1 back again.
     // To preserve the timing we perform a conditional subtract of 1 and
     // then mask off the high bits.
     carry = ((word >> 9) ^ 0x01) & 0x01;
     rr = result;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry = ((dlimb_t)(*rr)) - carry;
         *rr++ = (limb_t)carry;
         carry = (carry >> LIMB_BITS) & 0x01;
     }
     *(--rr) &= 0x1FF;
 #elif BIGNUMBER_LIMB_8BIT
     // Same as above, but for 8-bit limbs, where the top limb of a
     // 521-bit value holds a single bit and the alignment shift is 7.
     uint8_t index;
     const limb_t *xl = x;
     const limb_t *xh = x + NUM_LIMBS_521BIT;
     limb_t *rr = result;
     dlimb_t carry;
     limb_t word = x[NUM_LIMBS_521BIT - 1];
     carry = (word >> 1) + 1;
     word &= 0x01;
     for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
         carry += *xl++;
         carry += ((dlimb_t)(*xh++)) << 7;
         *rr++ = (limb_t)carry;
         carry >>= LIMB_BITS;
     }
     carry += word;
     // The topmost limb must be aligned with the same shift as every
     // other high limb (7 bits).  For a product of two 521-bit values
     // this limb is always zero, so the shift only matters for callers
     // that pass in wider values.
     carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << 7;
     word = (limb_t)carry;
     *rr = word;

     // Conditional subtract of 1 and mask, exactly as in the wider-limb
     // version but with the carry-out located at bit 1 of the top limb.
     carry = ((word >> 1) ^ 0x01) & 0x01;
     rr = result;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry = ((dlimb_t)(*rr)) - carry;
         *rr++ = (limb_t)carry;
         carry = (carry >> LIMB_BITS) & 0x01;
     }
     *(--rr) &= 0x01;
 #else
     #error "Don't know how to reduce values mod 2^521 - 1"
 #endif
 }


 // Performs a single conditional subtraction of 2^521 - 1 from x in place.
 //
 // x - NUM_LIMBS_521BIT limbs, least significant first; modified in place.
 //
 // This is the cheap reduction used by add(), where the operand exceeds
 // the modulus by at most a small amount.  The function contains no
 // data-dependent branches.
 void P521::reduceQuick(limb_t *x)
 {
     // Perform a trial subtraction of 2^521 - 1 from x.  This is
     // equivalent to adding 1 and subtracting 2^521.
     uint8_t index;
     limb_t *xx = x;
     dlimb_t carry = 1;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry += *xx;
         *xx++ = (limb_t)carry;
         carry >>= LIMB_BITS;
     }

     // If the carry out was 1, then mask it off and we have the answer.
     // If the carry out was 0, then we need to add 2^521 - 1 back again.
     // To preserve the timing we perform a conditional subtract of 1 and
     // then mask off the high bits.  The carry out is bit 521 of x + 1,
     // which sits at bit 9 of the top limb (bit 1 for 8-bit limbs).
 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
     carry = ((x[NUM_LIMBS_521BIT - 1] >> 9) ^ 0x01) & 0x01;
     xx = x;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry = ((dlimb_t)(*xx)) - carry;
         *xx++ = (limb_t)carry;
         carry = (carry >> LIMB_BITS) & 0x01;
     }
     *(--xx) &= 0x1FF;
 #elif BIGNUMBER_LIMB_8BIT
     carry = ((x[NUM_LIMBS_521BIT - 1] >> 1) ^ 0x01) & 0x01;
     xx = x;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry = ((dlimb_t)(*xx)) - carry;
         *xx++ = (limb_t)carry;
         carry = (carry >> LIMB_BITS) & 0x01;
     }
     *(--xx) &= 0x01;
 #endif
 }


 // Schoolbook multiplication without modular reduction.
 //
 // result - receives the full product, NUM_LIMBS_1042BIT limbs.
 // x, y   - the factors, NUM_LIMBS_521BIT limbs each.
 //
 // All limb arrays are least significant limb first.  The product is
 // built one row at a time: row i is x[i] * y, added in at limb offset i.
 void P521::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y)
 {
     uint8_t row, col;
     dlimb_t carry;
     limb_t word;

     // Row 0 initialises the low limbs of the result, so there is
     // nothing to accumulate onto yet.
     carry = 0;
     word = x[0];
     for (col = 0; col < NUM_LIMBS_521BIT; ++col) {
         carry += ((dlimb_t)(y[col])) * word;
         result[col] = (limb_t)carry;
         carry >>= LIMB_BITS;
     }
     result[NUM_LIMBS_521BIT] = (limb_t)carry;

     // Every further row is added onto what is already there, and its
     // final carry initialises the next limb up.
     for (row = 1; row < NUM_LIMBS_521BIT; ++row) {
         word = x[row];
         carry = 0;
         for (col = 0; col < NUM_LIMBS_521BIT; ++col) {
             carry += ((dlimb_t)(y[col])) * word;
             carry += result[row + col];
             result[row + col] = (limb_t)carry;
             carry >>= LIMB_BITS;
         }
         result[row + NUM_LIMBS_521BIT] = (limb_t)carry;
     }
 }


 // Multiplies two field elements: result = x * y mod 2^521 - 1.
 //
 // result - receives the product, NUM_LIMBS_521BIT limbs.
 // x, y   - the factors, NUM_LIMBS_521BIT limbs each.
 //
 // The full product is formed in a local buffer first, so result is only
 // written after x and y have been completely read.
 void P521::mul(limb_t *result, const limb_t *x, const limb_t *y)
 {
     limb_t temp[NUM_LIMBS_1042BIT];
     mulNoReduce(temp, x, y);
     reduce(result, temp);
     // Only wiped when P521_STRICT_CLEAN is defined.
     strict_clean(temp);
 }


 // Multiplies a field element by a small constant:
 // result = x * y mod 2^521 - 1.
 //
 // result - receives the product, NUM_LIMBS_521BIT limbs.
 // x      - the field element, NUM_LIMBS_521BIT limbs.
 // y      - the small multiplier (the callers in this file pass 2, 3, 4
 //          and 8).
 void P521::mulLiteral(limb_t *result, const limb_t *x, limb_t y)
 {
     uint8_t index;
     dlimb_t carry = 0;
     const limb_t *xx = x;
     limb_t *rr = result;

     // Multiply x by the literal and put it into the result array.
     // We assume that y is small enough that overflow from the
     // highest limb will not occur during this process; the carry out
     // of the last limb is discarded.
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry += ((dlimb_t)(*xx++)) * y;
         *rr++ = (limb_t)carry;
         carry >>= LIMB_BITS;
     }

     // Reduce the value modulo 2^521 - 1.  The high half is only a
     // single limb, so we can short-cut some of reduce() here.
 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
     // Everything above bit 9 of the top limb is the overflow beyond 521
     // bits; fold it back into the bottom together with the trial "+ 1".
     limb_t word = result[NUM_LIMBS_521BIT - 1];
     carry = (word >> 9) + 1;
     word &= 0x1FF;
     rr = result;
     for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
         carry += *rr;
         *rr++ = (limb_t)carry;
         carry >>= LIMB_BITS;
     }
     carry += word;
     word = (limb_t)carry;
     *rr = word;

     // If the carry out was 1, then mask it off and we have the answer.
     // If the carry out was 0, then we need to add 2^521 - 1 back again.
     // To preserve the timing we perform a conditional subtract of 1 and
     // then mask off the high bits.
     carry = ((word >> 9) ^ 0x01) & 0x01;
     rr = result;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry = ((dlimb_t)(*rr)) - carry;
         *rr++ = (limb_t)carry;
         carry = (carry >> LIMB_BITS) & 0x01;
     }
     *(--rr) &= 0x1FF;
 #elif BIGNUMBER_LIMB_8BIT
     // Same as above, but for 8-bit limbs, where the top limb holds a
     // single valid bit.
     limb_t word = result[NUM_LIMBS_521BIT - 1];
     carry = (word >> 1) + 1;
     word &= 0x01;
     rr = result;
     for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
         carry += *rr;
         *rr++ = (limb_t)carry;
         carry >>= LIMB_BITS;
     }
     carry += word;
     word = (limb_t)carry;
     *rr = word;
     carry = ((word >> 1) ^ 0x01) & 0x01;
     rr = result;
     for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
         carry = ((dlimb_t)(*rr)) - carry;
         *rr++ = (limb_t)carry;
         carry = (carry >> LIMB_BITS) & 0x01;
     }
     *(--rr) &= 0x01;
 #endif
 }


 // Adds two field elements: result = x + y mod 2^521 - 1.
 //
 // result - receives the sum, NUM_LIMBS_521BIT limbs.
 // x, y   - the operands, NUM_LIMBS_521BIT limbs each.
 //
 // Each limb of x and y is read before the matching limb of result is
 // written.  The raw sum is brought back into range by reduceQuick().
 void P521::add(limb_t *result, const limb_t *x, const limb_t *y)
 {
     dlimb_t sum = 0;
     for (uint8_t limb = 0; limb < NUM_LIMBS_521BIT; ++limb) {
         // Running total of both operand limbs plus the incoming carry.
         sum += x[limb];
         sum += y[limb];
         result[limb] = (limb_t)sum;
         sum >>= LIMB_BITS;
     }
     reduceQuick(result);
 }


 // Subtracts two field elements: result = x - y mod 2^521 - 1.
 //
 // result - receives the difference, NUM_LIMBS_521BIT limbs.
 // x, y   - the operands, NUM_LIMBS_521BIT limbs each.
 //
 // Each limb of x and y is read before the matching limb of result is
 // written.  The function contains no data-dependent branches.
 void P521::sub(limb_t *result, const limb_t *x, const limb_t *y)
 {
     dlimb_t diff = 0;
     uint8_t limb;

     // First pass: plain multi-limb subtraction.  Bit LIMB_BITS of "diff"
     // carries the borrow from one limb to the next.
     for (limb = 0; limb < NUM_LIMBS_521BIT; ++limb) {
         diff = ((dlimb_t)(x[limb])) - (y[limb]) - ((diff >> LIMB_BITS) & 0x01);
         result[limb] = (limb_t)diff;
     }

     // A borrow out of the top limb means the result went negative and
     // 2^521 - 1 has to be added to make it positive again.  That is the
     // same as subtracting 1 and then discarding everything above bit 520,
     // so the second pass subtracts the final borrow (0 or 1) and
     // propagates it through all the limbs.
     dlimb_t pending = (diff >> LIMB_BITS) & 1U;
     for (limb = 0; limb < NUM_LIMBS_521BIT; ++limb) {
         diff = ((dlimb_t)(result[limb])) - pending;
         result[limb] = (limb_t)diff;
         pending = (diff >> LIMB_BITS) & 0x01;
     }

     // Keep only the valid bits of the top limb.
 #if BIGNUMBER_LIMB_8BIT
     result[NUM_LIMBS_521BIT - 1] &= 0x01;
 #else
     result[NUM_LIMBS_521BIT - 1] &= 0x1FF;
 #endif
 }


 // Doubles a point in Jacobian co-ordinates:
 // (xout, yout, zout) = 2 * (xin, yin, zin).
 //
 // xout, yout, zout - receive the doubled point.
 // xin, yin, zin    - the point to double.
 //
 // evaluate() calls this with the outputs aliased to the inputs.  The
 // order of the statements below is what makes that work: each input is
 // read for the last time before the output that shares its storage is
 // first written, so the statements must not be reordered casually.
 void P521::dblPoint(limb_t *xout, limb_t *yout, limb_t *zout,
                     const limb_t *xin, const limb_t *yin,
                     const limb_t *zin)
 {
     limb_t alpha[NUM_LIMBS_521BIT];
     limb_t beta[NUM_LIMBS_521BIT];
     limb_t gamma[NUM_LIMBS_521BIT];
     limb_t delta[NUM_LIMBS_521BIT];
     limb_t tmp[NUM_LIMBS_521BIT];

     // Double the point.  If it is the point at infinity (z = 0),
     // then zout will still be zero at the end of this process so
     // we don't need any special handling for that case.
     square(delta, zin);       // delta = z^2
     square(gamma, yin);       // gamma = y^2
     mul(beta, xin, gamma);    // beta = x * gamma
     sub(tmp, xin, delta);     // alpha = 3 * (x - delta) * (x + delta)
     mulLiteral(alpha, tmp, 3);
     add(tmp, xin, delta);
     mul(alpha, alpha, tmp);
     square(xout, alpha);      // xout = alpha^2 - 8 * beta
     mulLiteral(tmp, beta, 8);
     sub(xout, xout, tmp);
     add(zout, yin, zin);      // zout = (y + z)^2 - gamma - delta
     square(zout, zout);
     sub(zout, zout, gamma);
     sub(zout, zout, delta);
     mulLiteral(yout, beta, 4);// yout = alpha * (4 * beta - xout) - 8 * gamma^2
     sub(yout, yout, xout);
     mul(yout, alpha, yout);
     square(gamma, gamma);
     mulLiteral(gamma, gamma, 8);
     sub(yout, yout, gamma);

     // Clean up (only performed when P521_STRICT_CLEAN is defined).
     strict_clean(alpha);
     strict_clean(beta);
     strict_clean(gamma);
     strict_clean(delta);
     strict_clean(tmp);
 }


 // Adds an affine point to a Jacobian point:
 // (xout, yout, zout) = (x1, y1, z1) + (x2, y2, 1).
 //
 // xout, yout, zout - receive the sum in Jacobian co-ordinates.
 // x1, y1, z1       - first operand in Jacobian co-ordinates; z1 = 0
 //                    denotes the point-at-infinity.
 // x2, y2           - second operand; its z co-ordinate is implicitly 1.
 //
 // The full addition is always computed and the identity case is fixed up
 // afterwards with conditional moves, so there is no branch on z1.
 //
 // NOTE(review): when both operands represent the same affine point, h and
 // r below are both zero and the outputs (including zout) come out as zero
 // instead of the doubled point.  The callers in this file appear to rely
 // on that case not arising - confirm.
 void P521::addPoint(limb_t *xout, limb_t *yout, limb_t *zout,
                     const limb_t *x1, const limb_t *y1,
                     const limb_t *z1, const limb_t *x2,
                     const limb_t *y2)
 {
     limb_t z1z1[NUM_LIMBS_521BIT];
     limb_t u2[NUM_LIMBS_521BIT];
     limb_t s2[NUM_LIMBS_521BIT];
     limb_t h[NUM_LIMBS_521BIT];
     limb_t i[NUM_LIMBS_521BIT];
     limb_t j[NUM_LIMBS_521BIT];
     limb_t r[NUM_LIMBS_521BIT];
     limb_t v[NUM_LIMBS_521BIT];

     // Determine if the first value is the point-at-infinity identity element.
     // The second z value is always 1 so it cannot be the point-at-infinity.
     limb_t p1IsIdentity = BigNumberUtil::isZero(z1, NUM_LIMBS_521BIT);

     // Multiply the points, assuming that z2 = 1.
     square(z1z1, z1);               // z1z1 = z1^2
     mul(u2, x2, z1z1);              // u2 = x2 * z1z1
     mul(s2, y2, z1);                // s2 = y2 * z1 * z1z1
     mul(s2, s2, z1z1);
     sub(h, u2, x1);                 // h = u2 - x1
     mulLiteral(i, h, 2);            // i = (2 * h)^2
     square(i, i);
     sub(r, s2, y1);                 // r = 2 * (s2 - y1)
     add(r, r, r);
     mul(j, h, i);                   // j = h * i
     mul(v, x1, i);                  // v = x1 * i
     square(xout, r);                // xout = r^2 - j - 2 * v
     sub(xout, xout, j);
     sub(xout, xout, v);
     sub(xout, xout, v);
     sub(yout, v, xout);             // yout = r * (v - xout) - 2 * y1 * j
     mul(yout, r, yout);
     mul(j, y1, j);
     sub(yout, yout, j);
     sub(yout, yout, j);
     mul(zout, z1, h);               // zout = 2 * z1 * h
     add(zout, zout, zout);

     // Select the answer to return.  If (x1, y1, z1) was the identity,
     // then the answer is (x2, y2, z2).  Otherwise it is (xout, yout, zout).
     // Conditionally move the second argument over the output if necessary.
     cmove(p1IsIdentity, xout, x2);
     cmove(p1IsIdentity, yout, y2);
     cmove1(p1IsIdentity, zout); // z2 = 1

     // Clean up (only performed when P521_STRICT_CLEAN is defined).
     strict_clean(z1z1);
     strict_clean(u2);
     strict_clean(s2);
     strict_clean(h);
     strict_clean(i);
     strict_clean(j);
     strict_clean(r);
     strict_clean(v);
 }


 // Conditionally copies y over x without branching on the condition.
 //
 // select - zero to leave x untouched; any non-zero value to copy.
 // x      - destination, NUM_LIMBS_521BIT limbs.
 // y      - source, NUM_LIMBS_521BIT limbs.
 void P521::cmove(limb_t select, limb_t *x, const limb_t *y)
 {
     // Stretch "select" into a mask of all zeroes or all ones.  The
     // expression yields 1 only for select == 0, so after the decrement
     // the mask is 0 for "keep" and all ones for "copy".  It does not
     // matter which bit or bits were set in the original value.
     limb_t mask = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
     --mask;

     // XOR in the masked difference: a no-op when the mask is zero, and
     // a replacement of x by y when it is all ones.
     for (uint8_t limb = 0; limb < NUM_LIMBS_521BIT; ++limb)
         x[limb] ^= mask & (x[limb] ^ y[limb]);
 }


 /*
  * Conditionally overwrites the 521-bit value x with the constant 1,
  * without branching on "select".
  *
  * select - zero leaves x unchanged; any non-zero value sets x to 1.
  * x      - destination, NUM_LIMBS_521BIT limbs, updated in place.
  */
 void P521::cmove1(limb_t select, limb_t *x)
 {
     // Same mask construction as cmove(): all-ones when select is
     // non-zero, all-zeroes when it is zero.
     const dlimb_t wrapped = (((dlimb_t)1) << LIMB_BITS) - select;
     limb_t mask = (limb_t)(wrapped >> LIMB_BITS);
     --mask;

     // The lowest limb is steered towards 1 ...
     const limb_t lowDiff = mask & (x[0] ^ 1);
     x[0] ^= lowDiff;

     // ... and every higher limb towards 0 (XOR with itself under the mask).
     for (uint8_t index = 1; index < NUM_LIMBS_521BIT; ++index) {
         const limb_t diff = mask & x[index];
         x[index] ^= diff;
     }
 }


 /*
  * Computes the reciprocal of x modulo p = 2^521 - 1 by raising x to the
  * power (p - 2).
  *
  * result - receives x^(p - 2); NUM_LIMBS_521BIT limbs.
  * x      - value to invert; NUM_LIMBS_521BIT limbs.
  *
  * The big-endian hexadecimal expansion of (p - 2) is
  *      01FF FFFFFFFF FFFFFFFF ... FFFFFFFF FFFFFFFD
  * i.e. 519 one bits followed by "01".  Rather than square-and-multiply
  * bit by bit, a run of ones is built and repeatedly doubled in length.
  */
 void P521::recip(limb_t *result, const limb_t *x)
 {
     limb_t shifted[NUM_LIMBS_521BIT];

     // Start with the exponent 11 (x^3), then append two more one bits
     // to reach the 4-bit pattern 1111.
     square(result, x);
     mul(result, result, x);
     for (uint8_t extra = 0; extra < 2; ++extra) {
         square(result, result);
         mul(result, result, x);
     }

     // Double the run of ones: shift the current exponent left by its own
     // width ("width" squarings) and multiply by the unshifted value.
     // 4 -> 8 -> 16 -> ... -> 512 one bits.
     size_t width = 4;
     while (width <= 256) {
         square(shifted, result);
         for (size_t remaining = width - 1; remaining > 0; --remaining)
             square(shifted, shifted);
         mul(result, result, shifted);
         width <<= 1;
     }

     // The 9 lowest bits of (p - 2) are 111111101: seven more one bits ...
     uint8_t ones = 7;
     do {
         square(result, result);
         mul(result, result, x);
     } while (--ones != 0);

     // ... then a zero bit (square only) and a final one bit.
     square(result, result);
     square(result, result);
     mul(result, result, x);

     // Wipe the scratch value.
     clean(shifted);
 }


 /*
  * Reduces a double-width value modulo q using Barrett reduction
  * (http://en.wikipedia.org/wiki/Barrett_reduction).
  *
  * result - receives r mod q; NUM_LIMBS_521BIT limbs.
  * r      - value to reduce; NUM_LIMBS_1042BIT limbs, assumed to be less
  *          than or equal to (q - 1)^2.
  *
  * With k = 521 (the smallest k such that 2^k > q) and m = floor(4^k / q),
  * the value r - q * floor(m * r / 4^k) is either the answer or at most
  * one subtraction of q away from it.
  */
 void P521::reduceQ(limb_t *result, const limb_t *r)
 {
     // The Barrett constant m.  It is a 522-bit number, which still fits
     // in the limb count of a 521-bit number for limbs of 8 bits or more.
     static limb_t const barrettM[NUM_LIMBS_521BIT] PROGMEM = {
         LIMB_PAIR(0x6EC79BF7, 0x449048E1), LIMB_PAIR(0x7663B851, 0xC44A3647),
         LIMB_PAIR(0x08F65A2F, 0x8033FEB7), LIMB_PAIR(0x40D06994, 0xAE79787C),
         LIMB_PAIR(0x00000005, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000),
         LIMB_PAIR(0x00000000, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000),
         LIMB_PARTIAL(0x200)
     };
     limb_t product[NUM_LIMBS_1042BIT + NUM_LIMBS_521BIT];
     limb_t quotient[NUM_LIMBS_521BIT];

     // product = m * r.
     BigNumberUtil::mul_P(product, r, NUM_LIMBS_1042BIT,
                          barrettM, NUM_LIMBS_521BIT);

     // quotient = product / 4^521 = product >> 1042.  Bit 1042 sits 2 bits
     // into the limb that starts at bit 1040 (8/16-bit limbs) or 18 bits
     // into the limb that starts at bit 1024 (32/64-bit limbs); "window"
     // carries the leftover high bits from one limb into the next.
 #if BIGNUMBER_LIMB_8BIT || BIGNUMBER_LIMB_16BIT
     const limb_t *high = product + NUM_LIMBS_BITS(1040);
     dlimb_t window = high[0] >> 2;
     for (uint8_t limb = 0; limb < NUM_LIMBS_521BIT; ++limb) {
         window += ((dlimb_t)(high[limb + 1])) << (LIMB_BITS - 2);
         quotient[limb] = (limb_t)window;
         window >>= LIMB_BITS;
     }
 #elif BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
     const limb_t *high = product + NUM_LIMBS_BITS(1024);
     dlimb_t window = high[0] >> 18;
     for (uint8_t limb = 0; limb < NUM_LIMBS_521BIT; ++limb) {
         window += ((dlimb_t)(high[limb + 1])) << (LIMB_BITS - 18);
         quotient[limb] = (limb_t)window;
         window >>= LIMB_BITS;
     }
 #endif

     // result = r - quotient * q.  Only the low limbs are subtracted: the
     // high words would all turn into zero after the subtraction.
     BigNumberUtil::mul_P(product, quotient, NUM_LIMBS_521BIT,
                          P521_q, NUM_LIMBS_521BIT);
     BigNumberUtil::sub(result, r, product, NUM_LIMBS_521BIT);

     // One trial subtraction of q finishes the reduction.
     BigNumberUtil::reduceQuick_P(result, result, P521_q, NUM_LIMBS_521BIT);

     // Wipe the intermediate values.
     clean(product);
     clean(quotient);
 }


 /*
  * Multiplies x by y and reduces the product modulo q.
  *
  * result - receives the reduced product; NUM_LIMBS_521BIT limbs.
  * x, y   - the operands passed to mulNoReduce().
  */
 void P521::mulQ(limb_t *result, const limb_t *x, const limb_t *y)
 {
     // Double-width scratch buffer for the unreduced product.
     limb_t product[NUM_LIMBS_1042BIT];

     mulNoReduce(product, x, y);
     reduceQ(result, product);

     // Only wiped when strict cleaning is enabled.
     strict_clean(product);
 }


 /*
  * Computes the reciprocal of x modulo q by raising x to the power (q - 2)
  * with mulQ().
  *
  * result - receives x^(q - 2) mod q; NUM_LIMBS_521BIT limbs.
  * x      - value to invert; NUM_LIMBS_521BIT limbs.
  */
 void P521::recipQ(limb_t *result, const limb_t *x)
 {
     // Bottom 265 bits of q - 2.  The top 256 bits are all ones and are
     // handled separately below.
     static limb_t const P521_q_m2[] PROGMEM = {
         LIMB_PAIR(0x91386407, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8),
         LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783),
         LIMB_PARTIAL(0x1fa)
     };
     limb_t shifted[NUM_LIMBS_521BIT];

     // Top 256 bits, all ones: start from the exponent 11 (x^3) and append
     // two more one bits to get the pattern 1111 ...
     mulQ(result, x, x);
     mulQ(result, result, x);
     for (uint8_t extra = 0; extra < 2; ++extra) {
         mulQ(result, result, result);
         mulQ(result, result, x);
     }

     // ... then double the run of ones (4 -> 8 -> ... -> 256) by shifting
     // the exponent left by its own width and multiplying by the original.
     size_t width = 4;
     while (width <= 128) {
         mulQ(shifted, result, result);
         for (size_t remaining = width - 1; remaining > 0; --remaining)
             mulQ(shifted, shifted, shifted);
         mulQ(result, result, shifted);
         width <<= 1;
     }
     clean(shifted);

     // Bottom 265 bits, from highest to lowest: square for every bit and
     // multiply in x for every one bit.  The branch depends only on the
     // publicly-known constant q - 2, never on the value of x.
     for (size_t bit = 265; bit-- > 0; ) {
         mulQ(result, result, result);
         const limb_t word = pgm_read_limb(&(P521_q_m2[bit / LIMB_BITS]));
         if (word & (((limb_t)1) << (bit % LIMB_BITS)))
             mulQ(result, result, x);
     }
 }


 void P521::generateK(uint8_t k[66], const uint8_t hm[66],

                      const uint8_t x[66], Hash *hash, uint64_t count)

 {

     size_t hlen = hash->hashSize();

     uint8_t V[64];

     uint8_t K[64];

     uint8_t marker;


     // If for some reason a hash function was supplied with more than

     // 512 bits of output, truncate hash values to the first 512 bits.

     // We cannot support more than this yet.

     if (hlen > 64)

         hlen = 64;


     // RFC 6979, Section 3.2, Step a.  Hash the message, reduce modulo q,

     // and produce an octet string the same length as q, bits2octets(H(m)).

     // We support hashes up to 512 bits and q is a 521-bit number, so "hm"

     // is already the bits2octets(H(m)) value that we need.


     // Steps b and c.  Set V to all-ones and K to all-zeroes.

     memset(V, 0x01, hlen);

     memset(K, 0x00, hlen);


     // Step d.  K = HMAC_K(V || 0x00 || x || hm).  We make a small

     // modification here to append the count value if it is non-zero.

     // We use this to generate a new k if we have to re-enter this

     // function because the previous one was rejected by sign().

     // This is slightly different to RFC 6979 which says that the

     // loop in step h below should be continued.  That code path is

     // difficult to access, so instead modify K and V in steps d and f.

     // This alternative construction is compatible with the second

     // variant described in section 3.6 of RFC 6979.

     hash->resetHMAC(K, hlen);

     hash->update(V, hlen);

     marker = 0x00;

     hash->update(&marker, 1);

     hash->update(x, 66);

     hash->update(hm, 66);

     if (count)

         hash->update(&count, sizeof(count));

     hash->finalizeHMAC(K, hlen, K, hlen);


     // Step e.  V = HMAC_K(V)

     hash->resetHMAC(K, hlen);

     hash->update(V, hlen);

     hash->finalizeHMAC(K, hlen, V, hlen);


     // Step f.  K = HMAC_K(V || 0x01 || x || hm)

     hash->resetHMAC(K, hlen);

     hash->update(V, hlen);

     marker = 0x01;

     hash->update(&marker, 1);

     hash->update(x, 66);

     hash->update(hm, 66);

     if (count)

         hash->update(&count, sizeof(count));

     hash->finalizeHMAC(K, hlen, K, hlen);


     // Step g.  V = HMAC_K(V)

     hash->resetHMAC(K, hlen);

     hash->update(V, hlen);

     hash->finalizeHMAC(K, hlen, V, hlen);


     // Step h.  Generate candidate k values until we find what we want.

     for (;;) {

         // Step h.1 and h.2.  Generate a string of 66 bytes in length.

         //      T = empty

         //      while (len(T) < 66)

         //          V = HMAC_K(V)

         //          T = T || V

         size_t posn = 0;

         while (posn < 66) {

             size_t temp = 66 - posn;

             if (temp > hlen)

                 temp = hlen;

             hash->resetHMAC(K, hlen);

             hash->update(V, hlen);

             hash->finalizeHMAC(K, hlen, V, hlen);

             memcpy(k + posn, V, temp);

             posn += temp;

         }


         // Step h.3.  k = bits2int(T) and exit the loop if k is not in

         // the range 1 to q - 1.  Note: We have to extract the 521 most

         // significant bits of T, which means shifting it right by seven

         // bits to put it into the correct form.

         for (posn = 65; posn > 0; --posn)

             k[posn] = (k[posn - 1] << 1) | (k[posn] >> 7);

         k[0] >>= 7;

         if (isValidPrivateKey(k))

             break;


         // Generate new K and V values and try again.

         //      K = HMAC_K(V || 0x00)

         //      V = HMAC_K(V)

         hash->resetHMAC(K, hlen);

         hash->update(V, hlen);

         marker = 0x00;

         hash->update(&marker, 1);

         hash->finalizeHMAC(K, hlen, K, hlen);

         hash->resetHMAC(K, hlen);

         hash->update(V, hlen);

         hash->finalizeHMAC(K, hlen, V, hlen);

     }


     // Clean up.

     clean(V);

     clean(K);

 }


 void P521::generateK(uint8_t k[66], const uint8_t hm[66],

                      const uint8_t x[66], uint64_t count)

 {

     SHA512 hash;

     generateK(k, hm, x, &hash, count);

 }

BigNumberUtil::reduceQuick_P
static void reduceQuick_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Reduces x modulo y using subtraction where y is in program memory.
Definition: BigNumberUtil.cpp:734

P521::eval
static bool eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
Evaluates the curve function.
Definition: P521.cpp:135

BigNumberUtil::add
static limb_t add(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Adds two big numbers.
Definition: BigNumberUtil.cpp:495

P521::generatePrivateKey
static void generatePrivateKey(uint8_t privateKey[66])
Generates a private key for P-521 signing operations.
Definition: P521.cpp:466

BigNumberUtil::sub_P
static limb_t sub_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another where one is in program memory.
Definition: BigNumberUtil.cpp:655

RNGClass::rand
void rand(uint8_t *data, size_t len)
Generates random bytes into a caller-supplied buffer.
Definition: RNG.cpp:516

P521::dh2
static bool dh2(const uint8_t k[132], uint8_t f[66])
Performs phase 2 of an ECDH key exchange using P-521.
Definition: P521.cpp:229

Hash
Abstract base class for cryptographic hash algorithms.
Definition: Hash.h:29

Hash::finalizeHMAC
virtual void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen)=0
Finalizes the HMAC hashing process and returns the hash.

P521::isValidPrivateKey
static bool isValidPrivateKey(const uint8_t privateKey[66])
Validates a private key value to ensure that it is between 1 and q - 1.
Definition: P521.cpp:524

SHA512
SHA-512 hash algorithm.
Definition: SHA512.h:30

P521::derivePublicKey
static void derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
Derives the public key from a private key for P-521 signing operations.
Definition: P521.cpp:497

P521::sign
static void sign(uint8_t signature[132], const uint8_t privateKey[66], const void *message, size_t len, Hash *hash=0)
Signs a message using a specific P-521 private key.
Definition: P521.cpp:276

BigNumberUtil::sub
static limb_t sub(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another.
Definition: BigNumberUtil.cpp:522

Hash::reset
virtual void reset()=0
Resets the hash ready for a new hashing process.

P521::dh1
static void dh1(uint8_t k[132], uint8_t f[66])
Performs phase 1 of an ECDH key exchange using P-521.
Definition: P521.cpp:208

BigNumberUtil::mul_P
static void mul_P(limb_t *result, const limb_t *x, size_t xcount, const limb_t *y, size_t ycount)
Multiplies two big numbers where one is in program memory.
Definition: BigNumberUtil.cpp:680

BigNumberUtil::packBE
static void packBE(uint8_t *bytes, size_t len, const limb_t *limbs, size_t count)
Packs the big-endian byte representation of a big number into a byte array.
Definition: BigNumberUtil.cpp:375

Hash::resetHMAC
virtual void resetHMAC(const void *key, size_t keyLen)=0
Resets the hash ready for a new HMAC hashing process.

P521::verify
static bool verify(const uint8_t signature[132], const uint8_t publicKey[132], const void *message, size_t len, Hash *hash=0)
Verifies a signature using a specific P-521 public key.
Definition: P521.cpp:373

P521::isValidPublicKey
static bool isValidPublicKey(const uint8_t publicKey[132])
Validates a public key to ensure that it is a valid curve point.
Definition: P521.cpp:564

BigNumberUtil::unpackBE
static void unpackBE(limb_t *limbs, size_t count, const uint8_t *bytes, size_t len)
Unpacks the big-endian byte representation of a big number into a limb array.
Definition: BigNumberUtil.cpp:163

Hash::hashSize
virtual size_t hashSize() const =0
Size of the hash result from finalize().

Hash::update
virtual void update(const void *data, size_t len)=0
Updates the hash with more data.

Hash::finalize
virtual void finalize(void *hash, size_t len)=0
Finalizes the hashing process and returns the hash.

BigNumberUtil::isZero
static limb_t isZero(const limb_t *x, size_t size)
Determine if a big number is zero.
Definition: BigNumberUtil.cpp:761