ArduinoLibs
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
P521.cpp
1 /*
2  * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "P521.h"
24 #include "Crypto.h"
25 #include "RNG.h"
26 #include "SHA512.h"
27 #include "utility/LimbUtil.h"
28 #include <string.h>
29 
48 // Number of limbs that are needed to represent a 521-bit number.
49 #define NUM_LIMBS_521BIT NUM_LIMBS_BITS(521)
50 
51 // Number of limbs that are needed to represent a 1042-bit number.
52 // To simply things we also require that this be twice the size of
53 // NUM_LIMB_521BIT which involves a little wastage at the high end
54 // of one extra limb for 8-bit and 32-bit limbs. There is no
55 // wastage for 16-bit limbs.
56 #define NUM_LIMBS_1042BIT (NUM_LIMBS_BITS(521) * 2)
57 
58 // The overhead of clean() calls in mul(), etc can add up to a lot of
59 // processing time. Only do such cleanups if strict mode has been enabled.
60 #if defined(P521_STRICT_CLEAN)
61 #define strict_clean(x) clean(x)
62 #else
63 #define strict_clean(x) do { ; } while (0)
64 #endif
65 
66 // Expand the partial 9-bit left over limb at the top of a 521-bit number.
67 #if BIGNUMBER_LIMB_8BIT
68 #define LIMB_PARTIAL(value) ((uint8_t)(value)), \
69  ((uint8_t)((value) >> 8))
70 #else
71 #define LIMB_PARTIAL(value) (value)
72 #endif
73 
76 // The group order "q" value from RFC 4754 and RFC 5903. This is the
77 // same as the "n" value from Appendix D.1.2.5 of NIST FIPS 186-4.
78 static limb_t const P521_q[NUM_LIMBS_521BIT] PROGMEM = {
79  LIMB_PAIR(0x91386409, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8),
80  LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783),
81  LIMB_PAIR(0xfffffffa, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff),
82  LIMB_PAIR(0xffffffff, 0xffffffff), LIMB_PAIR(0xffffffff, 0xffffffff),
83  LIMB_PARTIAL(0x1ff)
84 };
85 
86 // The "b" value from Appendix D.1.2.5 of NIST FIPS 186-4.
87 static limb_t const P521_b[NUM_LIMBS_521BIT] PROGMEM = {
88  LIMB_PAIR(0x6b503f00, 0xef451fd4), LIMB_PAIR(0x3d2c34f1, 0x3573df88),
89  LIMB_PAIR(0x3bb1bf07, 0x1652c0bd), LIMB_PAIR(0xec7e937b, 0x56193951),
90  LIMB_PAIR(0x8ef109e1, 0xb8b48991), LIMB_PAIR(0x99b315f3, 0xa2da725b),
91  LIMB_PAIR(0xb68540ee, 0x929a21a0), LIMB_PAIR(0x8e1c9a1f, 0x953eb961),
92  LIMB_PARTIAL(0x051)
93 };
94 
95 // The "Gx" value from Appendix D.1.2.5 of NIST FIPS 186-4.
96 static limb_t const P521_Gx[NUM_LIMBS_521BIT] PROGMEM = {
97  LIMB_PAIR(0xc2e5bd66, 0xf97e7e31), LIMB_PAIR(0x856a429b, 0x3348b3c1),
98  LIMB_PAIR(0xa2ffa8de, 0xfe1dc127), LIMB_PAIR(0xefe75928, 0xa14b5e77),
99  LIMB_PAIR(0x6b4d3dba, 0xf828af60), LIMB_PAIR(0x053fb521, 0x9c648139),
100  LIMB_PAIR(0x2395b442, 0x9e3ecb66), LIMB_PAIR(0x0404e9cd, 0x858e06b7),
101  LIMB_PARTIAL(0x0c6)
102 };
103 
104 // The "Gy" value from Appendix D.1.2.5 of NIST FIPS 186-4.
105 static limb_t const P521_Gy[NUM_LIMBS_521BIT] PROGMEM = {
106  LIMB_PAIR(0x9fd16650, 0x88be9476), LIMB_PAIR(0xa272c240, 0x353c7086),
107  LIMB_PAIR(0x3fad0761, 0xc550b901), LIMB_PAIR(0x5ef42640, 0x97ee7299),
108  LIMB_PAIR(0x273e662c, 0x17afbd17), LIMB_PAIR(0x579b4468, 0x98f54449),
109  LIMB_PAIR(0x2c7d1bd9, 0x5c8a5fb4), LIMB_PAIR(0x9a3bc004, 0x39296a78),
110  LIMB_PARTIAL(0x118)
111 };
112 
135 bool P521::eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
136 {
137  limb_t x[NUM_LIMBS_521BIT];
138  limb_t y[NUM_LIMBS_521BIT];
139  bool ok;
140 
141  // Unpack the curve point from the parameters and validate it.
142  if (point) {
143  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, point, 66);
144  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, point + 66, 66);
145  ok = validate(x, y);
146  } else {
147  memcpy_P(x, P521_Gx, sizeof(x));
148  memcpy_P(y, P521_Gy, sizeof(y));
149  ok = true;
150  }
151 
152  // Evaluate the curve function.
153  evaluate(x, y, f);
154 
155  // Pack the answer into the result array.
156  BigNumberUtil::packBE(result, 66, x, NUM_LIMBS_521BIT);
157  BigNumberUtil::packBE(result + 66, 66, y, NUM_LIMBS_521BIT);
158 
159  // Clean up.
160  clean(x);
161  clean(y);
162  return ok;
163 }
164 
208 void P521::dh1(uint8_t k[132], uint8_t f[66])
209 {
211  derivePublicKey(k, f);
212 }
213 
229 bool P521::dh2(const uint8_t k[132], uint8_t f[66])
230 {
231  // Unpack the (x, y) point from k.
232  limb_t x[NUM_LIMBS_521BIT];
233  limb_t y[NUM_LIMBS_521BIT];
234  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, k, 66);
235  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k + 66, 66);
236 
237  // Validate the curve point. We keep going to preserve the timing.
238  bool ok = validate(x, y);
239 
240  // Evaluate the curve function.
241  evaluate(x, y, f);
242 
243  // The secret key is the x component of the final value.
244  BigNumberUtil::packBE(f, 66, x, NUM_LIMBS_521BIT);
245 
246  // Clean up.
247  clean(x);
248  clean(y);
249  return ok;
250 }
251 
276 void P521::sign(uint8_t signature[132], const uint8_t privateKey[66],
277  const void *message, size_t len, Hash *hash)
278 {
279  uint8_t hm[66];
280  uint8_t k[66];
281  limb_t x[NUM_LIMBS_521BIT];
282  limb_t y[NUM_LIMBS_521BIT];
283  limb_t t[NUM_LIMBS_521BIT];
284  uint64_t count = 0;
285 
286  // Format the incoming message, hashing it if necessary.
287  if (hash) {
288  // Hash the message.
289  hash->reset();
290  hash->update(message, len);
291  len = hash->hashSize();
292  if (len > 64)
293  len = 64;
294  memset(hm, 0, 66 - len);
295  hash->finalize(hm + 66 - len, len);
296  } else {
297  // The message is the hash.
298  if (len > 64)
299  len = 64;
300  memset(hm, 0, 66 - len);
301  memcpy(hm + 66 - len, message, len);
302  }
303 
304  // Keep generating k values until both r and s are non-zero.
305  for (;;) {
306  // Generate the k value deterministically according to RFC 6979.
307  if (hash)
308  generateK(k, hm, privateKey, hash, count);
309  else
310  generateK(k, hm, privateKey, count);
311 
312  // Generate r = kG.x mod q.
313  memcpy_P(x, P521_Gx, sizeof(x));
314  memcpy_P(y, P521_Gy, sizeof(y));
315  evaluate(x, y, k);
316  BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);
317  BigNumberUtil::packBE(signature, 66, x, NUM_LIMBS_521BIT);
318 
319  // If r is zero, then we need to generate a new k value.
320  // This is utterly improbable, but let's be safe anyway.
321  if (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) {
322  ++count;
323  continue;
324  }
325 
326  // Generate s = (privateKey * r + hm) / k mod q.
327  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, privateKey, 66);
328  mulQ(y, y, x);
329  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, hm, 66);
330  BigNumberUtil::add(x, x, y, NUM_LIMBS_521BIT);
331  BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);
332  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k, 66);
333  recipQ(t, y);
334  mulQ(x, x, t);
335  BigNumberUtil::packBE(signature + 66, 66, x, NUM_LIMBS_521BIT);
336 
337  // Exit the loop if s is non-zero.
338  if (!BigNumberUtil::isZero(x, NUM_LIMBS_521BIT))
339  break;
340 
341  // We need to generate a new k value according to RFC 6979.
342  // This is utterly improbable, but let's be safe anyway.
343  ++count;
344  }
345 
346  // Clean up.
347  clean(hm);
348  clean(k);
349  clean(x);
350  clean(y);
351  clean(t);
352 }
353 
373 bool P521::verify(const uint8_t signature[132],
374  const uint8_t publicKey[132],
375  const void *message, size_t len, Hash *hash)
376 {
377  limb_t x[NUM_LIMBS_521BIT];
378  limb_t y[NUM_LIMBS_521BIT];
379  limb_t r[NUM_LIMBS_521BIT];
380  limb_t s[NUM_LIMBS_521BIT];
381  limb_t u1[NUM_LIMBS_521BIT];
382  limb_t u2[NUM_LIMBS_521BIT];
383  uint8_t t[66];
384  bool ok = false;
385 
386  // Because we are operating on public values, we don't need to
387  // be as strict about constant time. Bail out early if there
388  // is a problem with the parameters.
389 
390  // Unpack the signature. The values must be between 1 and q - 1.
391  BigNumberUtil::unpackBE(r, NUM_LIMBS_521BIT, signature, 66);
392  BigNumberUtil::unpackBE(s, NUM_LIMBS_521BIT, signature + 66, 66);
393  if (BigNumberUtil::isZero(r, NUM_LIMBS_521BIT) ||
394  BigNumberUtil::isZero(s, NUM_LIMBS_521BIT) ||
395  !BigNumberUtil::sub_P(x, r, P521_q, NUM_LIMBS_521BIT) ||
396  !BigNumberUtil::sub_P(x, s, P521_q, NUM_LIMBS_521BIT)) {
397  goto failed;
398  }
399 
400  // Unpack the public key and check that it is a valid curve point.
401  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);
402  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);
403  if (!validate(x, y)) {
404  goto failed;
405  }
406 
407  // Hash the message to generate hm, which we store into u1.
408  if (hash) {
409  // Hash the message.
410  hash->reset();
411  hash->update(message, len);
412  len = hash->hashSize();
413  if (len > 64)
414  len = 64;
415  hash->finalize(u2, len);
416  BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)u2, len);
417  } else {
418  // The message is the hash.
419  if (len > 64)
420  len = 64;
421  BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)message, len);
422  }
423 
424  // Compute u1 = hm * s^-1 mod q and u2 = r * s^-1 mod q.
425  recipQ(u2, s);
426  mulQ(u1, u1, u2);
427  mulQ(u2, r, u2);
428 
429  // Compute the curve point R = u2 * publicKey + u1 * G.
430  BigNumberUtil::packBE(t, 66, u2, NUM_LIMBS_521BIT);
431  evaluate(x, y, t);
432  memcpy_P(u2, P521_Gx, sizeof(x));
433  memcpy_P(s, P521_Gy, sizeof(y));
434  BigNumberUtil::packBE(t, 66, u1, NUM_LIMBS_521BIT);
435  evaluate(u2, s, t);
436  addAffine(u2, s, x, y);
437 
438  // If R.x = r mod q, then the signature is valid.
439  BigNumberUtil::reduceQuick_P(u1, u2, P521_q, NUM_LIMBS_521BIT);
440  ok = secure_compare(u1, r, NUM_LIMBS_521BIT * sizeof(limb_t));
441 
442  // Clean up and exit.
443 failed:
444  clean(x);
445  clean(y);
446  clean(r);
447  clean(s);
448  clean(u1);
449  clean(u2);
450  clean(t);
451  return ok;
452 }
453 
466 void P521::generatePrivateKey(uint8_t privateKey[66])
467 {
468  // Generate a random 521-bit value for the private key. The value
469  // must be generated uniformly at random between 1 and q - 1 where q
470  // is the group order (RFC 6090). We use the recommended algorithm
471  // from Appendix B of RFC 6090: generate a random 521-bit value
472  // and discard it if it is not within the range 1 to q - 1.
473  limb_t x[NUM_LIMBS_521BIT];
474  do {
475  RNG.rand((uint8_t *)x, sizeof(x));
476 #if BIGNUMBER_LIMB_8BIT
477  x[NUM_LIMBS_521BIT - 1] &= 0x01;
478 #else
479  x[NUM_LIMBS_521BIT - 1] &= 0x1FF;
480 #endif
481  BigNumberUtil::packBE(privateKey, 66, x, NUM_LIMBS_521BIT);
482  } while (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT) ||
483  !BigNumberUtil::sub_P(x, x, P521_q, NUM_LIMBS_521BIT));
484  clean(x);
485 }
486 
497 void P521::derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
498 {
499  // Evaluate the curve function starting with the generator.
500  limb_t x[NUM_LIMBS_521BIT];
501  limb_t y[NUM_LIMBS_521BIT];
502  memcpy_P(x, P521_Gx, sizeof(x));
503  memcpy_P(y, P521_Gy, sizeof(y));
504  evaluate(x, y, privateKey);
505 
506  // Pack the (x, y) point into the public key.
507  BigNumberUtil::packBE(publicKey, 66, x, NUM_LIMBS_521BIT);
508  BigNumberUtil::packBE(publicKey + 66, 66, y, NUM_LIMBS_521BIT);
509 
510  // Clean up.
511  clean(x);
512  clean(y);
513 }
514 
524 bool P521::isValidPrivateKey(const uint8_t privateKey[66])
525 {
526  // The value "q" as a byte array from most to least significant.
527  static uint8_t const P521_q_bytes[66] PROGMEM = {
528  0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
529  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
530  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
531  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
532  0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
533  0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
534  0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
535  0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
536  0x64, 0x09
537  };
538  uint8_t zeroTest = 0;
539  uint8_t posn = 66;
540  uint16_t borrow = 0;
541  while (posn > 0) {
542  --posn;
543 
544  // Check for zero.
545  zeroTest |= privateKey[posn];
546 
547  // Subtract P521_q_bytes from the key. If there is no borrow,
548  // then the key value was greater than or equal to q.
549  borrow = ((uint16_t)(privateKey[posn])) -
550  pgm_read_byte(&(P521_q_bytes[posn])) -
551  ((borrow >> 8) & 0x01);
552  }
553  return zeroTest != 0 && borrow != 0;
554 }
555 
564 bool P521::isValidPublicKey(const uint8_t publicKey[132])
565 {
566  limb_t x[NUM_LIMBS_521BIT];
567  limb_t y[NUM_LIMBS_521BIT];
568  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);
569  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);
570  bool ok = validate(x, y);
571  clean(x);
572  clean(y);
573  return ok;
574 }
575 
597 void P521::evaluate(limb_t *x, limb_t *y, const uint8_t f[66])
598 {
599  limb_t x1[NUM_LIMBS_521BIT];
600  limb_t y1[NUM_LIMBS_521BIT];
601  limb_t z1[NUM_LIMBS_521BIT];
602  limb_t x2[NUM_LIMBS_521BIT];
603  limb_t y2[NUM_LIMBS_521BIT];
604  limb_t z2[NUM_LIMBS_521BIT];
605 
606  // We want the input in Jacobian co-ordinates. The point (x, y, z)
607  // corresponds to the affine point (x / z^2, y / z^3), so if we set z
608  // to 1 we end up with Jacobian co-ordinates. Remember that z is 1
609  // and continue on.
610 
611  // Set the answer to the point-at-infinity initially (z = 0).
612  memset(x1, 0, sizeof(x1));
613  memset(y1, 0, sizeof(y1));
614  memset(z1, 0, sizeof(z1));
615 
616  // Special handling for the highest bit. We can skip dblPoint()/addPoint()
617  // and simply conditionally move (x, y, z) into (x1, y1, z1).
618  uint8_t select = (f[0] & 0x01);
619  cmove(select, x1, x);
620  cmove(select, y1, y);
621  cmove1(select, z1); // z = 1
622 
623  // Iterate over the remaining 520 bits of f from highest to lowest.
624  uint8_t mask = 0x80;
625  uint8_t fposn = 1;
626  for (uint16_t t = 520; t > 0; --t) {
627  // Double the answer.
628  dblPoint(x1, y1, z1, x1, y1, z1);
629 
630  // Add (x, y, z) to (x1, y1, z1) for the next 1 bit.
631  // We must always do this to preserve the overall timing.
632  // The z value is always 1 so we can omit that argument.
633  addPoint(x2, y2, z2, x1, y1, z1, x, y/*, z*/);
634 
635  // If the bit was 1, then move (x2, y2, z2) into (x1, y1, z1).
636  select = (f[fposn] & mask);
637  cmove(select, x1, x2);
638  cmove(select, y1, y2);
639  cmove(select, z1, z2);
640 
641  // Move onto the next bit.
642  mask >>= 1;
643  if (!mask) {
644  ++fposn;
645  mask = 0x80;
646  }
647  }
648 
649  // Convert from Jacobian co-ordinates back into affine co-ordinates.
650  // x = x1 * (z1^2)^-1, y = y1 * (z1^3)^-1.
651  recip(x2, z1);
652  square(y2, x2);
653  mul(x, x1, y2);
654  mul(y2, y2, x2);
655  mul(y, y1, y2);
656 
657  // Clean up.
658  clean(x1);
659  clean(y1);
660  clean(z1);
661  clean(x2);
662  clean(y2);
663  clean(z2);
664 }
665 
676 void P521::addAffine(limb_t *x1, limb_t *y1, const limb_t *x2, const limb_t *y2)
677 {
678  limb_t xout[NUM_LIMBS_521BIT];
679  limb_t yout[NUM_LIMBS_521BIT];
680  limb_t zout[NUM_LIMBS_521BIT];
681  limb_t z1[NUM_LIMBS_521BIT];
682 
683  // z1 = 1
684  z1[0] = 1;
685  memset(z1 + 1, 0, (NUM_LIMBS_521BIT - 1) * sizeof(limb_t));
686 
687  // Add the two points.
688  addPoint(xout, yout, zout, x1, y1, z1, x2, y2/*, z2*/);
689 
690  // Convert from Jacobian co-ordinates back into affine co-ordinates.
691  // x1 = xout * (zout^2)^-1, y1 = yout * (zout^3)^-1.
692  recip(z1, zout);
693  square(zout, z1);
694  mul(x1, xout, zout);
695  mul(zout, zout, z1);
696  mul(y1, yout, zout);
697 
698  // Clean up.
699  clean(xout);
700  clean(yout);
701  clean(zout);
702  clean(z1);
703 }
704 
714 bool P521::validate(const limb_t *x, const limb_t *y)
715 {
716  bool result;
717 
718  // If x or y is greater than or equal to 2^521 - 1, then the
719  // point is definitely not on the curve. Preserve timing by
720  // delaying the reporting of the result until later.
721  result = inRange(x);
722  result &= inRange(y);
723 
724  // We need to check that y^2 = x^3 - 3 * x + b mod 2^521 - 1.
725  limb_t t1[NUM_LIMBS_521BIT];
726  limb_t t2[NUM_LIMBS_521BIT];
727  square(t1, x);
728  mul(t1, t1, x);
729  mulLiteral(t2, x, 3);
730  sub(t1, t1, t2);
731  memcpy_P(t2, P521_b, sizeof(t2));
732  add(t1, t1, t2);
733  square(t2, y);
734  result &= secure_compare(t1, t2, sizeof(t1));
735  clean(t1);
736  clean(t2);
737  return result;
738 }
739 
748 bool P521::inRange(const limb_t *x)
749 {
750  // Do a trial subtraction of 2^521 - 1 from x, which is equivalent
751  // to adding 1 and subtracting 2^521. We only need the carry.
752  dlimb_t carry = 1;
753  limb_t word = 0;
754  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
755  carry += *x++;
756  word = (limb_t)carry;
757  carry >>= LIMB_BITS;
758  }
759 
760  // Determine the carry out from the low 521 bits.
761 #if BIGNUMBER_LIMB_8BIT
762  carry = (carry << 7) + (word >> 1);
763 #else
764  carry = (carry << (LIMB_BITS - 9)) + (word >> 9);
765 #endif
766 
767  // If the carry is zero, then x was in range. Otherwise it is out
768  // of range. Check for zero in a way that preserves constant timing.
769  word = (limb_t)(carry | (carry >> LIMB_BITS));
770  word = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS);
771  return (bool)word;
772 }
773 
783 void P521::reduce(limb_t *result, const limb_t *x)
784 {
785 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
786  // According to NIST FIPS 186-4, we add the high 521 bits to the
787  // low 521 bits and then do a trial subtraction of 2^521 - 1.
788  // We do both in a single step. Subtracting 2^521 - 1 is equivalent
789  // to adding 1 and subtracting 2^521.
790  uint8_t index;
791  const limb_t *xl = x;
792  const limb_t *xh = x + NUM_LIMBS_521BIT;
793  limb_t *rr = result;
794  dlimb_t carry;
795  limb_t word = x[NUM_LIMBS_521BIT - 1];
796  carry = (word >> 9) + 1;
797  word &= 0x1FF;
798  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
799  carry += *xl++;
800  carry += ((dlimb_t)(*xh++)) << (LIMB_BITS - 9);
801  *rr++ = (limb_t)carry;
802  carry >>= LIMB_BITS;
803  }
804  carry += word;
805  carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << (LIMB_BITS - 9);
806  word = (limb_t)carry;
807  *rr = word;
808 
809  // If the carry out was 1, then mask it off and we have the answer.
810  // If the carry out was 0, then we need to add 2^521 - 1 back again.
811  // To preserve the timing we perform a conditional subtract of 1 and
812  // then mask off the high bits.
813  carry = ((word >> 9) ^ 0x01) & 0x01;
814  rr = result;
815  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
816  carry = ((dlimb_t)(*rr)) - carry;
817  *rr++ = (limb_t)carry;
818  carry = (carry >> LIMB_BITS) & 0x01;
819  }
820  *(--rr) &= 0x1FF;
821 #elif BIGNUMBER_LIMB_8BIT
822  // Same as above, but for 8-bit limbs.
823  uint8_t index;
824  const limb_t *xl = x;
825  const limb_t *xh = x + NUM_LIMBS_521BIT;
826  limb_t *rr = result;
827  dlimb_t carry;
828  limb_t word = x[NUM_LIMBS_521BIT - 1];
829  carry = (word >> 1) + 1;
830  word &= 0x01;
831  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
832  carry += *xl++;
833  carry += ((dlimb_t)(*xh++)) << 7;
834  *rr++ = (limb_t)carry;
835  carry >>= LIMB_BITS;
836  }
837  carry += word;
838  carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << 1;
839  word = (limb_t)carry;
840  *rr = word;
841  carry = ((word >> 1) ^ 0x01) & 0x01;
842  rr = result;
843  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
844  carry = ((dlimb_t)(*rr)) - carry;
845  *rr++ = (limb_t)carry;
846  carry = (carry >> LIMB_BITS) & 0x01;
847  }
848  *(--rr) &= 0x01;
849 #else
850  #error "Don't know how to reduce values mod 2^521 - 1"
851 #endif
852 }
853 
866 void P521::reduceQuick(limb_t *x)
867 {
868  // Perform a trial subtraction of 2^521 - 1 from x. This is
869  // equivalent to adding 1 and subtracting 2^521 - 1.
870  uint8_t index;
871  limb_t *xx = x;
872  dlimb_t carry = 1;
873  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
874  carry += *xx;
875  *xx++ = (limb_t)carry;
876  carry >>= LIMB_BITS;
877  }
878 
879  // If the carry out was 1, then mask it off and we have the answer.
880  // If the carry out was 0, then we need to add 2^521 - 1 back again.
881  // To preserve the timing we perform a conditional subtract of 1 and
882  // then mask off the high bits.
883 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
884  carry = ((x[NUM_LIMBS_521BIT - 1] >> 9) ^ 0x01) & 0x01;
885  xx = x;
886  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
887  carry = ((dlimb_t)(*xx)) - carry;
888  *xx++ = (limb_t)carry;
889  carry = (carry >> LIMB_BITS) & 0x01;
890  }
891  *(--xx) &= 0x1FF;
892 #elif BIGNUMBER_LIMB_8BIT
893  carry = ((x[NUM_LIMBS_521BIT - 1] >> 1) ^ 0x01) & 0x01;
894  xx = x;
895  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
896  carry = ((dlimb_t)(*xx)) - carry;
897  *xx++ = (limb_t)carry;
898  carry = (carry >> LIMB_BITS) & 0x01;
899  }
900  *(--xx) &= 0x01;
901 #endif
902 }
903 
916 void P521::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y)
917 {
918  uint8_t i, j;
919  dlimb_t carry;
920  limb_t word;
921  const limb_t *yy;
922  limb_t *rr;
923 
924  // Multiply the lowest word of x by y.
925  carry = 0;
926  word = x[0];
927  yy = y;
928  rr = result;
929  for (i = 0; i < NUM_LIMBS_521BIT; ++i) {
930  carry += ((dlimb_t)(*yy++)) * word;
931  *rr++ = (limb_t)carry;
932  carry >>= LIMB_BITS;
933  }
934  *rr = (limb_t)carry;
935 
936  // Multiply and add the remaining words of x by y.
937  for (i = 1; i < NUM_LIMBS_521BIT; ++i) {
938  word = x[i];
939  carry = 0;
940  yy = y;
941  rr = result + i;
942  for (j = 0; j < NUM_LIMBS_521BIT; ++j) {
943  carry += ((dlimb_t)(*yy++)) * word;
944  carry += *rr;
945  *rr++ = (limb_t)carry;
946  carry >>= LIMB_BITS;
947  }
948  *rr = (limb_t)carry;
949  }
950 }
951 
962 void P521::mul(limb_t *result, const limb_t *x, const limb_t *y)
963 {
964  limb_t temp[NUM_LIMBS_1042BIT];
965  mulNoReduce(temp, x, y);
966  reduce(result, temp);
967  strict_clean(temp);
968 }
969 
989 void P521::mulLiteral(limb_t *result, const limb_t *x, limb_t y)
990 {
991  uint8_t index;
992  dlimb_t carry = 0;
993  const limb_t *xx = x;
994  limb_t *rr = result;
995 
996  // Multiply x by the literal and put it into the result array.
997  // We assume that y is small enough that overflow from the
998  // highest limb will not occur during this process.
999  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1000  carry += ((dlimb_t)(*xx++)) * y;
1001  *rr++ = (limb_t)carry;
1002  carry >>= LIMB_BITS;
1003  }
1004 
1005  // Reduce the value modulo 2^521 - 1. The high half is only a
1006  // single limb, so we can short-cut some of reduce() here.
1007 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
1008  limb_t word = result[NUM_LIMBS_521BIT - 1];
1009  carry = (word >> 9) + 1;
1010  word &= 0x1FF;
1011  rr = result;
1012  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
1013  carry += *rr;
1014  *rr++ = (limb_t)carry;
1015  carry >>= LIMB_BITS;
1016  }
1017  carry += word;
1018  word = (limb_t)carry;
1019  *rr = word;
1020 
1021  // If the carry out was 1, then mask it off and we have the answer.
1022  // If the carry out was 0, then we need to add 2^521 - 1 back again.
1023  // To preserve the timing we perform a conditional subtract of 1 and
1024  // then mask off the high bits.
1025  carry = ((word >> 9) ^ 0x01) & 0x01;
1026  rr = result;
1027  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1028  carry = ((dlimb_t)(*rr)) - carry;
1029  *rr++ = (limb_t)carry;
1030  carry = (carry >> LIMB_BITS) & 0x01;
1031  }
1032  *(--rr) &= 0x1FF;
1033 #elif BIGNUMBER_LIMB_8BIT
1034  // Same as above, but for 8-bit limbs.
1035  limb_t word = result[NUM_LIMBS_521BIT - 1];
1036  carry = (word >> 1) + 1;
1037  word &= 0x01;
1038  rr = result;
1039  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
1040  carry += *rr;
1041  *rr++ = (limb_t)carry;
1042  carry >>= LIMB_BITS;
1043  }
1044  carry += word;
1045  word = (limb_t)carry;
1046  *rr = word;
1047  carry = ((word >> 1) ^ 0x01) & 0x01;
1048  rr = result;
1049  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1050  carry = ((dlimb_t)(*rr)) - carry;
1051  *rr++ = (limb_t)carry;
1052  carry = (carry >> LIMB_BITS) & 0x01;
1053  }
1054  *(--rr) &= 0x01;
1055 #endif
1056 }
1057 
1068 void P521::add(limb_t *result, const limb_t *x, const limb_t *y)
1069 {
1070  dlimb_t carry = 0;
1071  limb_t *rr = result;
1072  for (uint8_t posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1073  carry += *x++;
1074  carry += *y++;
1075  *rr++ = (limb_t)carry;
1076  carry >>= LIMB_BITS;
1077  }
1078  reduceQuick(result);
1079 }
1080 
1091 void P521::sub(limb_t *result, const limb_t *x, const limb_t *y)
1092 {
1093  dlimb_t borrow;
1094  uint8_t posn;
1095  limb_t *rr = result;
1096 
1097  // Subtract y from x to generate the intermediate result.
1098  borrow = 0;
1099  for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1100  borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01);
1101  *rr++ = (limb_t)borrow;
1102  }
1103 
1104  // If we had a borrow, then the result has gone negative and we
1105  // have to add 2^521 - 1 to the result to make it positive again.
1106  // The top bits of "borrow" will be all 1's if there is a borrow
1107  // or it will be all 0's if there was no borrow. Easiest is to
1108  // conditionally subtract 1 and then mask off the high bits.
1109  rr = result;
1110  borrow = (borrow >> LIMB_BITS) & 1U;
1111  borrow = ((dlimb_t)(*rr)) - borrow;
1112  *rr++ = (limb_t)borrow;
1113  for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) {
1114  borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01);
1115  *rr++ = (limb_t)borrow;
1116  }
1117 #if BIGNUMBER_LIMB_8BIT
1118  *(--rr) &= 0x01;
1119 #else
1120  *(--rr) &= 0x1FF;
1121 #endif
1122 }
1123 
1139 void P521::dblPoint(limb_t *xout, limb_t *yout, limb_t *zout,
1140  const limb_t *xin, const limb_t *yin,
1141  const limb_t *zin)
1142 {
1143  limb_t alpha[NUM_LIMBS_521BIT];
1144  limb_t beta[NUM_LIMBS_521BIT];
1145  limb_t gamma[NUM_LIMBS_521BIT];
1146  limb_t delta[NUM_LIMBS_521BIT];
1147  limb_t tmp[NUM_LIMBS_521BIT];
1148 
1149  // Double the point. If it is the point at infinity (z = 0),
1150  // then zout will still be zero at the end of this process so
1151  // we don't need any special handling for that case.
1152  square(delta, zin); // delta = z^2
1153  square(gamma, yin); // gamma = y^2
1154  mul(beta, xin, gamma); // beta = x * gamma
1155  sub(tmp, xin, delta); // alpha = 3 * (x - delta) * (x + delta)
1156  mulLiteral(alpha, tmp, 3);
1157  add(tmp, xin, delta);
1158  mul(alpha, alpha, tmp);
1159  square(xout, alpha); // xout = alpha^2 - 8 * beta
1160  mulLiteral(tmp, beta, 8);
1161  sub(xout, xout, tmp);
1162  add(zout, yin, zin); // zout = (y + z)^2 - gamma - delta
1163  square(zout, zout);
1164  sub(zout, zout, gamma);
1165  sub(zout, zout, delta);
1166  mulLiteral(yout, beta, 4);// yout = alpha * (4 * beta - xout) - 8 * gamma^2
1167  sub(yout, yout, xout);
1168  mul(yout, alpha, yout);
1169  square(gamma, gamma);
1170  mulLiteral(gamma, gamma, 8);
1171  sub(yout, yout, gamma);
1172 
1173  // Clean up.
1174  strict_clean(alpha);
1175  strict_clean(beta);
1176  strict_clean(gamma);
1177  strict_clean(delta);
1178  strict_clean(tmp);
1179 }
1180 
1200 void P521::addPoint(limb_t *xout, limb_t *yout, limb_t *zout,
1201  const limb_t *x1, const limb_t *y1,
1202  const limb_t *z1, const limb_t *x2,
1203  const limb_t *y2)
1204 {
1205  limb_t z1z1[NUM_LIMBS_521BIT];
1206  limb_t u2[NUM_LIMBS_521BIT];
1207  limb_t s2[NUM_LIMBS_521BIT];
1208  limb_t h[NUM_LIMBS_521BIT];
1209  limb_t i[NUM_LIMBS_521BIT];
1210  limb_t j[NUM_LIMBS_521BIT];
1211  limb_t r[NUM_LIMBS_521BIT];
1212  limb_t v[NUM_LIMBS_521BIT];
1213 
1214  // Determine if the first value is the point-at-infinity identity element.
1215  // The second z value is always 1 so it cannot be the point-at-infinity.
1216  limb_t p1IsIdentity = BigNumberUtil::isZero(z1, NUM_LIMBS_521BIT);
1217 
1218  // Multiply the points, assuming that z2 = 1.
1219  square(z1z1, z1); // z1z1 = z1^2
1220  mul(u2, x2, z1z1); // u2 = x2 * z1z1
1221  mul(s2, y2, z1); // s2 = y2 * z1 * z1z1
1222  mul(s2, s2, z1z1);
1223  sub(h, u2, x1); // h = u2 - x1
1224  mulLiteral(i, h, 2); // i = (2 * h)^2
1225  square(i, i);
1226  sub(r, s2, y1); // r = 2 * (s2 - y1)
1227  add(r, r, r);
1228  mul(j, h, i); // j = h * i
1229  mul(v, x1, i); // v = x1 * i
1230  square(xout, r); // xout = r^2 - j - 2 * v
1231  sub(xout, xout, j);
1232  sub(xout, xout, v);
1233  sub(xout, xout, v);
1234  sub(yout, v, xout); // yout = r * (v - xout) - 2 * y1 * j
1235  mul(yout, r, yout);
1236  mul(j, y1, j);
1237  sub(yout, yout, j);
1238  sub(yout, yout, j);
1239  mul(zout, z1, h); // zout = 2 * z1 * h
1240  add(zout, zout, zout);
1241 
1242  // Select the answer to return. If (x1, y1, z1) was the identity,
1243  // then the answer is (x2, y2, z2). Otherwise it is (xout, yout, zout).
1244  // Conditionally move the second argument over the output if necessary.
1245  cmove(p1IsIdentity, xout, x2);
1246  cmove(p1IsIdentity, yout, y2);
1247  cmove1(p1IsIdentity, zout); // z2 = 1
1248 
1249  // Clean up.
1250  strict_clean(z1z1);
1251  strict_clean(u2);
1252  strict_clean(s2);
1253  strict_clean(h);
1254  strict_clean(i);
1255  strict_clean(j);
1256  strict_clean(r);
1257  strict_clean(v);
1258 }
1259 
1272 void P521::cmove(limb_t select, limb_t *x, const limb_t *y)
1273 {
1274  uint8_t posn;
1275  limb_t dummy;
1276  limb_t sel;
1277 
1278  // Turn "select" into an all-zeroes or all-ones mask. We don't care
1279  // which bit or bits is set in the original "select" value.
1280  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
1281  --sel;
1282 
1283  // Move y into x based on "select".
1284  for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1285  dummy = sel & (*x ^ *y++);
1286  *x++ ^= dummy;
1287  }
1288 }
1289 
1301 void P521::cmove1(limb_t select, limb_t *x)
1302 {
1303  uint8_t posn;
1304  limb_t dummy;
1305  limb_t sel;
1306 
1307  // Turn "select" into an all-zeroes or all-ones mask. We don't care
1308  // which bit or bits is set in the original "select" value.
1309  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
1310  --sel;
1311 
1312  // Move 1 into x based on "select".
1313  dummy = sel & (*x ^ 1);
1314  *x++ ^= dummy;
1315  for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) {
1316  dummy = sel & *x;
1317  *x++ ^= dummy;
1318  }
1319 }
1320 
1329 void P521::recip(limb_t *result, const limb_t *x)
1330 {
1331  limb_t t1[NUM_LIMBS_521BIT];
1332 
1333  // The reciprocal is the same as x ^ (p - 2) where p = 2^521 - 1.
1334  // The big-endian hexadecimal expansion of (p - 2) is:
1335  // 01FF FFFFFFF FFFFFFFF ... FFFFFFFF FFFFFFFD
1336  //
1337  // The naive implementation needs to do 2 multiplications per 1 bit and
1338  // 1 multiplication per 0 bit. We can improve upon this by creating a
1339  // pattern 1111 and then shifting and multiplying to create 11111111,
1340  // and then 1111111111111111, and so on for the top 512-bits.
1341 
1342  // Build a 4-bit pattern 1111 in the result.
1343  square(result, x);
1344  mul(result, result, x);
1345  square(result, result);
1346  mul(result, result, x);
1347  square(result, result);
1348  mul(result, result, x);
1349 
1350  // Shift and multiply by increasing powers of two. This turns
1351  // 1111 into 11111111, and then 1111111111111111, and so on.
1352  for (size_t power = 4; power <= 256; power <<= 1) {
1353  square(t1, result);
1354  for (size_t temp = 1; temp < power; ++temp)
1355  square(t1, t1);
1356  mul(result, result, t1);
1357  }
1358 
1359  // Handle the 9 lowest bits of (p - 2), 111111101, from highest to lowest.
1360  for (uint8_t index = 0; index < 7; ++index) {
1361  square(result, result);
1362  mul(result, result, x);
1363  }
1364  square(result, result);
1365  square(result, result);
1366  mul(result, result, x);
1367 
1368  // Clean up.
1369  clean(t1);
1370 }
1371 
1380 void P521::reduceQ(limb_t *result, const limb_t *r)
1381 {
1382  // Algorithm from: http://en.wikipedia.org/wiki/Barrett_reduction
1383  //
1384  // We assume that r is less than or equal to (q - 1)^2.
1385  //
1386  // We want to compute result = r mod q. Find the smallest k such
1387  // that 2^k > q. In our case, k = 521. Then set m = floor(4^k / q)
1388  // and let r = r - q * floor(m * r / 4^k). This will be the result
1389  // or it will be at most one subtraction of q away from the result.
1390  //
1391  // Note: m is a 522-bit number, which fits in the same number of limbs
1392  // as a 521-bit number assuming that limbs are 8 bits or more in size.
1393  static limb_t const numM[NUM_LIMBS_521BIT] PROGMEM = {
1394  LIMB_PAIR(0x6EC79BF7, 0x449048E1), LIMB_PAIR(0x7663B851, 0xC44A3647),
1395  LIMB_PAIR(0x08F65A2F, 0x8033FEB7), LIMB_PAIR(0x40D06994, 0xAE79787C),
1396  LIMB_PAIR(0x00000005, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000),
1397  LIMB_PAIR(0x00000000, 0x00000000), LIMB_PAIR(0x00000000, 0x00000000),
1398  LIMB_PARTIAL(0x200)
1399  };
1400  limb_t temp[NUM_LIMBS_1042BIT + NUM_LIMBS_521BIT];
1401  limb_t temp2[NUM_LIMBS_521BIT];
1402 
1403  // Multiply r by m.
1404  BigNumberUtil::mul_P(temp, r, NUM_LIMBS_1042BIT, numM, NUM_LIMBS_521BIT);
1405 
1406  // Compute (m * r / 4^521) = (m * r / 2^1042).
1407 #if BIGNUMBER_LIMB_8BIT || BIGNUMBER_LIMB_16BIT
1408  dlimb_t carry = temp[NUM_LIMBS_BITS(1040)] >> 2;
1409  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
1410  carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1040) + index + 1])) << (LIMB_BITS - 2);
1411  temp2[index] = (limb_t)carry;
1412  carry >>= LIMB_BITS;
1413  }
1414 #elif BIGNUMBER_LIMB_32BIT || BIGNUMBER_LIMB_64BIT
1415  dlimb_t carry = temp[NUM_LIMBS_BITS(1024)] >> 18;
1416  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
1417  carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1024) + index + 1])) << (LIMB_BITS - 18);
1418  temp2[index] = (limb_t)carry;
1419  carry >>= LIMB_BITS;
1420  }
1421 #endif
1422 
1423  // Multiply (m * r) / 2^1042 by q and subtract it from r.
1424  // We can ignore the high words of the subtraction result
1425  // because they will all turn into zero after the subtraction.
1426  BigNumberUtil::mul_P(temp, temp2, NUM_LIMBS_521BIT,
1427  P521_q, NUM_LIMBS_521BIT);
1428  BigNumberUtil::sub(result, r, temp, NUM_LIMBS_521BIT);
1429 
1430  // Perform a trial subtraction of q from the result to reduce it.
1431  BigNumberUtil::reduceQuick_P(result, result, P521_q, NUM_LIMBS_521BIT);
1432 
1433  // Clean up and exit.
1434  clean(temp);
1435  clean(temp2);
1436 }
1437 
1448 void P521::mulQ(limb_t *result, const limb_t *x, const limb_t *y)
1449 {
1450  limb_t temp[NUM_LIMBS_1042BIT];
1451  mulNoReduce(temp, x, y);
1452  reduceQ(result, temp);
1453  strict_clean(temp);
1454 }
1455 
1464 void P521::recipQ(limb_t *result, const limb_t *x)
1465 {
1466  // Bottom 265 bits of q - 2. The top 256 bits are all-1's.
1467  static limb_t const P521_q_m2[] PROGMEM = {
1468  LIMB_PAIR(0x91386407, 0xbb6fb71e), LIMB_PAIR(0x899c47ae, 0x3bb5c9b8),
1469  LIMB_PAIR(0xf709a5d0, 0x7fcc0148), LIMB_PAIR(0xbf2f966b, 0x51868783),
1470  LIMB_PARTIAL(0x1fa)
1471  };
1472 
1473  // Raise x to the power of q - 2, mod q. We start with the top
1474  // 256 bits which are all-1's, using a similar technique to recip().
1475  limb_t t1[NUM_LIMBS_521BIT];
1476  mulQ(result, x, x);
1477  mulQ(result, result, x);
1478  mulQ(result, result, result);
1479  mulQ(result, result, x);
1480  mulQ(result, result, result);
1481  mulQ(result, result, x);
1482  for (size_t power = 4; power <= 128; power <<= 1) {
1483  mulQ(t1, result, result);
1484  for (size_t temp = 1; temp < power; ++temp)
1485  mulQ(t1, t1, t1);
1486  mulQ(result, result, t1);
1487  }
1488  clean(t1);
1489 
1490  // Deal with the bottom 265 bits from highest to lowest. Square for
1491  // each bit and multiply in x whenever there is a 1 bit. The timing
1492  // is based on the publicly-known constant q - 2, not on the value of x.
1493  size_t bit = 265;
1494  while (bit > 0) {
1495  --bit;
1496  mulQ(result, result, result);
1497  if (pgm_read_limb(&(P521_q_m2[bit / LIMB_BITS])) &
1498  (((limb_t)1) << (bit % LIMB_BITS))) {
1499  mulQ(result, result, x);
1500  }
1501  }
1502 }
1503 
1514 void P521::generateK(uint8_t k[66], const uint8_t hm[66],
1515  const uint8_t x[66], Hash *hash, uint64_t count)
1516 {
1517  size_t hlen = hash->hashSize();
1518  uint8_t V[64];
1519  uint8_t K[64];
1520  uint8_t marker;
1521 
1522  // If for some reason a hash function was supplied with more than
1523  // 512 bits of output, truncate hash values to the first 512 bits.
1524  // We cannot support more than this yet.
1525  if (hlen > 64)
1526  hlen = 64;
1527 
1528  // RFC 6979, Section 3.2, Step a. Hash the message, reduce modulo q,
1529  // and produce an octet string the same length as q, bits2octets(H(m)).
1530  // We support hashes up to 512 bits and q is a 521-bit number, so "hm"
1531  // is already the bits2octets(H(m)) value that we need.
1532 
1533  // Steps b and c. Set V to all-ones and K to all-zeroes.
1534  memset(V, 0x01, hlen);
1535  memset(K, 0x00, hlen);
1536 
1537  // Step d. K = HMAC_K(V || 0x00 || x || hm). We make a small
1538  // modification here to append the count value if it is non-zero.
1539  // We use this to generate a new k if we have to re-enter this
1540  // function because the previous one was rejected by sign().
1541  // This is slightly different to RFC 6979 which says that the
1542  // loop in step h below should be continued. That code path is
1543  // difficult to access, so instead modify K and V in steps d and f.
1544  // This alternative construction is compatible with the second
1545  // variant described in section 3.6 of RFC 6979.
1546  hash->resetHMAC(K, hlen);
1547  hash->update(V, hlen);
1548  marker = 0x00;
1549  hash->update(&marker, 1);
1550  hash->update(x, 66);
1551  hash->update(hm, 66);
1552  if (count)
1553  hash->update(&count, sizeof(count));
1554  hash->finalizeHMAC(K, hlen, K, hlen);
1555 
1556  // Step e. V = HMAC_K(V)
1557  hash->resetHMAC(K, hlen);
1558  hash->update(V, hlen);
1559  hash->finalizeHMAC(K, hlen, V, hlen);
1560 
1561  // Step f. K = HMAC_K(V || 0x01 || x || hm)
1562  hash->resetHMAC(K, hlen);
1563  hash->update(V, hlen);
1564  marker = 0x01;
1565  hash->update(&marker, 1);
1566  hash->update(x, 66);
1567  hash->update(hm, 66);
1568  if (count)
1569  hash->update(&count, sizeof(count));
1570  hash->finalizeHMAC(K, hlen, K, hlen);
1571 
1572  // Step g. V = HMAC_K(V)
1573  hash->resetHMAC(K, hlen);
1574  hash->update(V, hlen);
1575  hash->finalizeHMAC(K, hlen, V, hlen);
1576 
1577  // Step h. Generate candidate k values until we find what we want.
1578  for (;;) {
1579  // Step h.1 and h.2. Generate a string of 66 bytes in length.
1580  // T = empty
1581  // while (len(T) < 66)
1582  // V = HMAC_K(V)
1583  // T = T || V
1584  size_t posn = 0;
1585  while (posn < 66) {
1586  size_t temp = 66 - posn;
1587  if (temp > hlen)
1588  temp = hlen;
1589  hash->resetHMAC(K, hlen);
1590  hash->update(V, hlen);
1591  hash->finalizeHMAC(K, hlen, V, hlen);
1592  memcpy(k + posn, V, temp);
1593  posn += temp;
1594  }
1595 
1596  // Step h.3. k = bits2int(T) and exit the loop if k is not in
1597  // the range 1 to q - 1. Note: We have to extract the 521 most
1598  // significant bits of T, which means shifting it right by seven
1599  // bits to put it into the correct form.
1600  for (posn = 65; posn > 0; --posn)
1601  k[posn] = (k[posn - 1] << 1) | (k[posn] >> 7);
1602  k[0] >>= 7;
1603  if (isValidPrivateKey(k))
1604  break;
1605 
1606  // Generate new K and V values and try again.
1607  // K = HMAC_K(V || 0x00)
1608  // V = HMAC_K(V)
1609  hash->resetHMAC(K, hlen);
1610  hash->update(V, hlen);
1611  marker = 0x00;
1612  hash->update(&marker, 1);
1613  hash->finalizeHMAC(K, hlen, K, hlen);
1614  hash->resetHMAC(K, hlen);
1615  hash->update(V, hlen);
1616  hash->finalizeHMAC(K, hlen, V, hlen);
1617  }
1618 
1619  // Clean up.
1620  clean(V);
1621  clean(K);
1622 }
1623 
1636 void P521::generateK(uint8_t k[66], const uint8_t hm[66],
1637  const uint8_t x[66], uint64_t count)
1638 {
1639  SHA512 hash;
1640  generateK(k, hm, x, &hash, count);
1641 }
static void reduceQuick_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Reduces x modulo y using subtraction where y is in program memory.
static bool eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
Evaluates the curve function.
Definition: P521.cpp:135
static limb_t add(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Adds two big numbers.
static void generatePrivateKey(uint8_t privateKey[66])
Generates a private key for P-521 signing operations.
Definition: P521.cpp:466
static limb_t sub_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another where one is in program memory.
void rand(uint8_t *data, size_t len)
Generates random bytes into a caller-supplied buffer.
Definition: RNG.cpp:516
static bool dh2(const uint8_t k[132], uint8_t f[66])
Performs phase 2 of an ECDH key exchange using P-521.
Definition: P521.cpp:229
Abstract base class for cryptographic hash algorithms.
Definition: Hash.h:29
virtual void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen)=0
Finalizes the HMAC hashing process and returns the hash.
static bool isValidPrivateKey(const uint8_t privateKey[66])
Validates a private key value to ensure that it is between 1 and q - 1.
Definition: P521.cpp:524
SHA-512 hash algorithm.
Definition: SHA512.h:30
static void derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
Derives the public key from a private key for P-521 signing operations.
Definition: P521.cpp:497
static void sign(uint8_t signature[132], const uint8_t privateKey[66], const void *message, size_t len, Hash *hash=0)
Signs a message using a specific P-521 private key.
Definition: P521.cpp:276
static limb_t sub(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another.
virtual void reset()=0
Resets the hash ready for a new hashing process.
static void dh1(uint8_t k[132], uint8_t f[66])
Performs phase 1 of an ECDH key exchange using P-521.
Definition: P521.cpp:208
static void mul_P(limb_t *result, const limb_t *x, size_t xcount, const limb_t *y, size_t ycount)
Multiplies two big numbers where one is in program memory.
static void packBE(uint8_t *bytes, size_t len, const limb_t *limbs, size_t count)
Packs the big-endian byte representation of a big number into a byte array.
virtual void resetHMAC(const void *key, size_t keyLen)=0
Resets the hash ready for a new HMAC hashing process.
static bool verify(const uint8_t signature[132], const uint8_t publicKey[132], const void *message, size_t len, Hash *hash=0)
Verifies a signature using a specific P-521 public key.
Definition: P521.cpp:373
static bool isValidPublicKey(const uint8_t publicKey[132])
Validates a public key to ensure that it is a valid curve point.
Definition: P521.cpp:564
static void unpackBE(limb_t *limbs, size_t count, const uint8_t *bytes, size_t len)
Unpacks the big-endian byte representation of a big number into a limb array.
virtual size_t hashSize() const =0
Size of the hash result from finalize().
virtual void update(const void *data, size_t len)=0
Updates the hash with more data.
virtual void finalize(void *hash, size_t len)=0
Finalizes the hashing process and returns the hash.
static limb_t isZero(const limb_t *x, size_t size)
Determine if a big number is zero.