ArduinoLibs
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
P521.cpp
1 /*
2  * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "P521.h"
24 #include "Crypto.h"
25 #include "RNG.h"
26 #include "SHA512.h"
27 #include "utility/LimbUtil.h"
28 #include <string.h>
29 #include <Arduino.h>
30 
49 // Number of limbs that are needed to represent a 521-bit number.
50 #define NUM_LIMBS_521BIT NUM_LIMBS_BITS(521)
51 
52 // Number of limbs that are needed to represent a 1042-bit number.
53 // To simplify things we also require that this be twice the size of
54 // NUM_LIMBS_521BIT which involves a little wastage at the high end
55 // of one extra limb for 8-bit and 32-bit limbs. There is no
56 // wastage for 16-bit limbs.
57 #define NUM_LIMBS_1042BIT (NUM_LIMBS_BITS(521) * 2)
58 
59 // The overhead of clean() calls in mul(), etc can add up to a lot of
60 // processing time. Only do such cleanups if strict mode has been enabled.
// When P521_STRICT_CLEAN is not defined, strict_clean() expands to an
// empty statement, so the intermediate buffers are not wiped.
61 #if defined(P521_STRICT_CLEAN)
62 #define strict_clean(x) clean(x)
63 #else
64 #define strict_clean(x) do { ; } while (0)
65 #endif
66 
67 // Expand the partial 9-bit left over limb at the top of a 521-bit number.
// With 8-bit limbs the 9-bit value does not fit in a single limb, so the
// macro expands to two initializers: the low byte followed by the high byte.
// For every other limb size the value is emitted as a single initializer.
68 #if BIGNUMBER_LIMB_8BIT
69 #define LIMB_PARTIAL(value) ((uint8_t)(value)), \
70  ((uint8_t)((value) >> 8))
71 #else
72 #define LIMB_PARTIAL(value) (value)
73 #endif
74 
// Layout shared by the four tables below: each table lists sixteen 32-bit
// chunks from least significant to most significant (compare P521_q with
// the big-endian byte table in isValidPrivateKey()), followed by
// LIMB_PARTIAL() which supplies the remaining top 9 bits of the value.
// The tables are placed in PROGMEM, and the code in this file reads them
// with memcpy_P() or the *_P() helpers of BigNumberUtil.
77 // The group order "q" value from RFC 4754 and RFC 5903. This is the
78 // same as the "n" value from Appendix D.1.2.5 of NIST FIPS 186-4.
79 static limb_t const P521_q[NUM_LIMBS_521BIT] PROGMEM = {
80  LIMB(0x91386409), LIMB(0xbb6fb71e), LIMB(0x899c47ae), LIMB(0x3bb5c9b8),
81  LIMB(0xf709a5d0), LIMB(0x7fcc0148), LIMB(0xbf2f966b), LIMB(0x51868783),
82  LIMB(0xfffffffa), LIMB(0xffffffff), LIMB(0xffffffff), LIMB(0xffffffff),
83  LIMB(0xffffffff), LIMB(0xffffffff), LIMB(0xffffffff), LIMB(0xffffffff),
84  LIMB_PARTIAL(0x1ff)
85 };
86 
87 // The "b" value from Appendix D.1.2.5 of NIST FIPS 186-4.
// Used by validate() as the constant term of the curve equation
// y^2 = x^3 - 3 * x + b.
88 static limb_t const P521_b[NUM_LIMBS_521BIT] PROGMEM = {
89  LIMB(0x6b503f00), LIMB(0xef451fd4), LIMB(0x3d2c34f1), LIMB(0x3573df88),
90  LIMB(0x3bb1bf07), LIMB(0x1652c0bd), LIMB(0xec7e937b), LIMB(0x56193951),
91  LIMB(0x8ef109e1), LIMB(0xb8b48991), LIMB(0x99b315f3), LIMB(0xa2da725b),
92  LIMB(0xb68540ee), LIMB(0x929a21a0), LIMB(0x8e1c9a1f), LIMB(0x953eb961),
93  LIMB_PARTIAL(0x051)
94 };
95 
96 // The "Gx" value from Appendix D.1.2.5 of NIST FIPS 186-4.
// x co-ordinate of the generator point that eval(), sign(), verify() and
// derivePublicKey() start from.
97 static limb_t const P521_Gx[NUM_LIMBS_521BIT] PROGMEM = {
98  LIMB(0xc2e5bd66), LIMB(0xf97e7e31), LIMB(0x856a429b), LIMB(0x3348b3c1),
99  LIMB(0xa2ffa8de), LIMB(0xfe1dc127), LIMB(0xefe75928), LIMB(0xa14b5e77),
100  LIMB(0x6b4d3dba), LIMB(0xf828af60), LIMB(0x053fb521), LIMB(0x9c648139),
101  LIMB(0x2395b442), LIMB(0x9e3ecb66), LIMB(0x0404e9cd), LIMB(0x858e06b7),
102  LIMB_PARTIAL(0x0c6)
103 };
104 
105 // The "Gy" value from Appendix D.1.2.5 of NIST FIPS 186-4.
// y co-ordinate of the generator point; always used together with P521_Gx.
106 static limb_t const P521_Gy[NUM_LIMBS_521BIT] PROGMEM = {
107  LIMB(0x9fd16650), LIMB(0x88be9476), LIMB(0xa272c240), LIMB(0x353c7086),
108  LIMB(0x3fad0761), LIMB(0xc550b901), LIMB(0x5ef42640), LIMB(0x97ee7299),
109  LIMB(0x273e662c), LIMB(0x17afbd17), LIMB(0x579b4468), LIMB(0x98f54449),
110  LIMB(0x2c7d1bd9), LIMB(0x5c8a5fb4), LIMB(0x9a3bc004), LIMB(0x39296a78),
111  LIMB_PARTIAL(0x118)
112 };
113 
136 bool P521::eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
137 {
138  limb_t x[NUM_LIMBS_521BIT];
139  limb_t y[NUM_LIMBS_521BIT];
140  bool ok;
141 
142  // Unpack the curve point from the parameters and validate it.
143  if (point) {
144  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, point, 66);
145  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, point + 66, 66);
146  ok = validate(x, y);
147  } else {
148  memcpy_P(x, P521_Gx, sizeof(x));
149  memcpy_P(y, P521_Gy, sizeof(y));
150  ok = true;
151  }
152 
153  // Evaluate the curve function.
154  evaluate(x, y, f);
155 
156  // Pack the answer into the result array.
157  BigNumberUtil::packBE(result, 66, x, NUM_LIMBS_521BIT);
158  BigNumberUtil::packBE(result + 66, 66, y, NUM_LIMBS_521BIT);
159 
160  // Clean up.
161  clean(x);
162  clean(y);
163  return ok;
164 }
165 
209 void P521::dh1(uint8_t k[132], uint8_t f[66])
210 {
212  derivePublicKey(k, f);
213 }
214 
230 bool P521::dh2(const uint8_t k[132], uint8_t f[66])
231 {
232  // Unpack the (x, y) point from k.
233  limb_t x[NUM_LIMBS_521BIT];
234  limb_t y[NUM_LIMBS_521BIT];
235  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, k, 66);
236  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k + 66, 66);
237 
238  // Validate the curve point. We keep going to preserve the timing.
239  bool ok = validate(x, y);
240 
241  // Evaluate the curve function.
242  evaluate(x, y, f);
243 
244  // The secret key is the x component of the final value.
245  BigNumberUtil::packBE(f, 66, x, NUM_LIMBS_521BIT);
246 
247  // Clean up.
248  clean(x);
249  clean(y);
250  return ok;
251 }
252 
277 void P521::sign(uint8_t signature[132], const uint8_t privateKey[66],
278  const void *message, size_t len, Hash *hash)
279 {
280  uint8_t hm[66];
281  uint8_t k[66];
282  limb_t x[NUM_LIMBS_521BIT];
283  limb_t y[NUM_LIMBS_521BIT];
284  limb_t t[NUM_LIMBS_521BIT];
285  uint64_t count = 0;
286 
287  // Format the incoming message, hashing it if necessary.
288  if (hash) {
289  // Hash the message.
290  hash->reset();
291  hash->update(message, len);
292  len = hash->hashSize();
293  if (len > 64)
294  len = 64;
295  memset(hm, 0, 66 - len);
296  hash->finalize(hm + 66 - len, len);
297  } else {
298  // The message is the hash.
299  if (len > 64)
300  len = 64;
301  memset(hm, 0, 66 - len);
302  memcpy(hm + 66 - len, message, len);
303  }
304 
305  // Keep generating k values until both r and s are non-zero.
306  for (;;) {
307  // Generate the k value deterministically according to RFC 6979.
308  if (hash)
309  generateK(k, hm, privateKey, hash, count);
310  else
311  generateK(k, hm, privateKey, count);
312 
313  // Generate r = kG.x mod q.
314  memcpy_P(x, P521_Gx, sizeof(x));
315  memcpy_P(y, P521_Gy, sizeof(y));
316  evaluate(x, y, k);
317  BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);
318  BigNumberUtil::packBE(signature, 66, x, NUM_LIMBS_521BIT);
319 
320  // If r is zero, then we need to generate a new k value.
321  // This is utterly improbable, but let's be safe anyway.
322  if (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT)) {
323  ++count;
324  continue;
325  }
326 
327  // Generate s = (privateKey * r + hm) / k mod q.
328  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, privateKey, 66);
329  mulQ(y, y, x);
330  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, hm, 66);
331  BigNumberUtil::add(x, x, y, NUM_LIMBS_521BIT);
332  BigNumberUtil::reduceQuick_P(x, x, P521_q, NUM_LIMBS_521BIT);
333  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, k, 66);
334  recipQ(t, y);
335  mulQ(x, x, t);
336  BigNumberUtil::packBE(signature + 66, 66, x, NUM_LIMBS_521BIT);
337 
338  // Exit the loop if s is non-zero.
339  if (!BigNumberUtil::isZero(x, NUM_LIMBS_521BIT))
340  break;
341 
342  // We need to generate a new k value according to RFC 6979.
343  // This is utterly improbable, but let's be safe anyway.
344  ++count;
345  }
346 
347  // Clean up.
348  clean(hm);
349  clean(k);
350  clean(x);
351  clean(y);
352  clean(t);
353 }
354 
374 bool P521::verify(const uint8_t signature[132],
375  const uint8_t publicKey[132],
376  const void *message, size_t len, Hash *hash)
377 {
378  limb_t x[NUM_LIMBS_521BIT];
379  limb_t y[NUM_LIMBS_521BIT];
380  limb_t r[NUM_LIMBS_521BIT];
381  limb_t s[NUM_LIMBS_521BIT];
382  limb_t u1[NUM_LIMBS_521BIT];
383  limb_t u2[NUM_LIMBS_521BIT];
384  uint8_t t[66];
385  bool ok = false;
386 
387  // Because we are operating on public values, we don't need to
388  // be as strict about constant time. Bail out early if there
389  // is a problem with the parameters.
390 
391  // Unpack the signature. The values must be between 1 and q - 1.
392  BigNumberUtil::unpackBE(r, NUM_LIMBS_521BIT, signature, 66);
393  BigNumberUtil::unpackBE(s, NUM_LIMBS_521BIT, signature + 66, 66);
394  if (BigNumberUtil::isZero(r, NUM_LIMBS_521BIT) ||
395  BigNumberUtil::isZero(s, NUM_LIMBS_521BIT) ||
396  !BigNumberUtil::sub_P(x, r, P521_q, NUM_LIMBS_521BIT) ||
397  !BigNumberUtil::sub_P(x, s, P521_q, NUM_LIMBS_521BIT)) {
398  goto failed;
399  }
400 
401  // Unpack the public key and check that it is a valid curve point.
402  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);
403  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);
404  if (!validate(x, y)) {
405  goto failed;
406  }
407 
408  // Hash the message to generate hm, which we store into u1.
409  if (hash) {
410  // Hash the message.
411  hash->reset();
412  hash->update(message, len);
413  len = hash->hashSize();
414  if (len > 64)
415  len = 64;
416  hash->finalize(u2, len);
417  BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)u2, len);
418  } else {
419  // The message is the hash.
420  if (len > 64)
421  len = 64;
422  BigNumberUtil::unpackBE(u1, NUM_LIMBS_521BIT, (uint8_t *)message, len);
423  }
424 
425  // Compute u1 = hm * s^-1 mod q and u2 = r * s^-1 mod q.
426  recipQ(u2, s);
427  mulQ(u1, u1, u2);
428  mulQ(u2, r, u2);
429 
430  // Compute the curve point R = u2 * publicKey + u1 * G.
431  BigNumberUtil::packBE(t, 66, u2, NUM_LIMBS_521BIT);
432  evaluate(x, y, t);
433  memcpy_P(u2, P521_Gx, sizeof(x));
434  memcpy_P(s, P521_Gy, sizeof(y));
435  BigNumberUtil::packBE(t, 66, u1, NUM_LIMBS_521BIT);
436  evaluate(u2, s, t);
437  addAffine(u2, s, x, y);
438 
439  // If R.x = r mod q, then the signature is valid.
440  BigNumberUtil::reduceQuick_P(u1, u2, P521_q, NUM_LIMBS_521BIT);
441  ok = secure_compare(u1, r, NUM_LIMBS_521BIT * sizeof(limb_t));
442 
443  // Clean up and exit.
444 failed:
445  clean(x);
446  clean(y);
447  clean(r);
448  clean(s);
449  clean(u1);
450  clean(u2);
451  clean(t);
452  return ok;
453 }
454 
467 void P521::generatePrivateKey(uint8_t privateKey[66])
468 {
469  // Generate a random 521-bit value for the private key. The value
470  // must be generated uniformly at random between 1 and q - 1 where q
471  // is the group order (RFC 6090). We use the recommended algorithm
472  // from Appendix B of RFC 6090: generate a random 521-bit value
473  // and discard it if it is not within the range 1 to q - 1.
474  limb_t x[NUM_LIMBS_521BIT];
475  do {
476  RNG.rand((uint8_t *)x, sizeof(x));
477 #if BIGNUMBER_LIMB_8BIT
478  x[NUM_LIMBS_521BIT - 1] &= 0x01;
479 #else
480  x[NUM_LIMBS_521BIT - 1] &= 0x1FF;
481 #endif
482  BigNumberUtil::packBE(privateKey, 66, x, NUM_LIMBS_521BIT);
483  } while (BigNumberUtil::isZero(x, NUM_LIMBS_521BIT) ||
484  !BigNumberUtil::sub_P(x, x, P521_q, NUM_LIMBS_521BIT));
485  clean(x);
486 }
487 
498 void P521::derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
499 {
500  // Evaluate the curve function starting with the generator.
501  limb_t x[NUM_LIMBS_521BIT];
502  limb_t y[NUM_LIMBS_521BIT];
503  memcpy_P(x, P521_Gx, sizeof(x));
504  memcpy_P(y, P521_Gy, sizeof(y));
505  evaluate(x, y, privateKey);
506 
507  // Pack the (x, y) point into the public key.
508  BigNumberUtil::packBE(publicKey, 66, x, NUM_LIMBS_521BIT);
509  BigNumberUtil::packBE(publicKey + 66, 66, y, NUM_LIMBS_521BIT);
510 
511  // Clean up.
512  clean(x);
513  clean(y);
514 }
515 
525 bool P521::isValidPrivateKey(const uint8_t privateKey[66])
526 {
527  // The value "q" as a byte array from most to least significant.
528  static uint8_t const P521_q_bytes[66] PROGMEM = {
529  0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
530  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
531  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
532  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
533  0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
534  0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
535  0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
536  0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
537  0x64, 0x09
538  };
539  uint8_t zeroTest = 0;
540  uint8_t posn = 66;
541  uint16_t borrow = 0;
542  while (posn > 0) {
543  --posn;
544 
545  // Check for zero.
546  zeroTest |= privateKey[posn];
547 
548  // Subtract P521_q_bytes from the key. If there is no borrow,
549  // then the key value was greater than or equal to q.
550  borrow = ((uint16_t)(privateKey[posn])) -
551  pgm_read_byte(&(P521_q_bytes[posn])) -
552  ((borrow >> 8) & 0x01);
553  }
554  return zeroTest != 0 && borrow != 0;
555 }
556 
565 bool P521::isValidPublicKey(const uint8_t publicKey[132])
566 {
567  limb_t x[NUM_LIMBS_521BIT];
568  limb_t y[NUM_LIMBS_521BIT];
569  BigNumberUtil::unpackBE(x, NUM_LIMBS_521BIT, publicKey, 66);
570  BigNumberUtil::unpackBE(y, NUM_LIMBS_521BIT, publicKey + 66, 66);
571  bool ok = validate(x, y);
572  clean(x);
573  clean(y);
574  return ok;
575 }
576 
598 void P521::evaluate(limb_t *x, limb_t *y, const uint8_t f[66])
599 {
600  limb_t x1[NUM_LIMBS_521BIT];
601  limb_t y1[NUM_LIMBS_521BIT];
602  limb_t z1[NUM_LIMBS_521BIT];
603  limb_t x2[NUM_LIMBS_521BIT];
604  limb_t y2[NUM_LIMBS_521BIT];
605  limb_t z2[NUM_LIMBS_521BIT];
606 
607  // We want the input in Jacobian co-ordinates. The point (x, y, z)
608  // corresponds to the affine point (x / z^2, y / z^3), so if we set z
609  // to 1 we end up with Jacobian co-ordinates. Remember that z is 1
610  // and continue on.
611 
612  // Set the answer to the point-at-infinity initially (z = 0).
613  memset(x1, 0, sizeof(x1));
614  memset(y1, 0, sizeof(y1));
615  memset(z1, 0, sizeof(z1));
616 
617  // Special handling for the highest bit. We can skip dblPoint()/addPoint()
618  // and simply conditionally move (x, y, z) into (x1, y1, z1).
619  uint8_t select = (f[0] & 0x01);
620  cmove(select, x1, x);
621  cmove(select, y1, y);
622  cmove1(select, z1); // z = 1
623 
624  // Iterate over the remaining 520 bits of f from highest to lowest.
625  uint8_t mask = 0x80;
626  uint8_t fposn = 1;
627  for (uint16_t t = 520; t > 0; --t) {
628  // Double the answer.
629  dblPoint(x1, y1, z1, x1, y1, z1);
630 
631  // Add (x, y, z) to (x1, y1, z1) for the next 1 bit.
632  // We must always do this to preserve the overall timing.
633  // The z value is always 1 so we can omit that argument.
634  addPoint(x2, y2, z2, x1, y1, z1, x, y/*, z*/);
635 
636  // If the bit was 1, then move (x2, y2, z2) into (x1, y1, z1).
637  select = (f[fposn] & mask);
638  cmove(select, x1, x2);
639  cmove(select, y1, y2);
640  cmove(select, z1, z2);
641 
642  // Move onto the next bit.
643  mask >>= 1;
644  if (!mask) {
645  ++fposn;
646  mask = 0x80;
647  }
648  }
649 
650  // Convert from Jacobian co-ordinates back into affine co-ordinates.
651  // x = x1 * (z1^2)^-1, y = y1 * (z1^3)^-1.
652  recip(x2, z1);
653  square(y2, x2);
654  mul(x, x1, y2);
655  mul(y2, y2, x2);
656  mul(y, y1, y2);
657 
658  // Clean up.
659  clean(x1);
660  clean(y1);
661  clean(z1);
662  clean(x2);
663  clean(y2);
664  clean(z2);
665 }
666 
677 void P521::addAffine(limb_t *x1, limb_t *y1, const limb_t *x2, const limb_t *y2)
678 {
679  limb_t xout[NUM_LIMBS_521BIT];
680  limb_t yout[NUM_LIMBS_521BIT];
681  limb_t zout[NUM_LIMBS_521BIT];
682  limb_t z1[NUM_LIMBS_521BIT];
683 
684  // z1 = 1
685  z1[0] = 1;
686  memset(z1 + 1, 0, (NUM_LIMBS_521BIT - 1) * sizeof(limb_t));
687 
688  // Add the two points.
689  addPoint(xout, yout, zout, x1, y1, z1, x2, y2/*, z2*/);
690 
691  // Convert from Jacobian co-ordinates back into affine co-ordinates.
692  // x1 = xout * (zout^2)^-1, y1 = yout * (zout^3)^-1.
693  recip(z1, zout);
694  square(zout, z1);
695  mul(x1, xout, zout);
696  mul(zout, zout, z1);
697  mul(y1, yout, zout);
698 
699  // Clean up.
700  clean(xout);
701  clean(yout);
702  clean(zout);
703  clean(z1);
704 }
705 
715 bool P521::validate(const limb_t *x, const limb_t *y)
716 {
717  bool result;
718 
719  // If x or y is greater than or equal to 2^521 - 1, then the
720  // point is definitely not on the curve. Preserve timing by
721  // delaying the reporting of the result until later.
722  result = inRange(x);
723  result &= inRange(y);
724 
725  // We need to check that y^2 = x^3 - 3 * x + b mod 2^521 - 1.
726  limb_t t1[NUM_LIMBS_521BIT];
727  limb_t t2[NUM_LIMBS_521BIT];
728  square(t1, x);
729  mul(t1, t1, x);
730  mulLiteral(t2, x, 3);
731  sub(t1, t1, t2);
732  memcpy_P(t2, P521_b, sizeof(t2));
733  add(t1, t1, t2);
734  square(t2, y);
735  result &= secure_compare(t1, t2, sizeof(t1));
736  clean(t1);
737  clean(t2);
738  return result;
739 }
740 
749 bool P521::inRange(const limb_t *x)
750 {
751  // Do a trial subtraction of 2^521 - 1 from x, which is equivalent
752  // to adding 1 and subtracting 2^521. We only need the carry.
753  dlimb_t carry = 1;
754  limb_t word = 0;
755  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
756  carry += *x++;
757  word = (limb_t)carry;
758  carry >>= LIMB_BITS;
759  }
760 
761  // Determine the carry out from the low 521 bits.
762 #if BIGNUMBER_LIMB_8BIT
763  carry = (carry << 7) + (word >> 1);
764 #else
765  carry = (carry << (LIMB_BITS - 9)) + (word >> 9);
766 #endif
767 
768  // If the carry is zero, then x was in range. Otherwise it is out
769  // of range. Check for zero in a way that preserves constant timing.
770  word = (limb_t)(carry | (carry >> LIMB_BITS));
771  word = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - word) >> LIMB_BITS);
772  return (bool)word;
773 }
774 
784 void P521::reduce(limb_t *result, const limb_t *x)
785 {
786 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT
787  // According to NIST FIPS 186-4, we add the high 521 bits to the
788  // low 521 bits and then do a trial subtraction of 2^521 - 1.
789  // We do both in a single step. Subtracting 2^521 - 1 is equivalent
790  // to adding 1 and subtracting 2^521.
791  uint8_t index;
792  const limb_t *xl = x;
793  const limb_t *xh = x + NUM_LIMBS_521BIT;
794  limb_t *rr = result;
795  dlimb_t carry;
796  limb_t word = x[NUM_LIMBS_521BIT - 1];
797  carry = (word >> 9) + 1;
798  word &= 0x1FF;
799  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
800  carry += *xl++;
801  carry += ((dlimb_t)(*xh++)) << (LIMB_BITS - 9);
802  *rr++ = (limb_t)carry;
803  carry >>= LIMB_BITS;
804  }
805  carry += word;
806  carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << (LIMB_BITS - 9);
807  word = (limb_t)carry;
808  *rr = word;
809 
810  // If the carry out was 1, then mask it off and we have the answer.
811  // If the carry out was 0, then we need to add 2^521 - 1 back again.
812  // To preserve the timing we perform a conditional subtract of 1 and
813  // then mask off the high bits.
814  carry = ((word >> 9) ^ 0x01) & 0x01;
815  rr = result;
816  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
817  carry = ((dlimb_t)(*rr)) - carry;
818  *rr++ = (limb_t)carry;
819  carry = (carry >> LIMB_BITS) & 0x01;
820  }
821  *(--rr) &= 0x1FF;
822 #elif BIGNUMBER_LIMB_8BIT
823  // Same as above, but for 8-bit limbs.
824  uint8_t index;
825  const limb_t *xl = x;
826  const limb_t *xh = x + NUM_LIMBS_521BIT;
827  limb_t *rr = result;
828  dlimb_t carry;
829  limb_t word = x[NUM_LIMBS_521BIT - 1];
830  carry = (word >> 1) + 1;
831  word &= 0x01;
832  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
833  carry += *xl++;
834  carry += ((dlimb_t)(*xh++)) << 7;
835  *rr++ = (limb_t)carry;
836  carry >>= LIMB_BITS;
837  }
838  carry += word;
839  carry += ((dlimb_t)(x[NUM_LIMBS_1042BIT - 1])) << 1;
840  word = (limb_t)carry;
841  *rr = word;
842  carry = ((word >> 1) ^ 0x01) & 0x01;
843  rr = result;
844  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
845  carry = ((dlimb_t)(*rr)) - carry;
846  *rr++ = (limb_t)carry;
847  carry = (carry >> LIMB_BITS) & 0x01;
848  }
849  *(--rr) &= 0x01;
850 #else
851  #error "Don't know how to reduce values mod 2^521 - 1"
852 #endif
853 }
854 
867 void P521::reduceQuick(limb_t *x)
868 {
869  // Perform a trial subtraction of 2^521 - 1 from x. This is
870  // equivalent to adding 1 and subtracting 2^521 - 1.
871  uint8_t index;
872  limb_t *xx = x;
873  dlimb_t carry = 1;
874  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
875  carry += *xx;
876  *xx++ = (limb_t)carry;
877  carry >>= LIMB_BITS;
878  }
879 
880  // If the carry out was 1, then mask it off and we have the answer.
881  // If the carry out was 0, then we need to add 2^521 - 1 back again.
882  // To preserve the timing we perform a conditional subtract of 1 and
883  // then mask off the high bits.
884 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT
885  carry = ((x[NUM_LIMBS_521BIT - 1] >> 9) ^ 0x01) & 0x01;
886  xx = x;
887  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
888  carry = ((dlimb_t)(*xx)) - carry;
889  *xx++ = (limb_t)carry;
890  carry = (carry >> LIMB_BITS) & 0x01;
891  }
892  *(--xx) &= 0x1FF;
893 #elif BIGNUMBER_LIMB_8BIT
894  carry = ((x[NUM_LIMBS_521BIT - 1] >> 1) ^ 0x01) & 0x01;
895  xx = x;
896  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
897  carry = ((dlimb_t)(*xx)) - carry;
898  *xx++ = (limb_t)carry;
899  carry = (carry >> LIMB_BITS) & 0x01;
900  }
901  *(--xx) &= 0x01;
902 #endif
903 }
904 
917 void P521::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y)
918 {
919  uint8_t i, j;
920  dlimb_t carry;
921  limb_t word;
922  const limb_t *yy;
923  limb_t *rr;
924 
925  // Multiply the lowest word of x by y.
926  carry = 0;
927  word = x[0];
928  yy = y;
929  rr = result;
930  for (i = 0; i < NUM_LIMBS_521BIT; ++i) {
931  carry += ((dlimb_t)(*yy++)) * word;
932  *rr++ = (limb_t)carry;
933  carry >>= LIMB_BITS;
934  }
935  *rr = (limb_t)carry;
936 
937  // Multiply and add the remaining words of x by y.
938  for (i = 1; i < NUM_LIMBS_521BIT; ++i) {
939  word = x[i];
940  carry = 0;
941  yy = y;
942  rr = result + i;
943  for (j = 0; j < NUM_LIMBS_521BIT; ++j) {
944  carry += ((dlimb_t)(*yy++)) * word;
945  carry += *rr;
946  *rr++ = (limb_t)carry;
947  carry >>= LIMB_BITS;
948  }
949  *rr = (limb_t)carry;
950  }
951 }
952 
963 void P521::mul(limb_t *result, const limb_t *x, const limb_t *y)
964 {
965  limb_t temp[NUM_LIMBS_1042BIT];
966  mulNoReduce(temp, x, y);
967  reduce(result, temp);
968  strict_clean(temp);
969 }
970 
990 void P521::mulLiteral(limb_t *result, const limb_t *x, limb_t y)
991 {
992  uint8_t index;
993  dlimb_t carry = 0;
994  const limb_t *xx = x;
995  limb_t *rr = result;
996 
997  // Multiply x by the literal and put it into the result array.
998  // We assume that y is small enough that overflow from the
999  // highest limb will not occur during this process.
1000  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1001  carry += ((dlimb_t)(*xx++)) * y;
1002  *rr++ = (limb_t)carry;
1003  carry >>= LIMB_BITS;
1004  }
1005 
1006  // Reduce the value modulo 2^521 - 1. The high half is only a
1007  // single limb, so we can short-cut some of reduce() here.
1008 #if BIGNUMBER_LIMB_16BIT || BIGNUMBER_LIMB_32BIT
1009  limb_t word = result[NUM_LIMBS_521BIT - 1];
1010  carry = (word >> 9) + 1;
1011  word &= 0x1FF;
1012  rr = result;
1013  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
1014  carry += *rr;
1015  *rr++ = (limb_t)carry;
1016  carry >>= LIMB_BITS;
1017  }
1018  carry += word;
1019  word = (limb_t)carry;
1020  *rr = word;
1021 
1022  // If the carry out was 1, then mask it off and we have the answer.
1023  // If the carry out was 0, then we need to add 2^521 - 1 back again.
1024  // To preserve the timing we perform a conditional subtract of 1 and
1025  // then mask off the high bits.
1026  carry = ((word >> 9) ^ 0x01) & 0x01;
1027  rr = result;
1028  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1029  carry = ((dlimb_t)(*rr)) - carry;
1030  *rr++ = (limb_t)carry;
1031  carry = (carry >> LIMB_BITS) & 0x01;
1032  }
1033  *(--rr) &= 0x1FF;
1034 #elif BIGNUMBER_LIMB_8BIT
1035  // Same as above, but for 8-bit limbs.
1036  limb_t word = result[NUM_LIMBS_521BIT - 1];
1037  carry = (word >> 1) + 1;
1038  word &= 0x01;
1039  rr = result;
1040  for (index = 0; index < (NUM_LIMBS_521BIT - 1); ++index) {
1041  carry += *rr;
1042  *rr++ = (limb_t)carry;
1043  carry >>= LIMB_BITS;
1044  }
1045  carry += word;
1046  word = (limb_t)carry;
1047  *rr = word;
1048  carry = ((word >> 1) ^ 0x01) & 0x01;
1049  rr = result;
1050  for (index = 0; index < NUM_LIMBS_521BIT; ++index) {
1051  carry = ((dlimb_t)(*rr)) - carry;
1052  *rr++ = (limb_t)carry;
1053  carry = (carry >> LIMB_BITS) & 0x01;
1054  }
1055  *(--rr) &= 0x01;
1056 #endif
1057 }
1058 
1069 void P521::add(limb_t *result, const limb_t *x, const limb_t *y)
1070 {
1071  dlimb_t carry = 0;
1072  limb_t *rr = result;
1073  for (uint8_t posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1074  carry += *x++;
1075  carry += *y++;
1076  *rr++ = (limb_t)carry;
1077  carry >>= LIMB_BITS;
1078  }
1079  reduceQuick(result);
1080 }
1081 
1092 void P521::sub(limb_t *result, const limb_t *x, const limb_t *y)
1093 {
1094  dlimb_t borrow;
1095  uint8_t posn;
1096  limb_t *rr = result;
1097 
1098  // Subtract y from x to generate the intermediate result.
1099  borrow = 0;
1100  for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1101  borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01);
1102  *rr++ = (limb_t)borrow;
1103  }
1104 
1105  // If we had a borrow, then the result has gone negative and we
1106  // have to add 2^521 - 1 to the result to make it positive again.
1107  // The top bits of "borrow" will be all 1's if there is a borrow
1108  // or it will be all 0's if there was no borrow. Easiest is to
1109  // conditionally subtract 1 and then mask off the high bits.
1110  rr = result;
1111  borrow = (borrow >> LIMB_BITS) & 1U;
1112  borrow = ((dlimb_t)(*rr)) - borrow;
1113  *rr++ = (limb_t)borrow;
1114  for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) {
1115  borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01);
1116  *rr++ = (limb_t)borrow;
1117  }
1118 #if BIGNUMBER_LIMB_8BIT
1119  *(--rr) &= 0x01;
1120 #else
1121  *(--rr) &= 0x1FF;
1122 #endif
1123 }
1124 
1140 void P521::dblPoint(limb_t *xout, limb_t *yout, limb_t *zout,
1141  const limb_t *xin, const limb_t *yin,
1142  const limb_t *zin)
1143 {
1144  limb_t alpha[NUM_LIMBS_521BIT];
1145  limb_t beta[NUM_LIMBS_521BIT];
1146  limb_t gamma[NUM_LIMBS_521BIT];
1147  limb_t delta[NUM_LIMBS_521BIT];
1148  limb_t tmp[NUM_LIMBS_521BIT];
1149 
1150  // Double the point. If it is the point at infinity (z = 0),
1151  // then zout will still be zero at the end of this process so
1152  // we don't need any special handling for that case.
1153  square(delta, zin); // delta = z^2
1154  square(gamma, yin); // gamma = y^2
1155  mul(beta, xin, gamma); // beta = x * gamma
1156  sub(tmp, xin, delta); // alpha = 3 * (x - delta) * (x + delta)
1157  mulLiteral(alpha, tmp, 3);
1158  add(tmp, xin, delta);
1159  mul(alpha, alpha, tmp);
1160  square(xout, alpha); // xout = alpha^2 - 8 * beta
1161  mulLiteral(tmp, beta, 8);
1162  sub(xout, xout, tmp);
1163  add(zout, yin, zin); // zout = (y + z)^2 - gamma - delta
1164  square(zout, zout);
1165  sub(zout, zout, gamma);
1166  sub(zout, zout, delta);
1167  mulLiteral(yout, beta, 4);// yout = alpha * (4 * beta - xout) - 8 * gamma^2
1168  sub(yout, yout, xout);
1169  mul(yout, alpha, yout);
1170  square(gamma, gamma);
1171  mulLiteral(gamma, gamma, 8);
1172  sub(yout, yout, gamma);
1173 
1174  // Clean up.
1175  strict_clean(alpha);
1176  strict_clean(beta);
1177  strict_clean(gamma);
1178  strict_clean(delta);
1179  strict_clean(tmp);
1180 }
1181 
1201 void P521::addPoint(limb_t *xout, limb_t *yout, limb_t *zout,
1202  const limb_t *x1, const limb_t *y1,
1203  const limb_t *z1, const limb_t *x2,
1204  const limb_t *y2)
1205 {
1206  limb_t z1z1[NUM_LIMBS_521BIT];
1207  limb_t u2[NUM_LIMBS_521BIT];
1208  limb_t s2[NUM_LIMBS_521BIT];
1209  limb_t h[NUM_LIMBS_521BIT];
1210  limb_t i[NUM_LIMBS_521BIT];
1211  limb_t j[NUM_LIMBS_521BIT];
1212  limb_t r[NUM_LIMBS_521BIT];
1213  limb_t v[NUM_LIMBS_521BIT];
1214 
1215  // Determine if the first value is the point-at-infinity identity element.
1216  // The second z value is always 1 so it cannot be the point-at-infinity.
1217  limb_t p1IsIdentity = BigNumberUtil::isZero(z1, NUM_LIMBS_521BIT);
1218 
1219  // Multiply the points, assuming that z2 = 1.
1220  square(z1z1, z1); // z1z1 = z1^2
1221  mul(u2, x2, z1z1); // u2 = x2 * z1z1
1222  mul(s2, y2, z1); // s2 = y2 * z1 * z1z1
1223  mul(s2, s2, z1z1);
1224  sub(h, u2, x1); // h = u2 - x1
1225  mulLiteral(i, h, 2); // i = (2 * h)^2
1226  square(i, i);
1227  sub(r, s2, y1); // r = 2 * (s2 - y1)
1228  add(r, r, r);
1229  mul(j, h, i); // j = h * i
1230  mul(v, x1, i); // v = x1 * i
1231  square(xout, r); // xout = r^2 - j - 2 * v
1232  sub(xout, xout, j);
1233  sub(xout, xout, v);
1234  sub(xout, xout, v);
1235  sub(yout, v, xout); // yout = r * (v - xout) - 2 * y1 * j
1236  mul(yout, r, yout);
1237  mul(j, y1, j);
1238  sub(yout, yout, j);
1239  sub(yout, yout, j);
1240  mul(zout, z1, h); // zout = 2 * z1 * h
1241  add(zout, zout, zout);
1242 
1243  // Select the answer to return. If (x1, y1, z1) was the identity,
1244  // then the answer is (x2, y2, z2). Otherwise it is (xout, yout, zout).
1245  // Conditionally move the second argument over the output if necessary.
1246  cmove(p1IsIdentity, xout, x2);
1247  cmove(p1IsIdentity, yout, y2);
1248  cmove1(p1IsIdentity, zout); // z2 = 1
1249 
1250  // Clean up.
1251  strict_clean(z1z1);
1252  strict_clean(u2);
1253  strict_clean(s2);
1254  strict_clean(h);
1255  strict_clean(i);
1256  strict_clean(j);
1257  strict_clean(r);
1258  strict_clean(v);
1259 }
1260 
1273 void P521::cmove(limb_t select, limb_t *x, const limb_t *y)
1274 {
1275  uint8_t posn;
1276  limb_t dummy;
1277  limb_t sel;
1278 
1279  // Turn "select" into an all-zeroes or all-ones mask. We don't care
1280  // which bit or bits is set in the original "select" value.
1281  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
1282  --sel;
1283 
1284  // Move y into x based on "select".
1285  for (posn = 0; posn < NUM_LIMBS_521BIT; ++posn) {
1286  dummy = sel & (*x ^ *y++);
1287  *x++ ^= dummy;
1288  }
1289 }
1290 
1302 void P521::cmove1(limb_t select, limb_t *x)
1303 {
1304  uint8_t posn;
1305  limb_t dummy;
1306  limb_t sel;
1307 
1308  // Turn "select" into an all-zeroes or all-ones mask. We don't care
1309  // which bit or bits is set in the original "select" value.
1310  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
1311  --sel;
1312 
1313  // Move 1 into x based on "select".
1314  dummy = sel & (*x ^ 1);
1315  *x++ ^= dummy;
1316  for (posn = 1; posn < NUM_LIMBS_521BIT; ++posn) {
1317  dummy = sel & *x;
1318  *x++ ^= dummy;
1319  }
1320 }
1321 
1330 void P521::recip(limb_t *result, const limb_t *x)
1331 {
1332  limb_t t1[NUM_LIMBS_521BIT];
1333 
1334  // The reciprocal is the same as x ^ (p - 2) where p = 2^521 - 1.
1335  // The big-endian hexadecimal expansion of (p - 2) is:
1336  // 01FF FFFFFFF FFFFFFFF ... FFFFFFFF FFFFFFFD
1337  //
1338  // The naive implementation needs to do 2 multiplications per 1 bit and
1339  // 1 multiplication per 0 bit. We can improve upon this by creating a
1340  // pattern 1111 and then shifting and multiplying to create 11111111,
1341  // and then 1111111111111111, and so on for the top 512-bits.
1342 
1343  // Build a 4-bit pattern 1111 in the result.
1344  square(result, x);
1345  mul(result, result, x);
1346  square(result, result);
1347  mul(result, result, x);
1348  square(result, result);
1349  mul(result, result, x);
1350 
1351  // Shift and multiply by increasing powers of two. This turns
1352  // 1111 into 11111111, and then 1111111111111111, and so on.
1353  for (size_t power = 4; power <= 256; power <<= 1) {
1354  square(t1, result);
1355  for (size_t temp = 1; temp < power; ++temp)
1356  square(t1, t1);
1357  mul(result, result, t1);
1358  }
1359 
1360  // Handle the 9 lowest bits of (p - 2), 111111101, from highest to lowest.
1361  for (uint8_t index = 0; index < 7; ++index) {
1362  square(result, result);
1363  mul(result, result, x);
1364  }
1365  square(result, result);
1366  square(result, result);
1367  mul(result, result, x);
1368 
1369  // Clean up.
1370  clean(t1);
1371 }
1372 
1381 void P521::reduceQ(limb_t *result, const limb_t *r)
1382 {
1383  // Algorithm from: http://en.wikipedia.org/wiki/Barrett_reduction
1384  //
1385  // We assume that r is less than or equal to (q - 1)^2.
1386  //
1387  // We want to compute result = r mod q. Find the smallest k such
1388  // that 2^k > q. In our case, k = 521. Then set m = floor(4^k / q)
1389  // and let r = r - q * floor(m * r / 4^k). This will be the result
1390  // or it will be at most one subtraction of q away from the result.
1391  //
1392  // Note: m is a 522-bit number, which fits in the same number of limbs
1393  // as a 521-bit number assuming that limbs are 8 bits or more in size.
1394  static limb_t const numM[NUM_LIMBS_521BIT] PROGMEM = {
1395  LIMB(0x6EC79BF7), LIMB(0x449048E1), LIMB(0x7663B851), LIMB(0xC44A3647),
1396  LIMB(0x08F65A2F), LIMB(0x8033FEB7), LIMB(0x40D06994), LIMB(0xAE79787C),
1397  LIMB(0x00000005), LIMB(0x00000000), LIMB(0x00000000), LIMB(0x00000000),
1398  LIMB(0x00000000), LIMB(0x00000000), LIMB(0x00000000), LIMB(0x00000000),
1399  LIMB_PARTIAL(0x200)
1400  };
1401  limb_t temp[NUM_LIMBS_1042BIT + NUM_LIMBS_521BIT];
1402  limb_t temp2[NUM_LIMBS_521BIT];
1403 
1404  // Multiply r by m.
1405  BigNumberUtil::mul_P(temp, r, NUM_LIMBS_1042BIT, numM, NUM_LIMBS_521BIT);
1406 
1407  // Compute (m * r / 4^521) = (m * r / 2^1042).
1408 #if BIGNUMBER_LIMB_8BIT || BIGNUMBER_LIMB_16BIT
1409  dlimb_t carry = temp[NUM_LIMBS_BITS(1040)] >> 2;
1410  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
1411  carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1040) + index + 1])) << (LIMB_BITS - 2);
1412  temp2[index] = (limb_t)carry;
1413  carry >>= LIMB_BITS;
1414  }
1415 #elif BIGNUMBER_LIMB_32BIT
1416  dlimb_t carry = temp[NUM_LIMBS_BITS(1024)] >> 18;
1417  for (uint8_t index = 0; index < NUM_LIMBS_521BIT; ++index) {
1418  carry += ((dlimb_t)(temp[NUM_LIMBS_BITS(1024) + index + 1])) << 14;
1419  temp2[index] = (limb_t)carry;
1420  carry >>= LIMB_BITS;
1421  }
1422 #endif
1423 
1424  // Multiply (m * r) / 2^1042 by q and subtract it from r.
1425  // We can ignore the high words of the subtraction result
1426  // because they will all turn into zero after the subtraction.
1427  BigNumberUtil::mul_P(temp, temp2, NUM_LIMBS_521BIT,
1428  P521_q, NUM_LIMBS_521BIT);
1429  BigNumberUtil::sub(result, r, temp, NUM_LIMBS_521BIT);
1430 
1431  // Perform a trial subtraction of q from the result to reduce it.
1432  BigNumberUtil::reduceQuick_P(result, result, P521_q, NUM_LIMBS_521BIT);
1433 
1434  // Clean up and exit.
1435  clean(temp);
1436  clean(temp2);
1437 }
1438 
1449 void P521::mulQ(limb_t *result, const limb_t *x, const limb_t *y)
1450 {
1451  limb_t temp[NUM_LIMBS_1042BIT];
1452  mulNoReduce(temp, x, y);
1453  reduceQ(result, temp);
1454  strict_clean(temp);
1455 }
1456 
1465 void P521::recipQ(limb_t *result, const limb_t *x)
1466 {
1467  // Bottom 265 bits of q - 2. The top 256 bits are all-1's.
1468  static limb_t const P521_q_m2[] PROGMEM = {
1469  LIMB(0x91386407), LIMB(0xbb6fb71e), LIMB(0x899c47ae), LIMB(0x3bb5c9b8),
1470  LIMB(0xf709a5d0), LIMB(0x7fcc0148), LIMB(0xbf2f966b), LIMB(0x51868783),
1471  LIMB_PARTIAL(0x1fa)
1472  };
1473 
1474  // Raise x to the power of q - 2, mod q. We start with the top
1475  // 256 bits which are all-1's, using a similar technique to recip().
1476  limb_t t1[NUM_LIMBS_521BIT];
1477  mulQ(result, x, x);
1478  mulQ(result, result, x);
1479  mulQ(result, result, result);
1480  mulQ(result, result, x);
1481  mulQ(result, result, result);
1482  mulQ(result, result, x);
1483  for (size_t power = 4; power <= 128; power <<= 1) {
1484  mulQ(t1, result, result);
1485  for (size_t temp = 1; temp < power; ++temp)
1486  mulQ(t1, t1, t1);
1487  mulQ(result, result, t1);
1488  }
1489  clean(t1);
1490 
1491  // Deal with the bottom 265 bits from highest to lowest. Square for
1492  // each bit and multiply in x whenever there is a 1 bit. The timing
1493  // is based on the publicly-known constant q - 2, not on the value of x.
1494  size_t bit = 265;
1495  while (bit > 0) {
1496  --bit;
1497  mulQ(result, result, result);
1498  if (pgm_read_limb(&(P521_q_m2[bit / LIMB_BITS])) &
1499  (((limb_t)1) << (bit % LIMB_BITS))) {
1500  mulQ(result, result, x);
1501  }
1502  }
1503 }
1504 
1515 void P521::generateK(uint8_t k[66], const uint8_t hm[66],
1516  const uint8_t x[66], Hash *hash, uint64_t count)
1517 {
1518  size_t hlen = hash->hashSize();
1519  uint8_t V[64];
1520  uint8_t K[64];
1521  uint8_t marker;
1522 
1523  // If for some reason a hash function was supplied with more than
1524  // 512 bits of output, truncate hash values to the first 512 bits.
1525  // We cannot support more than this yet.
1526  if (hlen > 64)
1527  hlen = 64;
1528 
1529  // RFC 6979, Section 3.2, Step a. Hash the message, reduce modulo q,
1530  // and produce an octet string the same length as q, bits2octets(H(m)).
1531  // We support hashes up to 512 bits and q is a 521-bit number, so "hm"
1532  // is already the bits2octets(H(m)) value that we need.
1533 
1534  // Steps b and c. Set V to all-ones and K to all-zeroes.
1535  memset(V, 0x01, hlen);
1536  memset(K, 0x00, hlen);
1537 
1538  // Step d. K = HMAC_K(V || 0x00 || x || hm). We make a small
1539  // modification here to append the count value if it is non-zero.
1540  // We use this to generate a new k if we have to re-enter this
1541  // function because the previous one was rejected by sign().
1542  // This is slightly different to RFC 6979 which says that the
1543  // loop in step h below should be continued. That code path is
1544  // difficult to access, so instead modify K and V in steps d and f.
1545  // This alternative construction is compatible with the second
1546  // variant described in section 3.6 of RFC 6979.
1547  hash->resetHMAC(K, hlen);
1548  hash->update(V, hlen);
1549  marker = 0x00;
1550  hash->update(&marker, 1);
1551  hash->update(x, 66);
1552  hash->update(hm, 66);
1553  if (count)
1554  hash->update(&count, sizeof(count));
1555  hash->finalizeHMAC(K, hlen, K, hlen);
1556 
1557  // Step e. V = HMAC_K(V)
1558  hash->resetHMAC(K, hlen);
1559  hash->update(V, hlen);
1560  hash->finalizeHMAC(K, hlen, V, hlen);
1561 
1562  // Step f. K = HMAC_K(V || 0x01 || x || hm)
1563  hash->resetHMAC(K, hlen);
1564  hash->update(V, hlen);
1565  marker = 0x01;
1566  hash->update(&marker, 1);
1567  hash->update(x, 66);
1568  hash->update(hm, 66);
1569  if (count)
1570  hash->update(&count, sizeof(count));
1571  hash->finalizeHMAC(K, hlen, K, hlen);
1572 
1573  // Step g. V = HMAC_K(V)
1574  hash->resetHMAC(K, hlen);
1575  hash->update(V, hlen);
1576  hash->finalizeHMAC(K, hlen, V, hlen);
1577 
1578  // Step h. Generate candidate k values until we find what we want.
1579  for (;;) {
1580  // Step h.1 and h.2. Generate a string of 66 bytes in length.
1581  // T = empty
1582  // while (len(T) < 66)
1583  // V = HMAC_K(V)
1584  // T = T || V
1585  size_t posn = 0;
1586  while (posn < 66) {
1587  size_t temp = 66 - posn;
1588  if (temp > hlen)
1589  temp = hlen;
1590  hash->resetHMAC(K, hlen);
1591  hash->update(V, hlen);
1592  hash->finalizeHMAC(K, hlen, V, hlen);
1593  memcpy(k + posn, V, temp);
1594  posn += temp;
1595  }
1596 
1597  // Step h.3. k = bits2int(T) and exit the loop if k is not in
1598  // the range 1 to q - 1. Note: We have to extract the 521 most
1599  // significant bits of T, which means shifting it right by seven
1600  // bits to put it into the correct form.
1601  for (posn = 65; posn > 0; --posn)
1602  k[posn] = (k[posn - 1] << 1) | (k[posn] >> 7);
1603  k[0] >>= 7;
1604  if (isValidPrivateKey(k))
1605  break;
1606 
1607  // Generate new K and V values and try again.
1608  // K = HMAC_K(V || 0x00)
1609  // V = HMAC_K(V)
1610  hash->resetHMAC(K, hlen);
1611  hash->update(V, hlen);
1612  marker = 0x00;
1613  hash->update(&marker, 1);
1614  hash->finalizeHMAC(K, hlen, K, hlen);
1615  hash->resetHMAC(K, hlen);
1616  hash->update(V, hlen);
1617  hash->finalizeHMAC(K, hlen, V, hlen);
1618  }
1619 
1620  // Clean up.
1621  clean(V);
1622  clean(K);
1623 }
1624 
1637 void P521::generateK(uint8_t k[66], const uint8_t hm[66],
1638  const uint8_t x[66], uint64_t count)
1639 {
1640  SHA512 hash;
1641  generateK(k, hm, x, &hash, count);
1642 }
static void reduceQuick_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Reduces x modulo y using subtraction where y is in program memory.
static bool eval(uint8_t result[132], const uint8_t f[66], const uint8_t point[132])
Evaluates the curve function.
Definition: P521.cpp:136
static limb_t add(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Adds two big numbers.
static void generatePrivateKey(uint8_t privateKey[66])
Generates a private key for P-521 signing operations.
Definition: P521.cpp:467
static limb_t sub_P(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another where one is in program memory.
void rand(uint8_t *data, size_t len)
Generates random bytes into a caller-supplied buffer.
Definition: RNG.cpp:508
static bool dh2(const uint8_t k[132], uint8_t f[66])
Performs phase 2 of an ECDH key exchange using P-521.
Definition: P521.cpp:230
Abstract base class for cryptographic hash algorithms.
Definition: Hash.h:29
virtual void finalizeHMAC(const void *key, size_t keyLen, void *hash, size_t hashLen)=0
Finalizes the HMAC hashing process and returns the hash.
static bool isValidPrivateKey(const uint8_t privateKey[66])
Validates a private key value to ensure that it is between 1 and q - 1.
Definition: P521.cpp:525
SHA-512 hash algorithm.
Definition: SHA512.h:30
static void derivePublicKey(uint8_t publicKey[132], const uint8_t privateKey[66])
Derives the public key from a private key for P-521 signing operations.
Definition: P521.cpp:498
static void sign(uint8_t signature[132], const uint8_t privateKey[66], const void *message, size_t len, Hash *hash=0)
Signs a message using a specific P-521 private key.
Definition: P521.cpp:277
static limb_t sub(limb_t *result, const limb_t *x, const limb_t *y, size_t size)
Subtracts one big number from another.
virtual void reset()=0
Resets the hash ready for a new hashing process.
static void dh1(uint8_t k[132], uint8_t f[66])
Performs phase 1 of an ECDH key exchange using P-521.
Definition: P521.cpp:209
static void mul_P(limb_t *result, const limb_t *x, size_t xcount, const limb_t *y, size_t ycount)
Multiplies two big numbers where one is in program memory.
static void packBE(uint8_t *bytes, size_t len, const limb_t *limbs, size_t count)
Packs the big-endian byte representation of a big number into a byte array.
virtual void resetHMAC(const void *key, size_t keyLen)=0
Resets the hash ready for a new HMAC hashing process.
static bool verify(const uint8_t signature[132], const uint8_t publicKey[132], const void *message, size_t len, Hash *hash=0)
Verifies a signature using a specific P-521 public key.
Definition: P521.cpp:374
static bool isValidPublicKey(const uint8_t publicKey[132])
Validates a public key to ensure that it is a valid curve point.
Definition: P521.cpp:565
static void unpackBE(limb_t *limbs, size_t count, const uint8_t *bytes, size_t len)
Unpacks the big-endian byte representation of a big number into a limb array.
virtual size_t hashSize() const =0
Size of the hash result from finalize().
virtual void update(const void *data, size_t len)=0
Updates the hash with more data.
virtual void finalize(void *hash, size_t len)=0
Finalizes the hashing process and returns the hash.
static limb_t isZero(const limb_t *x, size_t size)
Determine if a big number is zero.