ArduinoLibs
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
Curve25519.cpp
1 /*
2  * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "Curve25519.h"
24 #include "Crypto.h"
25 #include "RNG.h"
26 #include "utility/LimbUtil.h"
27 #include <string.h>
28 
// Scrubbing temporaries inside mul(), reduceQuick(), and similar helpers
// is expensive when it is repeated for every step of eval(). For that
// reason strict_clean() only expands to a real clean() call when
// CURVE25519_STRICT_CLEAN has been defined; otherwise it expands to an
// empty statement. Other implementations such as curve25519-donna do no
// stack cleaning at all, so the benefit is debatable in any case.
#ifdef CURVE25519_STRICT_CLEAN
#define strict_clean(x) clean(x)
#else
#define strict_clean(x) do { } while (0)
#endif
54 
74 bool Curve25519::eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32])
75 {
76  limb_t x_1[NUM_LIMBS_256BIT];
77  limb_t x_2[NUM_LIMBS_256BIT];
78  limb_t x_3[NUM_LIMBS_256BIT];
79  limb_t z_2[NUM_LIMBS_256BIT];
80  limb_t z_3[NUM_LIMBS_256BIT];
81  limb_t A[NUM_LIMBS_256BIT];
82  limb_t B[NUM_LIMBS_256BIT];
83  limb_t C[NUM_LIMBS_256BIT];
84  limb_t D[NUM_LIMBS_256BIT];
85  limb_t E[NUM_LIMBS_256BIT];
86  limb_t AA[NUM_LIMBS_256BIT];
87  limb_t BB[NUM_LIMBS_256BIT];
88  limb_t DA[NUM_LIMBS_256BIT];
89  limb_t CB[NUM_LIMBS_256BIT];
90  uint8_t mask;
91  uint8_t sposn;
92  uint8_t select;
93  uint8_t swap;
94  bool retval;
95 
96  // Unpack the "x" argument into the limb representation
97  // which also masks off the high bit. NULL means 9.
98  if (x) {
99  // x1 = x
100  BigNumberUtil::unpackLE(x_1, NUM_LIMBS_256BIT, x, 32);
101  x_1[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
102  } else {
103  memset(x_1, 0, sizeof(x_1)); // x_1 = 9
104  x_1[0] = 9;
105  }
106 
107  // Check that "x" is within the range of the modulo field.
108  // We can do this with a reduction - if there was no borrow
109  // then the value of "x" was out of range. Timing is sensitive
110  // here so that we don't reveal anything about the value of "x".
111  // If there was a reduction, then continue executing the rest
112  // of this function with the (now) in-range "x" value and
113  // report the failure at the end.
114  retval = (bool)(reduceQuick(x_1) & 0x01);
115 
116  // Initialize the other temporary variables.
117  memset(x_2, 0, sizeof(x_2)); // x_2 = 1
118  x_2[0] = 1;
119  memset(z_2, 0, sizeof(z_2)); // z_2 = 0
120  memcpy(x_3, x_1, sizeof(x_1)); // x_3 = x
121  memcpy(z_3, x_2, sizeof(x_2)); // z_3 = 1
122 
123  // Iterate over all 255 bits of "s" from the highest to the lowest.
124  // We ignore the high bit of the 256-bit representation of "s".
125  mask = 0x40;
126  sposn = 31;
127  swap = 0;
128  for (uint8_t t = 255; t > 0; --t) {
129  // Conditional swaps on entry to this bit but only if we
130  // didn't swap on the previous bit.
131  select = s[sposn] & mask;
132  swap ^= select;
133  cswap(swap, x_2, x_3);
134  cswap(swap, z_2, z_3);
135 
136  // Evaluate the curve.
137  add(A, x_2, z_2); // A = x_2 + z_2
138  square(AA, A); // AA = A^2
139  sub(B, x_2, z_2); // B = x_2 - z_2
140  square(BB, B); // BB = B^2
141  sub(E, AA, BB); // E = AA - BB
142  add(C, x_3, z_3); // C = x_3 + z_3
143  sub(D, x_3, z_3); // D = x_3 - z_3
144  mul(DA, D, A); // DA = D * A
145  mul(CB, C, B); // CB = C * B
146  add(x_3, DA, CB); // x_3 = (DA + CB)^2
147  square(x_3, x_3);
148  sub(z_3, DA, CB); // z_3 = x_1 * (DA - CB)^2
149  square(z_3, z_3);
150  mul(z_3, z_3, x_1);
151  mul(x_2, AA, BB); // x_2 = AA * BB
152  mulA24(z_2, E); // z_2 = E * (AA + a24 * E)
153  add(z_2, z_2, AA);
154  mul(z_2, z_2, E);
155 
156  // Move onto the next lower bit of "s".
157  mask >>= 1;
158  if (!mask) {
159  --sposn;
160  mask = 0x80;
161  swap = select << 7;
162  } else {
163  swap = select >> 1;
164  }
165  }
166 
167  // Final conditional swaps.
168  cswap(swap, x_2, x_3);
169  cswap(swap, z_2, z_3);
170 
171  // Compute x_2 * (z_2 ^ (p - 2)) where p = 2^255 - 19.
172  recip(z_3, z_2);
173  mul(x_2, x_2, z_3);
174 
175  // Pack the result into the return array.
176  BigNumberUtil::packLE(result, 32, x_2, NUM_LIMBS_256BIT);
177 
178  // Clean up and exit.
179  clean(x_1);
180  clean(x_2);
181  clean(x_3);
182  clean(z_2);
183  clean(z_3);
184  clean(A);
185  clean(B);
186  clean(C);
187  clean(D);
188  clean(E);
189  clean(AA);
190  clean(BB);
191  clean(DA);
192  clean(CB);
193  return retval;
194 }
195 
239 void Curve25519::dh1(uint8_t k[32], uint8_t f[32])
240 {
241  do {
242  // Generate a random "f" value and then adjust the value to make
243  // it valid as an "s" value for eval(). According to the specification
244  // we need to mask off the 3 right-most bits of f[0], mask off the
245  // left-most bit of f[31], and set the second to left-most bit of f[31].
246  RNG.rand(f, 32);
247  f[0] &= 0xF8;
248  f[31] = (f[31] & 0x7F) | 0x40;
249 
250  // Evaluate the curve function: k = Curve25519::eval(f, 9).
251  // We pass NULL to eval() to indicate the value 9. There is no
252  // need to check the return value from eval() because we know
253  // that 9 is a valid field element.
254  eval(k, f, 0);
255 
256  // If "k" is weak for contributory behaviour then reject it,
257  // generate another "f" value, and try again. This case is
258  // highly unlikely but we still perform the check just in case.
259  } while (isWeakPoint(k));
260 }
261 
277 bool Curve25519::dh2(uint8_t k[32], uint8_t f[32])
278 {
279  uint8_t weak;
280 
281  // Evaluate the curve function: k = Curve25519::eval(f, k).
282  // If "k" is weak for contributory behaviour before or after
283  // the curve evaluation, then fail the exchange. For safety
284  // we perform every phase of the weak checks even if we could
285  // bail out earlier so that the execution takes the same
286  // amount of time for weak and non-weak "k" values.
287  weak = isWeakPoint(k); // Is "k" weak before?
288  weak |= ((eval(k, f, k) ^ 0x01) & 0x01); // Is "k" weak during?
289  weak |= isWeakPoint(k); // Is "k" weak after?
290  clean(f, 32);
291  return (bool)((weak ^ 0x01) & 0x01);
292 }
293 
301 uint8_t Curve25519::isWeakPoint(const uint8_t k[32])
302 {
303  // List of weak points from http://cr.yp.to/ecdh.html
304  // That page lists some others but they are variants on these
305  // of the form "point + i * (2^255 - 19)" for i = 0, 1, 2.
306  // Here we mask off the high bit and eval() catches the rest.
307  static const uint8_t points[5][32] PROGMEM = {
308  {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
309  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
310  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
311  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
312  {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
313  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
314  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
315  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
316  {0xE0, 0xEB, 0x7A, 0x7C, 0x3B, 0x41, 0xB8, 0xAE,
317  0x16, 0x56, 0xE3, 0xFA, 0xF1, 0x9F, 0xC4, 0x6A,
318  0xDA, 0x09, 0x8D, 0xEB, 0x9C, 0x32, 0xB1, 0xFD,
319  0x86, 0x62, 0x05, 0x16, 0x5F, 0x49, 0xB8, 0x00},
320  {0x5F, 0x9C, 0x95, 0xBC, 0xA3, 0x50, 0x8C, 0x24,
321  0xB1, 0xD0, 0xB1, 0x55, 0x9C, 0x83, 0xEF, 0x5B,
322  0x04, 0x44, 0x5C, 0xC4, 0x58, 0x1C, 0x8E, 0x86,
323  0xD8, 0x22, 0x4E, 0xDD, 0xD0, 0x9F, 0x11, 0x57},
324  {0xEC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
325  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
326  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
327  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F}
328  };
329 
330  // Check each of the weak points in turn. We perform the
331  // comparisons carefully so as not to reveal the value of "k"
332  // in the instruction timing. If "k" is indeed weak then
333  // we still check everything so as not to reveal which
334  // weak point it is.
335  uint8_t result = 0;
336  for (uint8_t posn = 0; posn < 5; ++posn) {
337  const uint8_t *point = points[posn];
338  uint8_t check = (pgm_read_byte(point + 31) ^ k[31]) & 0x7F;
339  for (uint8_t index = 31; index > 0; --index)
340  check |= (pgm_read_byte(point + index - 1) ^ k[index - 1]);
341  result |= (uint8_t)((((uint16_t)0x0100) - check) >> 8);
342  }
343 
344  // The "result" variable will be non-zero if there was a match.
345  return result;
346 }
347 
360 void Curve25519::reduce(limb_t *result, limb_t *x, uint8_t size)
361 {
362  /*
363  Note: This explaination is best viewed with a UTF-8 text viewer.
364 
365  To help explain what this function is doing, the following describes
366  how to efficiently compute reductions modulo a base of the form (2ⁿ - b)
367  where b is greater than zero and (b + 1)² <= 2ⁿ.
368 
369  Here we are interested in reducing the result of multiplying two
370  numbers that are less than or equal to (2ⁿ - b - 1). That is,
371  multiplying numbers that have already been reduced.
372 
373  Given some x less than or equal to (2ⁿ - b - 1)², we want to find a
374  y less than (2ⁿ - b) such that:
375 
376  y ≡ x mod (2ⁿ - b)
377 
378  We know that for all integer values of k >= 0:
379 
380  y ≡ x - k * (2ⁿ - b)
381  ≡ x - k * 2ⁿ + k * b
382 
383  In our case we choose k = ⌊x / 2ⁿ⌋ and then let:
384 
385  w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b
386 
387  The value w will either be the answer y or y can be obtained by
388  repeatedly subtracting (2ⁿ - b) from w until it is less than (2ⁿ - b).
389  At most b subtractions will be required.
390 
391  In our case b is 19 which is more subtractions than we would like to do,
392  but we can handle that by performing the above reduction twice and then
393  performing a single trial subtraction:
394 
395  w = (x mod 2ⁿ) + ⌊x / 2ⁿ⌋ * b
396  y = (w mod 2ⁿ) + ⌊w / 2ⁿ⌋ * b
397  if y >= (2ⁿ - b)
398  y -= (2ⁿ - b)
399 
400  The value y is the answer we want for reducing x modulo (2ⁿ - b).
401  */
402 
403  dlimb_t carry;
404  uint8_t posn;
405 
406  // Calculate (x mod 2^255) + ((x / 2^255) * 19) which will
407  // either produce the answer we want or it will produce a
408  // value of the form "answer + j * (2^255 - 19)".
409  carry = ((dlimb_t)(x[NUM_LIMBS_256BIT - 1] >> (LIMB_BITS - 1))) * 19U;
410  x[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
411  for (posn = 0; posn < size; ++posn) {
412  carry += ((dlimb_t)(x[posn + NUM_LIMBS_256BIT])) * 38U;
413  carry += x[posn];
414  x[posn] = (limb_t)carry;
415  carry >>= LIMB_BITS;
416  }
417  if (size < NUM_LIMBS_256BIT) {
418  // The high order half of the number is short; e.g. for mulA24().
419  // Propagate the carry through the rest of the low order part.
420  for (posn = size; posn < NUM_LIMBS_256BIT; ++posn) {
421  carry += x[posn];
422  x[posn] = (limb_t)carry;
423  carry >>= LIMB_BITS;
424  }
425  }
426 
427  // The "j" value may still be too large due to the final carry-out.
428  // We must repeat the reduction. If we already have the answer,
429  // then this won't do any harm but we must still do the calculation
430  // to preserve the overall timing.
431  carry *= 38U;
432  carry += ((dlimb_t)(x[NUM_LIMBS_256BIT - 1] >> (LIMB_BITS - 1))) * 19U;
433  x[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
434  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
435  carry += x[posn];
436  x[posn] = (limb_t)carry;
437  carry >>= LIMB_BITS;
438  }
439 
440  // At this point "x" will either be the answer or it will be the
441  // answer plus (2^255 - 19). Perform a trial subtraction which
442  // is equivalent to adding 19 and subtracting 2^255. We put the
443  // trial answer into the top-most limbs of the original "x" array.
444  // We add 19 here; the subtraction of 2^255 occurs in the next step.
445  carry = 19U;
446  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
447  carry += x[posn];
448  x[posn + NUM_LIMBS_256BIT] = (limb_t)carry;
449  carry >>= LIMB_BITS;
450  }
451 
452  // If there was a borrow, then the bottom-most limbs of "x" are the
453  // correct answer. If there was no borrow, then the top-most limbs
454  // of "x" are the correct answer. Select the correct answer but do
455  // it in a way that instruction timing will not reveal which value
456  // was selected. Borrow will occur if the high bit of the previous
457  // result is 0: turn the high bit into a selection mask.
458  limb_t mask = (limb_t)(((slimb_t)(x[NUM_LIMBS_512BIT - 1])) >> (LIMB_BITS - 1));
459  limb_t nmask = ~mask;
460  x[NUM_LIMBS_512BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
461  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
462  result[posn] = (x[posn] & nmask) | (x[posn + NUM_LIMBS_256BIT] & mask);
463  }
464 }
465 
479 limb_t Curve25519::reduceQuick(limb_t *x)
480 {
481  limb_t temp[NUM_LIMBS_256BIT];
482  dlimb_t carry;
483  uint8_t posn;
484  limb_t *xx;
485  limb_t *tt;
486 
487  // Perform a trial subtraction of (2^255 - 19) from "x" which is
488  // equivalent to adding 19 and subtracting 2^255. We add 19 here;
489  // the subtraction of 2^255 occurs in the next step.
490  carry = 19U;
491  xx = x;
492  tt = temp;
493  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
494  carry += *xx++;
495  *tt++ = (limb_t)carry;
496  carry >>= LIMB_BITS;
497  }
498 
499  // If there was a borrow, then the original "x" is the correct answer.
500  // If there was no borrow, then "temp" is the correct answer. Select the
501  // correct answer but do it in a way that instruction timing will not
502  // reveal which value was selected. Borrow will occur if the high bit
503  // of "temp" is 0: turn the high bit into a selection mask.
504  limb_t mask = (limb_t)(((slimb_t)(temp[NUM_LIMBS_256BIT - 1])) >> (LIMB_BITS - 1));
505  limb_t nmask = ~mask;
506  temp[NUM_LIMBS_256BIT - 1] &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
507  xx = x;
508  tt = temp;
509  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
510  *xx = ((*xx) & nmask) | ((*tt++) & mask);
511  ++xx;
512  }
513 
514  // Clean up "temp".
515  strict_clean(temp);
516 
517  // Return a zero value if we actually subtracted (2^255 - 19) from "x".
518  return nmask;
519 }
520 
533 void Curve25519::mulNoReduce(limb_t *result, const limb_t *x, const limb_t *y)
534 {
535  uint8_t i, j;
536  dlimb_t carry;
537  limb_t word;
538  const limb_t *yy;
539  limb_t *rr;
540 
541  // Multiply the lowest word of x by y.
542  carry = 0;
543  word = x[0];
544  yy = y;
545  rr = result;
546  for (i = 0; i < NUM_LIMBS_256BIT; ++i) {
547  carry += ((dlimb_t)(*yy++)) * word;
548  *rr++ = (limb_t)carry;
549  carry >>= LIMB_BITS;
550  }
551  *rr = (limb_t)carry;
552 
553  // Multiply and add the remaining words of x by y.
554  for (i = 1; i < NUM_LIMBS_256BIT; ++i) {
555  word = x[i];
556  carry = 0;
557  yy = y;
558  rr = result + i;
559  for (j = 0; j < NUM_LIMBS_256BIT; ++j) {
560  carry += ((dlimb_t)(*yy++)) * word;
561  carry += *rr;
562  *rr++ = (limb_t)carry;
563  carry >>= LIMB_BITS;
564  }
565  *rr = (limb_t)carry;
566  }
567 }
568 
579 void Curve25519::mul(limb_t *result, const limb_t *x, const limb_t *y)
580 {
581  limb_t temp[NUM_LIMBS_512BIT];
582  mulNoReduce(temp, x, y);
583  reduce(result, temp, NUM_LIMBS_256BIT);
584  strict_clean(temp);
585 }
586 
606 void Curve25519::mulA24(limb_t *result, const limb_t *x)
607 {
608  // The constant a24 = 121665 (0x1DB41) as a limb array.
609 #if BIGNUMBER_LIMB_8BIT
610  static limb_t const a24[3] PROGMEM = {0x41, 0xDB, 0x01};
611 #elif BIGNUMBER_LIMB_16BIT
612  static limb_t const a24[2] PROGMEM = {0xDB41, 0x0001};
613 #elif BIGNUMBER_LIMB_32BIT
614  static limb_t const a24[1] PROGMEM = {0x0001DB41};
615 #else
616  #error "limb_t must be 8, 16, or 32 bits in size"
617 #endif
618  #define NUM_A24_LIMBS (sizeof(a24) / sizeof(limb_t))
619 
620  // Multiply the lowest limb of a24 by x and zero-extend into the result.
621  limb_t temp[NUM_LIMBS_512BIT];
622  uint8_t i, j;
623  dlimb_t carry = 0;
624  limb_t word = pgm_read_limb(&(a24[0]));
625  const limb_t *xx = x;
626  limb_t *tt = temp;
627  for (i = 0; i < NUM_LIMBS_256BIT; ++i) {
628  carry += ((dlimb_t)(*xx++)) * word;
629  *tt++ = (limb_t)carry;
630  carry >>= LIMB_BITS;
631  }
632  *tt = (limb_t)carry;
633 
634  // Multiply and add the remaining limbs of a24.
635  for (i = 1; i < NUM_A24_LIMBS; ++i) {
636  word = pgm_read_limb(&(a24[i]));
637  carry = 0;
638  xx = x;
639  tt = temp + i;
640  for (j = 0; j < NUM_LIMBS_256BIT; ++j) {
641  carry += ((dlimb_t)(*xx++)) * word;
642  carry += *tt;
643  *tt++ = (limb_t)carry;
644  carry >>= LIMB_BITS;
645  }
646  *tt = (limb_t)carry;
647  }
648 
649  // Reduce the intermediate result modulo 2^255 - 19.
650  reduce(result, temp, NUM_A24_LIMBS);
651  strict_clean(temp);
652 }
653 
665 void Curve25519::mul_P(limb_t *result, const limb_t *x, const limb_t *y)
666 {
667  limb_t temp[NUM_LIMBS_512BIT];
668  uint8_t i, j;
669  dlimb_t carry;
670  limb_t word;
671  const limb_t *yy;
672  limb_t *tt;
673 
674  // Multiply the lowest word of x by y.
675  carry = 0;
676  word = x[0];
677  yy = y;
678  tt = temp;
679  for (i = 0; i < NUM_LIMBS_256BIT; ++i) {
680  carry += ((dlimb_t)(pgm_read_limb(yy))) * word;
681  *tt++ = (limb_t)carry;
682  carry >>= LIMB_BITS;
683  ++yy;
684  }
685  *tt = (limb_t)carry;
686 
687  // Multiply and add the remaining words of x by y.
688  for (i = 1; i < NUM_LIMBS_256BIT; ++i) {
689  word = x[i];
690  carry = 0;
691  yy = y;
692  tt = temp + i;
693  for (j = 0; j < NUM_LIMBS_256BIT; ++j) {
694  carry += ((dlimb_t)(pgm_read_limb(yy))) * word;
695  carry += *tt;
696  *tt++ = (limb_t)carry;
697  carry >>= LIMB_BITS;
698  ++yy;
699  }
700  *tt = (limb_t)carry;
701  }
702 
703  // Reduce the intermediate result modulo 2^255 - 19.
704  reduce(result, temp, NUM_LIMBS_256BIT);
705  strict_clean(temp);
706 }
707 
718 void Curve25519::add(limb_t *result, const limb_t *x, const limb_t *y)
719 {
720  dlimb_t carry = 0;
721  uint8_t posn;
722  limb_t *rr = result;
723 
724  // Add the two arrays to obtain the intermediate result.
725  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
726  carry += *x++;
727  carry += *y++;
728  *rr++ = (limb_t)carry;
729  carry >>= LIMB_BITS;
730  }
731 
732  // Reduce the result using the quick trial subtraction method.
733  reduceQuick(result);
734 }
735 
746 void Curve25519::sub(limb_t *result, const limb_t *x, const limb_t *y)
747 {
748  dlimb_t borrow;
749  uint8_t posn;
750  limb_t *rr = result;
751 
752  // Subtract y from x to generate the intermediate result.
753  borrow = 0;
754  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
755  borrow = ((dlimb_t)(*x++)) - (*y++) - ((borrow >> LIMB_BITS) & 0x01);
756  *rr++ = (limb_t)borrow;
757  }
758 
759  // If we had a borrow, then the result has gone negative and we
760  // have to add 2^255 - 19 to the result to make it positive again.
761  // The top bits of "borrow" will be all 1's if there is a borrow
762  // or it will be all 0's if there was no borrow. Easiest is to
763  // conditionally subtract 19 and then mask off the high bit.
764  rr = result;
765  borrow = (borrow >> LIMB_BITS) & 19U;
766  borrow = ((dlimb_t)(*rr)) - borrow;
767  *rr++ = (limb_t)borrow;
768  for (posn = 1; posn < NUM_LIMBS_256BIT; ++posn) {
769  borrow = ((dlimb_t)(*rr)) - ((borrow >> LIMB_BITS) & 0x01);
770  *rr++ = (limb_t)borrow;
771  }
772  *(--rr) &= ((((limb_t)1) << (LIMB_BITS - 1)) - 1);
773 }
774 
787 void Curve25519::cswap(limb_t select, limb_t *x, limb_t *y)
788 {
789  uint8_t posn;
790  limb_t dummy;
791  limb_t sel;
792 
793  // Turn "select" into an all-zeroes or all-ones mask. We don't care
794  // which bit or bits is set in the original "select" value.
795  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
796  --sel;
797 
798  // Swap the two values based on "select". Algorithm from:
799  // https://tools.ietf.org/html/draft-irtf-cfrg-curves-02
800  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
801  dummy = sel & (x[posn] ^ y[posn]);
802  x[posn] ^= dummy;
803  y[posn] ^= dummy;
804  }
805 }
806 
819 void Curve25519::cmove(limb_t select, limb_t *x, const limb_t *y)
820 {
821  uint8_t posn;
822  limb_t dummy;
823  limb_t sel;
824 
825  // Turn "select" into an all-zeroes or all-ones mask. We don't care
826  // which bit or bits is set in the original "select" value.
827  sel = (limb_t)(((((dlimb_t)1) << LIMB_BITS) - select) >> LIMB_BITS);
828  --sel;
829 
830  // Move y into x based on "select". Similar to conditional swap above.
831  for (posn = 0; posn < NUM_LIMBS_256BIT; ++posn) {
832  dummy = sel & (x[posn] ^ y[posn]);
833  x[posn] ^= dummy;
834  }
835 }
836 
843 void Curve25519::pow250(limb_t *result, const limb_t *x)
844 {
845  limb_t t1[NUM_LIMBS_256BIT];
846  uint8_t i, j;
847 
848  // The big-endian hexadecimal expansion of (2^250 - 1) is:
849  // 03FFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF
850  //
851  // The naive implementation needs to do 2 multiplications per 1 bit and
852  // 1 multiplication per 0 bit. We can improve upon this by creating a
853  // pattern 0000000001 ... 0000000001. If we square and multiply the
854  // pattern by itself we can turn the pattern into the partial results
855  // 0000000011 ... 0000000011, 0000000111 ... 0000000111, etc.
856  // This averages out to about 1.1 multiplications per 1 bit instead of 2.
857 
858  // Build a pattern of 250 bits in length of repeated copies of 0000000001.
859  #define RECIP_GROUP_SIZE 10
860  #define RECIP_GROUP_BITS 250 // Must be a multiple of RECIP_GROUP_SIZE.
861  square(t1, x);
862  for (j = 0; j < (RECIP_GROUP_SIZE - 1); ++j)
863  square(t1, t1);
864  mul(result, t1, x);
865  for (i = 0; i < ((RECIP_GROUP_BITS / RECIP_GROUP_SIZE) - 2); ++i) {
866  for (j = 0; j < RECIP_GROUP_SIZE; ++j)
867  square(t1, t1);
868  mul(result, result, t1);
869  }
870 
871  // Multiply bit-shifted versions of the 0000000001 pattern into
872  // the result to "fill in" the gaps in the pattern.
873  square(t1, result);
874  mul(result, result, t1);
875  for (j = 0; j < (RECIP_GROUP_SIZE - 2); ++j) {
876  square(t1, t1);
877  mul(result, result, t1);
878  }
879 
880  // Clean up and exit.
881  clean(t1);
882 }
883 
891 void Curve25519::recip(limb_t *result, const limb_t *x)
892 {
893  // The reciprocal is the same as x ^ (p - 2) where p = 2^255 - 19.
894  // The big-endian hexadecimal expansion of (p - 2) is:
895  // 7FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFEB
896  // Start with the 250 upper bits of the expansion of (p - 2).
897  pow250(result, x);
898 
899  // Deal with the 5 lowest bits of (p - 2), 01011, from highest to lowest.
900  square(result, result);
901  square(result, result);
902  mul(result, result, x);
903  square(result, result);
904  square(result, result);
905  mul(result, result, x);
906  square(result, result);
907  mul(result, result, x);
908 }
909 
925 bool Curve25519::sqrt(limb_t *result, const limb_t *x)
926 {
927  // sqrt(-1) mod (2^255 - 19).
928  static limb_t const numSqrtM1[NUM_LIMBS_256BIT] PROGMEM = {
929  LIMB(0x4A0EA0B0), LIMB(0xC4EE1B27), LIMB(0xAD2FE478), LIMB(0x2F431806),
930  LIMB(0x3DFBD7A7), LIMB(0x2B4D0099), LIMB(0x4FC1DF0B), LIMB(0x2B832480)
931  };
932  limb_t y[NUM_LIMBS_256BIT];
933 
934  // Algorithm from:
935  // https://tools.ietf.org/id/draft-josefsson-eddsa-ed25519-02.txt
936 
937  // Compute a candidate root: result = x^((p + 3) / 8) mod p.
938  // (p + 3) / 8 = (2^252 - 2) which is 251 one bits followed by a zero:
939  // 0FFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFE
940  pow250(result, x);
941  square(result, result);
942  mul(result, result, x);
943  square(result, result);
944 
945  // Did we get the square root immediately?
946  square(y, result);
947  if (memcmp(x, y, sizeof(y)) == 0) {
948  clean(y);
949  return true;
950  }
951 
952  // Multiply the result by sqrt(-1) and check again.
953  mul_P(result, result, numSqrtM1);
954  square(y, result);
955  if (memcmp(x, y, sizeof(y)) == 0) {
956  clean(y);
957  return true;
958  }
959 
960  // The number does not have a square root.
961  clean(y);
962  return false;
963 }
void rand(uint8_t *data, size_t len)
Generates random bytes into a caller-supplied buffer.
Definition: RNG.cpp:508
static bool eval(uint8_t result[32], const uint8_t s[32], const uint8_t x[32])
Evaluates the raw Curve25519 function.
Definition: Curve25519.cpp:74
static void unpackLE(limb_t *limbs, size_t count, const uint8_t *bytes, size_t len)
Unpacks the little-endian byte representation of a big number into a limb array.
static void packLE(uint8_t *bytes, size_t len, const limb_t *limbs, size_t count)
Packs the little-endian byte representation of a big number into a byte array.
static void dh1(uint8_t k[32], uint8_t f[32])
Performs phase 1 of a Diffie-Hellman key exchange using Curve25519.
Definition: Curve25519.cpp:239
static bool dh2(uint8_t k[32], uint8_t f[32])
Performs phase 2 of a Diffie-Hellman key exchange using Curve25519.
Definition: Curve25519.cpp:277