ArduinoLibs
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
SpeckSmall.cpp
1 /*
2  * Copyright (C) 2016 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "SpeckSmall.h"
24 #include "Crypto.h"
25 #include "utility/RotateUtil.h"
26 #include "utility/EndianUtil.h"
27 #include <string.h>
28 
57 // The "avr-gcc" compiler doesn't do a very good job of compiling
58 // code involving 64-bit values. So we have to use inline assembly.
59 // It also helps to break the state up into 32-bit quantities
60 // because "asm" supports register names like %A0, %B0, %C0, %D0
61 // for the bytes in a 32-bit quantity, but it does not support
62 // %E0, %F0, %G0, %H0 for the high bytes of a 64-bit quantity.
//
// USE_AVR_INLINE_ASM is only defined when __AVR__ is defined. Where it is
// left undefined, the "#if USE_AVR_INLINE_ASM" tests further down evaluate
// as false and the portable C++ branches after "#else" are compiled instead.
63 #if defined(__AVR__)
64 #define USE_AVR_INLINE_ASM 1
65 #endif
66 
// Pack/unpack byte-aligned big-endian 64-bit quantities.
//
// pack64(data, value): converts the 64-bit `value` with htobe64() and copies
// the resulting 8 bytes to `data` with memcpy(), so `data` does not need to
// be aligned. `value` is evaluated exactly once.
//
// The temporary has a deliberately unusual name: with a short name such as
// "v", a call like pack64(buf, v) would expand to "uint64_t v = htobe64((v));"
// and read the uninitialised inner variable instead of the caller's one.
#define pack64(data, value) \
    do { \
        uint64_t pack64_tmp_ = htobe64((value)); \
        memcpy((data), &pack64_tmp_, sizeof(uint64_t)); \
    } while (0)
// unpack64(value, data): reads 8 bytes from `data` (which does not need to
// be aligned) with memcpy(), converts them from big-endian with be64toh(),
// and assigns the result to the lvalue `value`.
//
// The bytes are decoded through a local temporary so that `value` is
// evaluated exactly once (an argument such as arr[i++] is therefore safe)
// and so that exactly sizeof(uint64_t) bytes are written to the temporary
// rather than directly over `value`.
#define unpack64(value, data) \
    do { \
        uint64_t unpack64_tmp_; \
        memcpy(&unpack64_tmp_, (data), sizeof(uint64_t)); \
        (value) = be64toh(unpack64_tmp_); \
    } while (0)
78 
86 {
87 }
88 
89 SpeckSmall::~SpeckSmall()
90 {
91  clean(l);
92 }
93 
// Sets the key and precomputes the end-of-schedule state used by decryptBlock().
//
// The key is first installed with SpeckTiny::setKey(); if that call fails,
// false is returned and nothing else is done. Otherwise the key schedule is
// run forward for (rounds - 1) steps starting from k, and the l array is left
// holding the final l words together with the final s word. decryptBlock()
// starts from that saved state and runs the schedule backwards.
//
// key: pointer to the key, passed through unchanged to SpeckTiny::setKey().
// len: length of the key, passed through unchanged to SpeckTiny::setKey().
// Returns true on success, false if SpeckTiny::setKey() returned false.
94 bool SpeckSmall::setKey(const uint8_t *key, size_t len)
95 {
96  // Try setting the key for the forward encryption direction.
97  if (!SpeckTiny::setKey(key, len))
98  return false;
99 
100 #if USE_AVR_INLINE_ASM
101  // Expand the key schedule to get the l and s values at the end
102  // of the schedule, which will allow us to reverse it later.
 // mb is the size in bytes of the l words copied from k + 1, i.e. the
 // same quantity as (m - 1) * sizeof(uint64_t) in the C++ branch below.
103  uint8_t mb = (rounds - 31) * 8;
 // Operand map for the asm below:
 //   %0 = Z, pointing at k       %1 = X, pointing at l
 //   %2 = li_in  (byte offset)   %3 = li_out (byte offset)
 //   %4 = i (round counter)      %5 = rounds - 1 (iterations left)
 // s is kept in r16..r23 and the working word x in r8..r15.
 // NOTE(review): every operand is declared as an input, yet the code
 // writes to Z, X and %2..%5, and there is no "memory" clobber even though
 // l is written through X. Confirm that the compiler in use tolerates this.
104  __asm__ __volatile__ (
105  "ld r16,Z+\n" // s = k[0]
106  "ld r17,Z+\n"
107  "ld r18,Z+\n"
108  "ld r19,Z+\n"
109  "ld r20,Z+\n"
110  "ld r21,Z+\n"
111  "ld r22,Z+\n"
112  "ld r23,Z+\n"
113 
114  "mov r24,%3\n" // memcpy(l, k + 1, mb)
115  "3:\n"
116  "ld __tmp_reg__,Z+\n"
117  "st X+,__tmp_reg__\n"
118  "dec r24\n"
119  "brne 3b\n"
120  "sub %A1,%3\n" // return X to its initial value
121  "sbc %B1,__zero_reg__\n"
122 
123  "1:\n"
124 
125  // l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i;
126  "add %A1,%2\n" // X = &(l[li_in])
127  "adc %B1,__zero_reg__\n"
 // The first byte goes to r15 (the top byte of x) and the remaining
 // seven to r8..r14, so the load itself performs the rotate.
128  "ld r15,X+\n" // x = rightRotate8_64(l[li_in])
129  "ld r8,X+\n"
130  "ld r9,X+\n"
131  "ld r10,X+\n"
132  "ld r11,X+\n"
133  "ld r12,X+\n"
134  "ld r13,X+\n"
135  "ld r14,X+\n"
136 
137  "add r8,r16\n" // x += s
138  "adc r9,r17\n"
139  "adc r10,r18\n"
140  "adc r11,r19\n"
141  "adc r12,r20\n"
142  "adc r13,r21\n"
143  "adc r14,r22\n"
144  "adc r15,r23\n"
145 
146  "eor r8,%4\n" // x ^= i
147 
148  // X = X - li_in + li_out
 // li_in and li_out are byte offsets here: stepping by 8 (r24) and
 // masking with 0x1f (r25) corresponds to "(index + 1) & 0x03" in the
 // C++ branch below. r24 and r25 are reused for li_out further down.
149  "ldi r24,8\n" // li_in = li_in + 1
150  "add %2,r24\n"
151  "sub %A1,%2\n" // return X to its initial value
152  "sbc %B1,__zero_reg__\n"
153  "ldi r25,0x1f\n"
154  "and %2,r25\n" // li_in = li_in % 4
155  "add %A1,%3\n" // X = &(l[li_out])
156  "adc %B1,__zero_reg__\n"
157 
158  "st X+,r8\n" // l[li_out] = x
159  "st X+,r9\n"
160  "st X+,r10\n"
161  "st X+,r11\n"
162  "st X+,r12\n"
163  "st X+,r13\n"
164  "st X+,r14\n"
165  "st X+,r15\n"
166 
167  "add %3,r24\n" // li_out = li_out + 1
168  "sub %A1,%3\n" // return X to its initial value
169  "sbc %B1,__zero_reg__\n"
170  "and %3,r25\n" // li_out = li_out % 4
171 
172  // s = leftRotate3_64(s) ^ l[li_out];
 // Three single-bit rotates; in each one the bit shifted out of r23
 // is added back into bit 0 of r16 by the trailing adc.
173  "lsl r16\n" // s = leftRotate1_64(s)
174  "rol r17\n"
175  "rol r18\n"
176  "rol r19\n"
177  "rol r20\n"
178  "rol r21\n"
179  "rol r22\n"
180  "rol r23\n"
181  "adc r16,__zero_reg__\n"
182 
183  "lsl r16\n" // s = leftRotate1_64(s)
184  "rol r17\n"
185  "rol r18\n"
186  "rol r19\n"
187  "rol r20\n"
188  "rol r21\n"
189  "rol r22\n"
190  "rol r23\n"
191  "adc r16,__zero_reg__\n"
192 
193  "lsl r16\n" // s = leftRotate1_64(s)
194  "rol r17\n"
195  "rol r18\n"
196  "rol r19\n"
197  "rol r20\n"
198  "rol r21\n"
199  "rol r22\n"
200  "rol r23\n"
201  "adc r16,__zero_reg__\n"
202 
 // x (r8..r15) still holds the value just stored to l[li_out].
203  "eor r16,r8\n" // s ^= x
204  "eor r17,r9\n"
205  "eor r18,r10\n"
206  "eor r19,r11\n"
207  "eor r20,r12\n"
208  "eor r21,r13\n"
209  "eor r22,r14\n"
210  "eor r23,r15\n"
211 
212  // Loop
213  "inc %4\n" // ++i
214  "dec %5\n" // --rounds
215  "breq 2f\n"
216  "rjmp 1b\n"
217  "2:\n"
218 
 // Save the final s value in the l array, as the C++ branch does.
219  "add %A1,%3\n" // X = &(l[li_out])
220  "adc %B1,__zero_reg__\n"
221  "st X+,r16\n" // l[li_out] = s
222  "st X+,r17\n"
223  "st X+,r18\n"
224  "st X+,r19\n"
225  "st X+,r20\n"
226  "st X+,r21\n"
227  "st X+,r22\n"
228  "st X+,r23\n"
229 
230  : : "z"(k), "x"(l),
231  "r"((uint8_t)0), // initial value of li_in
232  "r"((uint8_t)mb), // initial value of li_out
233  "r"(0), // initial value of i
234  "r"(rounds - 1)
235  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
236  "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
237  "r24", "r25"
238  );
239  return true;
240 #else
241  // Expand the key schedule to get the l and s values at the end
242  // of the schedule, which will allow us to reverse it later.
 // m - 1 words are copied from k + 1 into l; presumably m is the number
 // of 64-bit words in the key - TODO confirm against SpeckTiny.
243  uint8_t m = rounds - 30;
244  uint8_t li_in = 0;
245  uint8_t li_out = m - 1;
246  uint64_t s = k[0];
247  memcpy(l, k + 1, (m - 1) * sizeof(uint64_t));
248  for (uint8_t i = 0; i < (rounds - 1); ++i) {
249  l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i;
250  s = leftRotate3_64(s) ^ l[li_out];
 // l is used as a 4-entry ring: both indices wrap modulo 4.
251  li_in = (li_in + 1) & 0x03;
252  li_out = (li_out + 1) & 0x03;
253  }
254 
255  // Save the final s value in the l array so that we can recover it later.
256  l[li_out] = s;
257  return true;
258 #endif
259 }
260 
// Decrypts a single block, reading 16 bytes from input and writing 16 bytes
// to output.
//
// The input is read as two big-endian 64-bit words (x from the first 8
// bytes, y from the next 8) and the result is written back in the same
// layout. The key schedule is not stored in full: decryption starts from the
// end-of-schedule words that setKey() left in this->l and regenerates each
// earlier round key on the fly. The regeneration is done on a local copy
// (l and s / slow,shigh); this->l is only read.
//
// NOTE(review): the local l and s values are derived from the key and are
// not wiped before returning, whereas the destructor calls clean() on the
// member array. Confirm whether that is acceptable.
261 void SpeckSmall::decryptBlock(uint8_t *output, const uint8_t *input)
262 {
263 #if USE_AVR_INLINE_ASM
264  uint64_t l[4];
265  uint32_t xlow, xhigh, ylow, yhigh;
266  uint32_t slow, shigh;
 // Same starting indices as the C++ branch below, then scaled by 8 so
 // that li_in and li_out are byte offsets into l.
267  uint8_t li_in = (rounds + 3) & 0x03;
268  uint8_t li_out = (((rounds - 31) + li_in) & 0x03) * 8;
269  li_in *= 8;
270 
271  // Prepare to expand the key schedule.
 // Operand map: %0 = slow, %1 = shigh (both in memory), %2 = Z pointing
 // at this->l, %3 = X pointing at the local l, %4 = li_out.
 // NOTE(review): Z and X are declared as inputs but are modified (Z+/X+
 // in the copy loop), and there is no "memory" clobber although the local
 // l is written through X. Confirm this is safe with the compiler in use.
272  __asm__ __volatile__ (
273  "add r30,%4\n" // Z = &(this->l[li_out])
274  "adc r31,__zero_reg__\n"
275  "ld __tmp_reg__,Z\n" // s = this->l[li_out]
276  "std %A0,__tmp_reg__\n"
277  "ldd __tmp_reg__,Z+1\n"
278  "std %B0,__tmp_reg__\n"
279  "ldd __tmp_reg__,Z+2\n"
280  "std %C0,__tmp_reg__\n"
281  "ldd __tmp_reg__,Z+3\n"
282  "std %D0,__tmp_reg__\n"
283  "ldd __tmp_reg__,Z+4\n"
284  "std %A1,__tmp_reg__\n"
285  "ldd __tmp_reg__,Z+5\n"
286  "std %B1,__tmp_reg__\n"
287  "ldd __tmp_reg__,Z+6\n"
288  "std %C1,__tmp_reg__\n"
289  "ldd __tmp_reg__,Z+7\n"
290  "std %D1,__tmp_reg__\n"
291  "sub r30,%4\n" // Point Z back to the start of this->l.
292  "sbc r31,__zero_reg__\n"
293 
294  "ldi r25,32\n" // Copy the entire this->l array into l.
295  "1:\n"
296  "ld __tmp_reg__,Z+\n"
297  "st X+,__tmp_reg__\n"
298  "dec r25\n"
299  "brne 1b\n"
300  : "=Q"(slow), "=Q"(shigh)
301  : "z"(this->l), "x"(l), "r"(li_out)
302  : "r25"
303  );
304 
305  // Unpack the input into the x and y variables, converting
306  // from big-endian into little-endian in the process.
 // Operand map: %0 = xlow, %1 = xhigh, %2 = ylow, %3 = yhigh, Z = input.
307  __asm__ __volatile__ (
308  "ld %D1,Z\n"
309  "ldd %C1,Z+1\n"
310  "ldd %B1,Z+2\n"
311  "ldd %A1,Z+3\n"
312  "ldd %D0,Z+4\n"
313  "ldd %C0,Z+5\n"
314  "ldd %B0,Z+6\n"
315  "ldd %A0,Z+7\n"
316  "ldd %D3,Z+8\n"
317  "ldd %C3,Z+9\n"
318  "ldd %B3,Z+10\n"
319  "ldd %A3,Z+11\n"
320  "ldd %D2,Z+12\n"
321  "ldd %C2,Z+13\n"
322  "ldd %B2,Z+14\n"
323  "ldd %A2,Z+15\n"
324  : "=r"(xlow), "=r"(xhigh), "=r"(ylow), "=r"(yhigh)
325  : "z"(input)
326  );
327 
328  // Perform all decryption rounds while expanding the key schedule in-place.
 // Operand map:
 //   %0/%1 = x (low/high)   %2/%3 = y (low/high)   (registers)
 //   %4/%5 = s (low/high)   %6 = li_in   %7 = li_out   (memory)
 //   %8 = Z, pointing at the local l   %9 = rounds
 // r23 holds the round counter i; r24 and r25 are scratch.
 // While the next round key is being computed, x and y are parked on the
 // stack and their registers are reused: %0/%1 hold t and %2/%3 hold s.
 // NOTE(review): Z (%8) is declared as an input and is modified inside
 // the loop, although each adjustment is undone before the next use.
329  __asm__ __volatile__ (
330  "mov r23,%9\n" // i = rounds - 1
331  "dec r23\n"
332  "1:\n"
333 
334  // Adjust x and y for this round using the key schedule word s.
335 
336  // y = rightRotate3_64(x ^ y);
337  "eor %A2,%A0\n" // y ^= x
338  "eor %B2,%B0\n"
339  "eor %C2,%C0\n"
340  "eor %D2,%D0\n"
341  "eor %A3,%A1\n"
342  "eor %B3,%B1\n"
343  "eor %C3,%C1\n"
344  "eor %D3,%D1\n"
345 
 // Each single-bit rotate saves bit 0 in the T flag (bst) before
 // the shifts and writes it into bit 7 of the top byte (bld) afterwards.
346  "bst %A2,0\n" // y = rightRotate1_64(y)
347  "ror %D3\n"
348  "ror %C3\n"
349  "ror %B3\n"
350  "ror %A3\n"
351  "ror %D2\n"
352  "ror %C2\n"
353  "ror %B2\n"
354  "ror %A2\n"
355  "bld %D3,7\n"
356 
357  "bst %A2,0\n" // y = rightRotate1_64(y)
358  "ror %D3\n"
359  "ror %C3\n"
360  "ror %B3\n"
361  "ror %A3\n"
362  "ror %D2\n"
363  "ror %C2\n"
364  "ror %B2\n"
365  "ror %A2\n"
366  "bld %D3,7\n"
367 
368  "bst %A2,0\n" // y = rightRotate1_64(y)
369  "ror %D3\n"
370  "ror %C3\n"
371  "ror %B3\n"
372  "ror %A3\n"
373  "ror %D2\n"
374  "ror %C2\n"
375  "ror %B2\n"
376  "ror %A2\n"
377  "bld %D3,7\n"
378 
379  // x = leftRotate8_64((x ^ s) - y);
380  "ldd __tmp_reg__,%A4\n" // x ^= s
381  "eor %A0,__tmp_reg__\n"
382  "ldd __tmp_reg__,%B4\n"
383  "eor %B0,__tmp_reg__\n"
384  "ldd __tmp_reg__,%C4\n"
385  "eor %C0,__tmp_reg__\n"
386  "ldd __tmp_reg__,%D4\n"
387  "eor %D0,__tmp_reg__\n"
388  "ldd __tmp_reg__,%A5\n"
389  "eor %A1,__tmp_reg__\n"
390  "ldd __tmp_reg__,%B5\n"
391  "eor %B1,__tmp_reg__\n"
392  "ldd __tmp_reg__,%C5\n"
393  "eor %C1,__tmp_reg__\n"
394  "ldd __tmp_reg__,%D5\n"
395  "eor %D1,__tmp_reg__\n"
396 
397  "sub %A0,%A2\n" // x -= y
398  "sbc %B0,%B2\n"
399  "sbc %C0,%C2\n"
400  "sbc %D0,%D2\n"
401  "sbc %A1,%A3\n"
402  "sbc %B1,%B3\n"
403  "sbc %C1,%C3\n"
404  "sbc %D1,%D3\n"
405 
406  "mov __tmp_reg__,%D1\n" // x = leftRotate8_64(x)
407  "mov %D1,%C1\n"
408  "mov %C1,%B1\n"
409  "mov %B1,%A1\n"
410  "mov %A1,%D0\n"
411  "mov %D0,%C0\n"
412  "mov %C0,%B0\n"
413  "mov %B0,%A0\n"
414  "mov %A0,__tmp_reg__\n"
415 
416  // On the last round we don't need to compute s so we
417  // can exit early here if i == 0.
418  "or r23,r23\n" // if (i == 0)
419  "brne 2f\n"
420  "rjmp 3f\n"
421  "2:\n"
422  "dec r23\n" // --i
423 
424  // Save x and y on the stack so we can reuse registers for t and s.
425  "push %A0\n"
426  "push %B0\n"
427  "push %C0\n"
428  "push %D0\n"
429  "push %A1\n"
430  "push %B1\n"
431  "push %C1\n"
432  "push %D1\n"
433  "push %A2\n"
434  "push %B2\n"
435  "push %C2\n"
436  "push %D2\n"
437  "push %A3\n"
438  "push %B3\n"
439  "push %C3\n"
440  "push %D3\n"
441 
442  // Compute the key schedule word s for the next round.
443 
444  // li_out = (li_out + 3) & 0x03;
 // In byte offsets: add 24 (three 8-byte words) and mask with 0x1f.
 // r25 keeps the value 24 and is reused for li_in further down.
445  "ldd r24,%7\n"
446  "ldi r25,24\n"
447  "add r24,r25\n"
448  "andi r24,0x1f\n"
449  "std %7,r24\n"
450 
451  // s = rightRotate3_64(s ^ l[li_out]);
452  "add %A8,r24\n" // Z = &(l[li_out])
453  "adc %B8,__zero_reg__\n"
454 
455  "ld %A0,Z\n" // t = l[li_out]
456  "ldd %B0,Z+1\n"
457  "ldd %C0,Z+2\n"
458  "ldd %D0,Z+3\n"
459  "ldd %A1,Z+4\n"
460  "ldd %B1,Z+5\n"
461  "ldd %C1,Z+6\n"
462  "ldd %D1,Z+7\n"
463 
464  "ldd %A2,%A4\n" // load s
465  "ldd %B2,%B4\n"
466  "ldd %C2,%C4\n"
467  "ldd %D2,%D4\n"
468  "ldd %A3,%A5\n"
469  "ldd %B3,%B5\n"
470  "ldd %C3,%C5\n"
471  "ldd %D3,%D5\n"
472 
473  "eor %A2,%A0\n" // s ^= t
474  "eor %B2,%B0\n"
475  "eor %C2,%C0\n"
476  "eor %D2,%D0\n"
477  "eor %A3,%A1\n"
478  "eor %B3,%B1\n"
479  "eor %C3,%C1\n"
480  "eor %D3,%D1\n"
481 
482  "bst %A2,0\n" // s = rightRotate1_64(s)
483  "ror %D3\n"
484  "ror %C3\n"
485  "ror %B3\n"
486  "ror %A3\n"
487  "ror %D2\n"
488  "ror %C2\n"
489  "ror %B2\n"
490  "ror %A2\n"
491  "bld %D3,7\n"
492 
493  "bst %A2,0\n" // s = rightRotate1_64(s)
494  "ror %D3\n"
495  "ror %C3\n"
496  "ror %B3\n"
497  "ror %A3\n"
498  "ror %D2\n"
499  "ror %C2\n"
500  "ror %B2\n"
501  "ror %A2\n"
502  "bld %D3,7\n"
503 
504  "bst %A2,0\n" // s = rightRotate1_64(s)
505  "ror %D3\n"
506  "ror %C3\n"
507  "ror %B3\n"
508  "ror %A3\n"
509  "ror %D2\n"
510  "ror %C2\n"
511  "ror %B2\n"
512  "ror %A2\n"
513  "bld %D3,7\n"
514 
515  "sub %A8,r24\n" // Z -= li_out
516  "sbc %B8,__zero_reg__\n"
517 
518  // li_in = (li_in + 3) & 0x03;
519  "ldd r24,%6\n"
520  "add r24,r25\n"
521  "andi r24,0x1f\n"
522  "std %6,r24\n"
523 
524  // l[li_in] = leftRotate8_64((l[li_out] ^ i) - s);
525  "add %A8,r24\n" // Z = &(l[li_in])
526  "adc %B8,__zero_reg__\n"
527 
 // i (r23) was already decremented above, so this matches the
 // "round - 1" term used by the C++ branch.
528  "eor %A0,r23\n" // t ^= i
529 
530  "sub %A0,%A2\n" // t -= s
531  "sbc %B0,%B2\n"
532  "sbc %C0,%C2\n"
533  "sbc %D0,%D2\n"
534  "sbc %A1,%A3\n"
535  "sbc %B1,%B3\n"
536  "sbc %C1,%C3\n"
537  "sbc %D1,%D3\n"
538 
 // The rotate is folded into the store: the top byte of t goes to
 // offset 0 and the other seven bytes follow one position higher.
539  "st Z,%D1\n" // l[li_in] = leftRotate8_64(t)
540  "std Z+1,%A0\n"
541  "std Z+2,%B0\n"
542  "std Z+3,%C0\n"
543  "std Z+4,%D0\n"
544  "std Z+5,%A1\n"
545  "std Z+6,%B1\n"
546  "std Z+7,%C1\n"
547 
548  "sub %A8,r24\n" // Z -= li_in
549  "sbc %B8,__zero_reg__\n"
550 
551  "std %A4,%A2\n" // store s
552  "std %B4,%B2\n"
553  "std %C4,%C2\n"
554  "std %D4,%D2\n"
555  "std %A5,%A3\n"
556  "std %B5,%B3\n"
557  "std %C5,%C3\n"
558  "std %D5,%D3\n"
559 
560  // Pop registers from the stack to recover the x and y values.
561  "pop %D3\n"
562  "pop %C3\n"
563  "pop %B3\n"
564  "pop %A3\n"
565  "pop %D2\n"
566  "pop %C2\n"
567  "pop %B2\n"
568  "pop %A2\n"
569  "pop %D1\n"
570  "pop %C1\n"
571  "pop %B1\n"
572  "pop %A1\n"
573  "pop %D0\n"
574  "pop %C0\n"
575  "pop %B0\n"
576  "pop %A0\n"
577 
578  // Bottom of the loop.
579  "rjmp 1b\n"
580  "3:\n"
581 
582  : "+r"(xlow), "+r"(xhigh), "+r"(ylow), "+r"(yhigh),
583  "+Q"(slow), "+Q"(shigh), "+Q"(li_in), "+Q"(li_out)
584  : "z"(l), "r"(rounds)
585  : "r23", "r24", "r25"
586  );
587 
588  // Pack the results into the output and convert back to big-endian.
 // Operand map: %0 = xlow, %1 = xhigh, %2 = ylow, %3 = yhigh, Z = output.
589  __asm__ __volatile__ (
590  "st Z,%D1\n"
591  "std Z+1,%C1\n"
592  "std Z+2,%B1\n"
593  "std Z+3,%A1\n"
594  "std Z+4,%D0\n"
595  "std Z+5,%C0\n"
596  "std Z+6,%B0\n"
597  "std Z+7,%A0\n"
598  "std Z+8,%D3\n"
599  "std Z+9,%C3\n"
600  "std Z+10,%B3\n"
601  "std Z+11,%A3\n"
602  "std Z+12,%D2\n"
603  "std Z+13,%C2\n"
604  "std Z+14,%B2\n"
605  "std Z+15,%A2\n"
606  : : "r"(xlow), "r"(xhigh), "r"(ylow), "r"(yhigh), "z"(output)
607  );
608 #else
609  uint64_t l[4];
610  uint64_t x, y, s;
 // NOTE(review): this variable is shadowed by the loop variable of the
 // same name in the decryption loop below.
611  uint8_t round;
612  uint8_t li_in = (rounds + 3) & 0x03;
613  uint8_t li_out = ((rounds - 31) + li_in) & 0x03;
614 
615  // Prepare the key schedule, starting at the end.
 // Only the entries from li_in up to (but not including) li_out are
 // copied, wrapping modulo 4; the entry at li_out holds s.
616  for (round = li_in; round != li_out; round = (round + 1) & 0x03)
617  l[round] = this->l[round];
618  s = this->l[li_out];
619 
620  // Unpack the input and convert from big-endian.
621  unpack64(x, input);
622  unpack64(y, input + 8);
623 
624  // Perform all decryption rounds except the last while
625  // expanding the decryption schedule on the fly.
626  for (uint8_t round = rounds - 1; round > 0; --round) {
627  // Decrypt using the current round key.
628  y = rightRotate3_64(x ^ y);
629  x = leftRotate8_64((x ^ s) - y);
630 
631  // Generate the round key for the previous round.
 // Adding 3 modulo 4 steps each index back by one.
632  li_in = (li_in + 3) & 0x03;
633  li_out = (li_out + 3) & 0x03;
634  s = rightRotate3_64(s ^ l[li_out]);
635  l[li_in] = leftRotate8_64((l[li_out] ^ (round - 1)) - s);
636  }
637 
638  // Perform the final decryption round.
639  y = rightRotate3_64(x ^ y);
640  x = leftRotate8_64((x ^ s) - y);
641 
642  // Pack the output and convert to big-endian.
643  pack64(output, x);
644  pack64(output + 8, y);
645 #endif
646 }
647 
649 {
651  clean(l);
652 }
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: SpeckTiny.cpp:109
void decryptBlock(uint8_t *output, const uint8_t *input)
Decrypts a single block using this cipher.
Definition: SpeckSmall.cpp:261
bool setKey(const uint8_t *key, size_t len)
Sets the key to use for future encryption and decryption operations.
Definition: SpeckSmall.cpp:94
void clear()
Clears all security-sensitive state from this block cipher.
Definition: SpeckSmall.cpp:648
SpeckSmall()
Constructs a small-memory Speck block cipher with no initial key.
Definition: SpeckSmall.cpp:85
void clear()
Clears all security-sensitive state from this block cipher.
Definition: SpeckTiny.cpp:532