ArduinoLibs
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
KeccakCore.cpp
1 /*
2  * Copyright (C) 2015 Southern Storm Software, Pty Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "KeccakCore.h"
24 #include "Crypto.h"
25 #include "utility/EndianUtil.h"
26 #include "utility/RotateUtil.h"
27 #include "utility/ProgMemUtil.h"
28 #include <string.h>
29 
42 #if !defined(CRYPTO_LITTLE_ENDIAN)
43 // All of the Arduino platforms we care about are little-endian.
44 #error "KeccakCore is not supported on big-endian platforms yet - todo"
45 #endif
46 
55  : _blockSize(8)
56 {
57  memset(state.A, 0, sizeof(state.A));
58  state.inputSize = 0;
59  state.outputSize = 0;
60 }
61 
67 {
68  clean(state);
69 }
70 
76 size_t KeccakCore::capacity() const
77 {
78  return 1600 - ((size_t)_blockSize) * 8;
79 }
80 
94 void KeccakCore::setCapacity(size_t capacity)
95 {
96  _blockSize = (1600 - capacity) / 8;
97  reset();
98 }
99 
115 {
116  memset(state.A, 0, sizeof(state.A));
117  state.inputSize = 0;
118  state.outputSize = 0;
119 }
120 
133 void KeccakCore::update(const void *data, size_t size)
134 {
135  // Stop generating output while we incorporate the new data.
136  state.outputSize = 0;
137 
138  // Break the input up into chunks and process each in turn.
139  const uint8_t *d = (const uint8_t *)data;
140  while (size > 0) {
141  uint8_t len = _blockSize - state.inputSize;
142  if (len > size)
143  len = size;
144  uint8_t *Abytes = ((uint8_t *)state.A) + state.inputSize;
145  for (uint8_t posn = 0; posn < len; ++posn)
146  Abytes[posn] ^= d[posn];
147  state.inputSize += len;
148  size -= len;
149  d += len;
150  if (state.inputSize == _blockSize) {
151  keccakp();
152  state.inputSize = 0;
153  }
154  }
155 }
156 
167 void KeccakCore::pad(uint8_t tag)
168 {
169  // Padding for SHA3-NNN variants according to FIPS 202 appends "01",
170  // then another "1", then many zero bits, followed by a final "1".
171  // SHAKE appends "1111" first instead of "01". Note that SHA-3 numbers
172  // bits from the least significant, so appending "01" is equivalent
173  // to 0x02 for byte-aligned data, not 0x40.
174  uint8_t size = state.inputSize;
175  uint64_t *Awords = &(state.A[0][0]);
176  Awords[size / 8] ^= (((uint64_t)tag) << ((size % 8) * 8));
177  Awords[(_blockSize - 1) / 8] ^= 0x8000000000000000ULL;
178  keccakp();
179  state.inputSize = 0;
180  state.outputSize = 0;
181 }
182 
194 void KeccakCore::extract(void *data, size_t size)
195 {
196  // Stop accepting input while we are generating output.
197  state.inputSize = 0;
198 
199  // Copy the output data into the caller's return buffer.
200  uint8_t *d = (uint8_t *)data;
201  uint8_t tempSize;
202  while (size > 0) {
203  // Generate another output block if the current one has been exhausted.
204  if (state.outputSize >= _blockSize) {
205  keccakp();
206  state.outputSize = 0;
207  }
208 
209  // How many bytes can we copy this time around?
210  tempSize = _blockSize - state.outputSize;
211  if (tempSize > size)
212  tempSize = size;
213 
214  // Copy the partial output data into the caller's return buffer.
215  memcpy(d, ((uint8_t *)(state.A)) + state.outputSize, tempSize);
216  state.outputSize += tempSize;
217  size -= tempSize;
218  d += tempSize;
219  }
220 }
221 
226 {
227  clean(state);
228 }
229 
243 void KeccakCore::setHMACKey(const void *key, size_t len, uint8_t pad, size_t hashSize)
244 {
245  uint8_t *Abytes = (uint8_t *)state.A;
246  size_t size = blockSize();
247  reset();
248  if (len <= size) {
249  // Because the state has just been reset, state.A is set to
250  // all-zeroes. We can copy the key directly into the state
251  // and then XOR the block with the pad value.
252  memcpy(Abytes, key, len);
253  } else {
254  // The key is larger than the block size. Hash it down.
255  // Afterwards, state.A will contain the first block of data
256  // to be extracted. We truncate it to the first "hashSize"
257  // bytes and XOR with the padding.
258  update(key, len);
259  this->pad(0x06);
260  memset(Abytes + hashSize, pad, size - hashSize);
261  memset(Abytes + size, 0, sizeof(state.A) - size);
262  size = hashSize;
263  }
264  while (size > 0) {
265  *Abytes++ ^= pad;
266  --size;
267  }
268  keccakp();
269 }
270 
274 void KeccakCore::keccakp()
275 {
276  uint64_t B[5][5];
277 #if defined(__AVR__)
278  // This assembly code was generated by the "genkeccak.c" program.
279  // Do not modify this code directly. Instead modify "genkeccak.c"
280  // and then re-generate the code here.
281  for (uint8_t round = 0; round < 24; ++round) {
282  __asm__ __volatile__ (
283  "push r29\n"
284  "push r28\n"
285  "mov r28,r26\n"
286  "mov r29,r27\n"
287 
288  // Step mapping theta. Compute C.
289  "ldi r20,5\n"
290  "100:\n"
291  "ld r8,Z\n"
292  "ldd r9,Z+1\n"
293  "ldd r10,Z+2\n"
294  "ldd r11,Z+3\n"
295  "ldd r12,Z+4\n"
296  "ldd r13,Z+5\n"
297  "ldd r14,Z+6\n"
298  "ldd r15,Z+7\n"
299  "ldi r19,4\n"
300  "101:\n"
301  "adiw r30,40\n"
302  "ld __tmp_reg__,Z\n"
303  "eor r8,__tmp_reg__\n"
304  "ldd __tmp_reg__,Z+1\n"
305  "eor r9,__tmp_reg__\n"
306  "ldd __tmp_reg__,Z+2\n"
307  "eor r10,__tmp_reg__\n"
308  "ldd __tmp_reg__,Z+3\n"
309  "eor r11,__tmp_reg__\n"
310  "ldd __tmp_reg__,Z+4\n"
311  "eor r12,__tmp_reg__\n"
312  "ldd __tmp_reg__,Z+5\n"
313  "eor r13,__tmp_reg__\n"
314  "ldd __tmp_reg__,Z+6\n"
315  "eor r14,__tmp_reg__\n"
316  "ldd __tmp_reg__,Z+7\n"
317  "eor r15,__tmp_reg__\n"
318  "dec r19\n"
319  "brne 101b\n"
320  "st X+,r8\n"
321  "st X+,r9\n"
322  "st X+,r10\n"
323  "st X+,r11\n"
324  "st X+,r12\n"
325  "st X+,r13\n"
326  "st X+,r14\n"
327  "st X+,r15\n"
328  "subi r30,152\n"
329  "sbc r31,__zero_reg__\n"
330  "dec r20\n"
331  "brne 100b\n"
332  "sbiw r30,40\n"
333  "sbiw r26,40\n"
334 
335  // Step mapping theta. Compute D and XOR with A.
336  "ldd r8,Y+8\n"
337  "ldd r9,Y+9\n"
338  "ldd r10,Y+10\n"
339  "ldd r11,Y+11\n"
340  "ldd r12,Y+12\n"
341  "ldd r13,Y+13\n"
342  "ldd r14,Y+14\n"
343  "ldd r15,Y+15\n"
344  "lsl r8\n"
345  "rol r9\n"
346  "rol r10\n"
347  "rol r11\n"
348  "rol r12\n"
349  "rol r13\n"
350  "rol r14\n"
351  "rol r15\n"
352  "adc r8, __zero_reg__\n"
353  "ldd __tmp_reg__,Y+32\n"
354  "eor r8,__tmp_reg__\n"
355  "ldd __tmp_reg__,Y+33\n"
356  "eor r9,__tmp_reg__\n"
357  "ldd __tmp_reg__,Y+34\n"
358  "eor r10,__tmp_reg__\n"
359  "ldd __tmp_reg__,Y+35\n"
360  "eor r11,__tmp_reg__\n"
361  "ldd __tmp_reg__,Y+36\n"
362  "eor r12,__tmp_reg__\n"
363  "ldd __tmp_reg__,Y+37\n"
364  "eor r13,__tmp_reg__\n"
365  "ldd __tmp_reg__,Y+38\n"
366  "eor r14,__tmp_reg__\n"
367  "ldd __tmp_reg__,Y+39\n"
368  "eor r15,__tmp_reg__\n"
369  "ldi r19,5\n"
370  "103:\n"
371  "ld __tmp_reg__,Z\n"
372  "eor __tmp_reg__,r8\n"
373  "st Z,__tmp_reg__\n"
374  "ldd __tmp_reg__,Z+1\n"
375  "eor __tmp_reg__,r9\n"
376  "std Z+1,__tmp_reg__\n"
377  "ldd __tmp_reg__,Z+2\n"
378  "eor __tmp_reg__,r10\n"
379  "std Z+2,__tmp_reg__\n"
380  "ldd __tmp_reg__,Z+3\n"
381  "eor __tmp_reg__,r11\n"
382  "std Z+3,__tmp_reg__\n"
383  "ldd __tmp_reg__,Z+4\n"
384  "eor __tmp_reg__,r12\n"
385  "std Z+4,__tmp_reg__\n"
386  "ldd __tmp_reg__,Z+5\n"
387  "eor __tmp_reg__,r13\n"
388  "std Z+5,__tmp_reg__\n"
389  "ldd __tmp_reg__,Z+6\n"
390  "eor __tmp_reg__,r14\n"
391  "std Z+6,__tmp_reg__\n"
392  "ldd __tmp_reg__,Z+7\n"
393  "eor __tmp_reg__,r15\n"
394  "std Z+7,__tmp_reg__\n"
395  "adiw r30,40\n"
396  "dec r19\n"
397  "brne 103b\n"
398  "subi r30,192\n"
399  "sbc r31,__zero_reg__\n"
400  "ldd r8,Y+16\n"
401  "ldd r9,Y+17\n"
402  "ldd r10,Y+18\n"
403  "ldd r11,Y+19\n"
404  "ldd r12,Y+20\n"
405  "ldd r13,Y+21\n"
406  "ldd r14,Y+22\n"
407  "ldd r15,Y+23\n"
408  "lsl r8\n"
409  "rol r9\n"
410  "rol r10\n"
411  "rol r11\n"
412  "rol r12\n"
413  "rol r13\n"
414  "rol r14\n"
415  "rol r15\n"
416  "adc r8, __zero_reg__\n"
417  "ld __tmp_reg__,Y\n"
418  "eor r8,__tmp_reg__\n"
419  "ldd __tmp_reg__,Y+1\n"
420  "eor r9,__tmp_reg__\n"
421  "ldd __tmp_reg__,Y+2\n"
422  "eor r10,__tmp_reg__\n"
423  "ldd __tmp_reg__,Y+3\n"
424  "eor r11,__tmp_reg__\n"
425  "ldd __tmp_reg__,Y+4\n"
426  "eor r12,__tmp_reg__\n"
427  "ldd __tmp_reg__,Y+5\n"
428  "eor r13,__tmp_reg__\n"
429  "ldd __tmp_reg__,Y+6\n"
430  "eor r14,__tmp_reg__\n"
431  "ldd __tmp_reg__,Y+7\n"
432  "eor r15,__tmp_reg__\n"
433  "ldi r19,5\n"
434  "104:\n"
435  "ld __tmp_reg__,Z\n"
436  "eor __tmp_reg__,r8\n"
437  "st Z,__tmp_reg__\n"
438  "ldd __tmp_reg__,Z+1\n"
439  "eor __tmp_reg__,r9\n"
440  "std Z+1,__tmp_reg__\n"
441  "ldd __tmp_reg__,Z+2\n"
442  "eor __tmp_reg__,r10\n"
443  "std Z+2,__tmp_reg__\n"
444  "ldd __tmp_reg__,Z+3\n"
445  "eor __tmp_reg__,r11\n"
446  "std Z+3,__tmp_reg__\n"
447  "ldd __tmp_reg__,Z+4\n"
448  "eor __tmp_reg__,r12\n"
449  "std Z+4,__tmp_reg__\n"
450  "ldd __tmp_reg__,Z+5\n"
451  "eor __tmp_reg__,r13\n"
452  "std Z+5,__tmp_reg__\n"
453  "ldd __tmp_reg__,Z+6\n"
454  "eor __tmp_reg__,r14\n"
455  "std Z+6,__tmp_reg__\n"
456  "ldd __tmp_reg__,Z+7\n"
457  "eor __tmp_reg__,r15\n"
458  "std Z+7,__tmp_reg__\n"
459  "adiw r30,40\n"
460  "dec r19\n"
461  "brne 104b\n"
462  "subi r30,192\n"
463  "sbc r31,__zero_reg__\n"
464  "ldd r8,Y+24\n"
465  "ldd r9,Y+25\n"
466  "ldd r10,Y+26\n"
467  "ldd r11,Y+27\n"
468  "ldd r12,Y+28\n"
469  "ldd r13,Y+29\n"
470  "ldd r14,Y+30\n"
471  "ldd r15,Y+31\n"
472  "lsl r8\n"
473  "rol r9\n"
474  "rol r10\n"
475  "rol r11\n"
476  "rol r12\n"
477  "rol r13\n"
478  "rol r14\n"
479  "rol r15\n"
480  "adc r8, __zero_reg__\n"
481  "ldd __tmp_reg__,Y+8\n"
482  "eor r8,__tmp_reg__\n"
483  "ldd __tmp_reg__,Y+9\n"
484  "eor r9,__tmp_reg__\n"
485  "ldd __tmp_reg__,Y+10\n"
486  "eor r10,__tmp_reg__\n"
487  "ldd __tmp_reg__,Y+11\n"
488  "eor r11,__tmp_reg__\n"
489  "ldd __tmp_reg__,Y+12\n"
490  "eor r12,__tmp_reg__\n"
491  "ldd __tmp_reg__,Y+13\n"
492  "eor r13,__tmp_reg__\n"
493  "ldd __tmp_reg__,Y+14\n"
494  "eor r14,__tmp_reg__\n"
495  "ldd __tmp_reg__,Y+15\n"
496  "eor r15,__tmp_reg__\n"
497  "ldi r19,5\n"
498  "105:\n"
499  "ld __tmp_reg__,Z\n"
500  "eor __tmp_reg__,r8\n"
501  "st Z,__tmp_reg__\n"
502  "ldd __tmp_reg__,Z+1\n"
503  "eor __tmp_reg__,r9\n"
504  "std Z+1,__tmp_reg__\n"
505  "ldd __tmp_reg__,Z+2\n"
506  "eor __tmp_reg__,r10\n"
507  "std Z+2,__tmp_reg__\n"
508  "ldd __tmp_reg__,Z+3\n"
509  "eor __tmp_reg__,r11\n"
510  "std Z+3,__tmp_reg__\n"
511  "ldd __tmp_reg__,Z+4\n"
512  "eor __tmp_reg__,r12\n"
513  "std Z+4,__tmp_reg__\n"
514  "ldd __tmp_reg__,Z+5\n"
515  "eor __tmp_reg__,r13\n"
516  "std Z+5,__tmp_reg__\n"
517  "ldd __tmp_reg__,Z+6\n"
518  "eor __tmp_reg__,r14\n"
519  "std Z+6,__tmp_reg__\n"
520  "ldd __tmp_reg__,Z+7\n"
521  "eor __tmp_reg__,r15\n"
522  "std Z+7,__tmp_reg__\n"
523  "adiw r30,40\n"
524  "dec r19\n"
525  "brne 105b\n"
526  "subi r30,192\n"
527  "sbc r31,__zero_reg__\n"
528  "ldd r8,Y+32\n"
529  "ldd r9,Y+33\n"
530  "ldd r10,Y+34\n"
531  "ldd r11,Y+35\n"
532  "ldd r12,Y+36\n"
533  "ldd r13,Y+37\n"
534  "ldd r14,Y+38\n"
535  "ldd r15,Y+39\n"
536  "lsl r8\n"
537  "rol r9\n"
538  "rol r10\n"
539  "rol r11\n"
540  "rol r12\n"
541  "rol r13\n"
542  "rol r14\n"
543  "rol r15\n"
544  "adc r8, __zero_reg__\n"
545  "ldd __tmp_reg__,Y+16\n"
546  "eor r8,__tmp_reg__\n"
547  "ldd __tmp_reg__,Y+17\n"
548  "eor r9,__tmp_reg__\n"
549  "ldd __tmp_reg__,Y+18\n"
550  "eor r10,__tmp_reg__\n"
551  "ldd __tmp_reg__,Y+19\n"
552  "eor r11,__tmp_reg__\n"
553  "ldd __tmp_reg__,Y+20\n"
554  "eor r12,__tmp_reg__\n"
555  "ldd __tmp_reg__,Y+21\n"
556  "eor r13,__tmp_reg__\n"
557  "ldd __tmp_reg__,Y+22\n"
558  "eor r14,__tmp_reg__\n"
559  "ldd __tmp_reg__,Y+23\n"
560  "eor r15,__tmp_reg__\n"
561  "ldi r19,5\n"
562  "106:\n"
563  "ld __tmp_reg__,Z\n"
564  "eor __tmp_reg__,r8\n"
565  "st Z,__tmp_reg__\n"
566  "ldd __tmp_reg__,Z+1\n"
567  "eor __tmp_reg__,r9\n"
568  "std Z+1,__tmp_reg__\n"
569  "ldd __tmp_reg__,Z+2\n"
570  "eor __tmp_reg__,r10\n"
571  "std Z+2,__tmp_reg__\n"
572  "ldd __tmp_reg__,Z+3\n"
573  "eor __tmp_reg__,r11\n"
574  "std Z+3,__tmp_reg__\n"
575  "ldd __tmp_reg__,Z+4\n"
576  "eor __tmp_reg__,r12\n"
577  "std Z+4,__tmp_reg__\n"
578  "ldd __tmp_reg__,Z+5\n"
579  "eor __tmp_reg__,r13\n"
580  "std Z+5,__tmp_reg__\n"
581  "ldd __tmp_reg__,Z+6\n"
582  "eor __tmp_reg__,r14\n"
583  "std Z+6,__tmp_reg__\n"
584  "ldd __tmp_reg__,Z+7\n"
585  "eor __tmp_reg__,r15\n"
586  "std Z+7,__tmp_reg__\n"
587  "adiw r30,40\n"
588  "dec r19\n"
589  "brne 106b\n"
590  "subi r30,192\n"
591  "sbc r31,__zero_reg__\n"
592  "ld r8,Y\n"
593  "ldd r9,Y+1\n"
594  "ldd r10,Y+2\n"
595  "ldd r11,Y+3\n"
596  "ldd r12,Y+4\n"
597  "ldd r13,Y+5\n"
598  "ldd r14,Y+6\n"
599  "ldd r15,Y+7\n"
600  "lsl r8\n"
601  "rol r9\n"
602  "rol r10\n"
603  "rol r11\n"
604  "rol r12\n"
605  "rol r13\n"
606  "rol r14\n"
607  "rol r15\n"
608  "adc r8, __zero_reg__\n"
609  "ldd __tmp_reg__,Y+24\n"
610  "eor r8,__tmp_reg__\n"
611  "ldd __tmp_reg__,Y+25\n"
612  "eor r9,__tmp_reg__\n"
613  "ldd __tmp_reg__,Y+26\n"
614  "eor r10,__tmp_reg__\n"
615  "ldd __tmp_reg__,Y+27\n"
616  "eor r11,__tmp_reg__\n"
617  "ldd __tmp_reg__,Y+28\n"
618  "eor r12,__tmp_reg__\n"
619  "ldd __tmp_reg__,Y+29\n"
620  "eor r13,__tmp_reg__\n"
621  "ldd __tmp_reg__,Y+30\n"
622  "eor r14,__tmp_reg__\n"
623  "ldd __tmp_reg__,Y+31\n"
624  "eor r15,__tmp_reg__\n"
625  "ldi r19,5\n"
626  "107:\n"
627  "ld __tmp_reg__,Z\n"
628  "eor __tmp_reg__,r8\n"
629  "st Z,__tmp_reg__\n"
630  "ldd __tmp_reg__,Z+1\n"
631  "eor __tmp_reg__,r9\n"
632  "std Z+1,__tmp_reg__\n"
633  "ldd __tmp_reg__,Z+2\n"
634  "eor __tmp_reg__,r10\n"
635  "std Z+2,__tmp_reg__\n"
636  "ldd __tmp_reg__,Z+3\n"
637  "eor __tmp_reg__,r11\n"
638  "std Z+3,__tmp_reg__\n"
639  "ldd __tmp_reg__,Z+4\n"
640  "eor __tmp_reg__,r12\n"
641  "std Z+4,__tmp_reg__\n"
642  "ldd __tmp_reg__,Z+5\n"
643  "eor __tmp_reg__,r13\n"
644  "std Z+5,__tmp_reg__\n"
645  "ldd __tmp_reg__,Z+6\n"
646  "eor __tmp_reg__,r14\n"
647  "std Z+6,__tmp_reg__\n"
648  "ldd __tmp_reg__,Z+7\n"
649  "eor __tmp_reg__,r15\n"
650  "std Z+7,__tmp_reg__\n"
651  "adiw r30,40\n"
652  "dec r19\n"
653  "brne 107b\n"
654  "subi r30,232\n"
655  "sbc r31,__zero_reg__\n"
656 
657  // Step mappings rho and pi combined into one step.
658 
659  // B[0][0] = A[0][0]
660  "ld r8,Z\n"
661  "ldd r9,Z+1\n"
662  "ldd r10,Z+2\n"
663  "ldd r11,Z+3\n"
664  "ldd r12,Z+4\n"
665  "ldd r13,Z+5\n"
666  "ldd r14,Z+6\n"
667  "ldd r15,Z+7\n"
668  "st X+,r8\n"
669  "st X+,r9\n"
670  "st X+,r10\n"
671  "st X+,r11\n"
672  "st X+,r12\n"
673  "st X+,r13\n"
674  "st X+,r14\n"
675  "st X+,r15\n"
676 
677  // B[1][0] = leftRotate28_64(A[0][3])
678  "adiw r26,32\n"
679  "ldd r8,Z+24\n"
680  "ldd r9,Z+25\n"
681  "ldd r10,Z+26\n"
682  "ldd r11,Z+27\n"
683  "ldd r12,Z+28\n"
684  "ldd r13,Z+29\n"
685  "ldd r14,Z+30\n"
686  "ldd r15,Z+31\n"
687  "lsl r8\n"
688  "rol r9\n"
689  "rol r10\n"
690  "rol r11\n"
691  "rol r12\n"
692  "rol r13\n"
693  "rol r14\n"
694  "rol r15\n"
695  "adc r8, __zero_reg__\n"
696  "lsl r8\n"
697  "rol r9\n"
698  "rol r10\n"
699  "rol r11\n"
700  "rol r12\n"
701  "rol r13\n"
702  "rol r14\n"
703  "rol r15\n"
704  "adc r8, __zero_reg__\n"
705  "lsl r8\n"
706  "rol r9\n"
707  "rol r10\n"
708  "rol r11\n"
709  "rol r12\n"
710  "rol r13\n"
711  "rol r14\n"
712  "rol r15\n"
713  "adc r8, __zero_reg__\n"
714  "lsl r8\n"
715  "rol r9\n"
716  "rol r10\n"
717  "rol r11\n"
718  "rol r12\n"
719  "rol r13\n"
720  "rol r14\n"
721  "rol r15\n"
722  "adc r8, __zero_reg__\n"
723  "st X+,r13\n"
724  "st X+,r14\n"
725  "st X+,r15\n"
726  "st X+,r8\n"
727  "st X+,r9\n"
728  "st X+,r10\n"
729  "st X+,r11\n"
730  "st X+,r12\n"
731 
732  // B[2][0] = leftRotate1_64(A[0][1])
733  "adiw r26,32\n"
734  "ldd r8,Z+8\n"
735  "ldd r9,Z+9\n"
736  "ldd r10,Z+10\n"
737  "ldd r11,Z+11\n"
738  "ldd r12,Z+12\n"
739  "ldd r13,Z+13\n"
740  "ldd r14,Z+14\n"
741  "ldd r15,Z+15\n"
742  "lsl r8\n"
743  "rol r9\n"
744  "rol r10\n"
745  "rol r11\n"
746  "rol r12\n"
747  "rol r13\n"
748  "rol r14\n"
749  "rol r15\n"
750  "adc r8, __zero_reg__\n"
751  "st X+,r8\n"
752  "st X+,r9\n"
753  "st X+,r10\n"
754  "st X+,r11\n"
755  "st X+,r12\n"
756  "st X+,r13\n"
757  "st X+,r14\n"
758  "st X+,r15\n"
759 
760  // B[3][0] = leftRotate27_64(A[0][4])
761  "adiw r26,32\n"
762  "ldd r8,Z+32\n"
763  "ldd r9,Z+33\n"
764  "ldd r10,Z+34\n"
765  "ldd r11,Z+35\n"
766  "ldd r12,Z+36\n"
767  "ldd r13,Z+37\n"
768  "ldd r14,Z+38\n"
769  "ldd r15,Z+39\n"
770  "lsl r8\n"
771  "rol r9\n"
772  "rol r10\n"
773  "rol r11\n"
774  "rol r12\n"
775  "rol r13\n"
776  "rol r14\n"
777  "rol r15\n"
778  "adc r8, __zero_reg__\n"
779  "lsl r8\n"
780  "rol r9\n"
781  "rol r10\n"
782  "rol r11\n"
783  "rol r12\n"
784  "rol r13\n"
785  "rol r14\n"
786  "rol r15\n"
787  "adc r8, __zero_reg__\n"
788  "lsl r8\n"
789  "rol r9\n"
790  "rol r10\n"
791  "rol r11\n"
792  "rol r12\n"
793  "rol r13\n"
794  "rol r14\n"
795  "rol r15\n"
796  "adc r8, __zero_reg__\n"
797  "st X+,r13\n"
798  "st X+,r14\n"
799  "st X+,r15\n"
800  "st X+,r8\n"
801  "st X+,r9\n"
802  "st X+,r10\n"
803  "st X+,r11\n"
804  "st X+,r12\n"
805 
806  // B[4][0] = leftRotate62_64(A[0][2])
807  "adiw r26,32\n"
808  "ldd r8,Z+16\n"
809  "ldd r9,Z+17\n"
810  "ldd r10,Z+18\n"
811  "ldd r11,Z+19\n"
812  "ldd r12,Z+20\n"
813  "ldd r13,Z+21\n"
814  "ldd r14,Z+22\n"
815  "ldd r15,Z+23\n"
816  "bst r8,0\n"
817  "ror r15\n"
818  "ror r14\n"
819  "ror r13\n"
820  "ror r12\n"
821  "ror r11\n"
822  "ror r10\n"
823  "ror r9\n"
824  "ror r8\n"
825  "bld r15,7\n"
826  "bst r8,0\n"
827  "ror r15\n"
828  "ror r14\n"
829  "ror r13\n"
830  "ror r12\n"
831  "ror r11\n"
832  "ror r10\n"
833  "ror r9\n"
834  "ror r8\n"
835  "bld r15,7\n"
836  "st X+,r8\n"
837  "st X+,r9\n"
838  "st X+,r10\n"
839  "st X+,r11\n"
840  "st X+,r12\n"
841  "st X+,r13\n"
842  "st X+,r14\n"
843  "st X+,r15\n"
844 
845  // B[0][1] = leftRotate44_64(A[1][1])
846  "subi r26,160\n"
847  "sbc r27,__zero_reg__\n"
848  "adiw r30,40\n"
849  "ldd r8,Z+8\n"
850  "ldd r9,Z+9\n"
851  "ldd r10,Z+10\n"
852  "ldd r11,Z+11\n"
853  "ldd r12,Z+12\n"
854  "ldd r13,Z+13\n"
855  "ldd r14,Z+14\n"
856  "ldd r15,Z+15\n"
857  "lsl r8\n"
858  "rol r9\n"
859  "rol r10\n"
860  "rol r11\n"
861  "rol r12\n"
862  "rol r13\n"
863  "rol r14\n"
864  "rol r15\n"
865  "adc r8, __zero_reg__\n"
866  "lsl r8\n"
867  "rol r9\n"
868  "rol r10\n"
869  "rol r11\n"
870  "rol r12\n"
871  "rol r13\n"
872  "rol r14\n"
873  "rol r15\n"
874  "adc r8, __zero_reg__\n"
875  "lsl r8\n"
876  "rol r9\n"
877  "rol r10\n"
878  "rol r11\n"
879  "rol r12\n"
880  "rol r13\n"
881  "rol r14\n"
882  "rol r15\n"
883  "adc r8, __zero_reg__\n"
884  "lsl r8\n"
885  "rol r9\n"
886  "rol r10\n"
887  "rol r11\n"
888  "rol r12\n"
889  "rol r13\n"
890  "rol r14\n"
891  "rol r15\n"
892  "adc r8, __zero_reg__\n"
893  "st X+,r11\n"
894  "st X+,r12\n"
895  "st X+,r13\n"
896  "st X+,r14\n"
897  "st X+,r15\n"
898  "st X+,r8\n"
899  "st X+,r9\n"
900  "st X+,r10\n"
901 
902  // B[1][1] = leftRotate20_64(A[1][4])
903  "adiw r26,32\n"
904  "ldd r8,Z+32\n"
905  "ldd r9,Z+33\n"
906  "ldd r10,Z+34\n"
907  "ldd r11,Z+35\n"
908  "ldd r12,Z+36\n"
909  "ldd r13,Z+37\n"
910  "ldd r14,Z+38\n"
911  "ldd r15,Z+39\n"
912  "lsl r8\n"
913  "rol r9\n"
914  "rol r10\n"
915  "rol r11\n"
916  "rol r12\n"
917  "rol r13\n"
918  "rol r14\n"
919  "rol r15\n"
920  "adc r8, __zero_reg__\n"
921  "lsl r8\n"
922  "rol r9\n"
923  "rol r10\n"
924  "rol r11\n"
925  "rol r12\n"
926  "rol r13\n"
927  "rol r14\n"
928  "rol r15\n"
929  "adc r8, __zero_reg__\n"
930  "lsl r8\n"
931  "rol r9\n"
932  "rol r10\n"
933  "rol r11\n"
934  "rol r12\n"
935  "rol r13\n"
936  "rol r14\n"
937  "rol r15\n"
938  "adc r8, __zero_reg__\n"
939  "lsl r8\n"
940  "rol r9\n"
941  "rol r10\n"
942  "rol r11\n"
943  "rol r12\n"
944  "rol r13\n"
945  "rol r14\n"
946  "rol r15\n"
947  "adc r8, __zero_reg__\n"
948  "st X+,r14\n"
949  "st X+,r15\n"
950  "st X+,r8\n"
951  "st X+,r9\n"
952  "st X+,r10\n"
953  "st X+,r11\n"
954  "st X+,r12\n"
955  "st X+,r13\n"
956 
957  // B[2][1] = leftRotate6_64(A[1][2])
958  "adiw r26,32\n"
959  "ldd r8,Z+16\n"
960  "ldd r9,Z+17\n"
961  "ldd r10,Z+18\n"
962  "ldd r11,Z+19\n"
963  "ldd r12,Z+20\n"
964  "ldd r13,Z+21\n"
965  "ldd r14,Z+22\n"
966  "ldd r15,Z+23\n"
967  "bst r8,0\n"
968  "ror r15\n"
969  "ror r14\n"
970  "ror r13\n"
971  "ror r12\n"
972  "ror r11\n"
973  "ror r10\n"
974  "ror r9\n"
975  "ror r8\n"
976  "bld r15,7\n"
977  "bst r8,0\n"
978  "ror r15\n"
979  "ror r14\n"
980  "ror r13\n"
981  "ror r12\n"
982  "ror r11\n"
983  "ror r10\n"
984  "ror r9\n"
985  "ror r8\n"
986  "bld r15,7\n"
987  "st X+,r15\n"
988  "st X+,r8\n"
989  "st X+,r9\n"
990  "st X+,r10\n"
991  "st X+,r11\n"
992  "st X+,r12\n"
993  "st X+,r13\n"
994  "st X+,r14\n"
995 
996  // B[3][1] = leftRotate36_64(A[1][0])
997  "adiw r26,32\n"
998  "ld r8,Z\n"
999  "ldd r9,Z+1\n"
1000  "ldd r10,Z+2\n"
1001  "ldd r11,Z+3\n"
1002  "ldd r12,Z+4\n"
1003  "ldd r13,Z+5\n"
1004  "ldd r14,Z+6\n"
1005  "ldd r15,Z+7\n"
1006  "lsl r8\n"
1007  "rol r9\n"
1008  "rol r10\n"
1009  "rol r11\n"
1010  "rol r12\n"
1011  "rol r13\n"
1012  "rol r14\n"
1013  "rol r15\n"
1014  "adc r8, __zero_reg__\n"
1015  "lsl r8\n"
1016  "rol r9\n"
1017  "rol r10\n"
1018  "rol r11\n"
1019  "rol r12\n"
1020  "rol r13\n"
1021  "rol r14\n"
1022  "rol r15\n"
1023  "adc r8, __zero_reg__\n"
1024  "lsl r8\n"
1025  "rol r9\n"
1026  "rol r10\n"
1027  "rol r11\n"
1028  "rol r12\n"
1029  "rol r13\n"
1030  "rol r14\n"
1031  "rol r15\n"
1032  "adc r8, __zero_reg__\n"
1033  "lsl r8\n"
1034  "rol r9\n"
1035  "rol r10\n"
1036  "rol r11\n"
1037  "rol r12\n"
1038  "rol r13\n"
1039  "rol r14\n"
1040  "rol r15\n"
1041  "adc r8, __zero_reg__\n"
1042  "st X+,r12\n"
1043  "st X+,r13\n"
1044  "st X+,r14\n"
1045  "st X+,r15\n"
1046  "st X+,r8\n"
1047  "st X+,r9\n"
1048  "st X+,r10\n"
1049  "st X+,r11\n"
1050 
1051  // B[4][1] = leftRotate55_64(A[1][3])
1052  "adiw r26,32\n"
1053  "ldd r8,Z+24\n"
1054  "ldd r9,Z+25\n"
1055  "ldd r10,Z+26\n"
1056  "ldd r11,Z+27\n"
1057  "ldd r12,Z+28\n"
1058  "ldd r13,Z+29\n"
1059  "ldd r14,Z+30\n"
1060  "ldd r15,Z+31\n"
1061  "bst r8,0\n"
1062  "ror r15\n"
1063  "ror r14\n"
1064  "ror r13\n"
1065  "ror r12\n"
1066  "ror r11\n"
1067  "ror r10\n"
1068  "ror r9\n"
1069  "ror r8\n"
1070  "bld r15,7\n"
1071  "st X+,r9\n"
1072  "st X+,r10\n"
1073  "st X+,r11\n"
1074  "st X+,r12\n"
1075  "st X+,r13\n"
1076  "st X+,r14\n"
1077  "st X+,r15\n"
1078  "st X+,r8\n"
1079 
1080  // B[0][2] = leftRotate43_64(A[2][2])
1081  "subi r26,160\n"
1082  "sbc r27,__zero_reg__\n"
1083  "adiw r30,40\n"
1084  "ldd r8,Z+16\n"
1085  "ldd r9,Z+17\n"
1086  "ldd r10,Z+18\n"
1087  "ldd r11,Z+19\n"
1088  "ldd r12,Z+20\n"
1089  "ldd r13,Z+21\n"
1090  "ldd r14,Z+22\n"
1091  "ldd r15,Z+23\n"
1092  "lsl r8\n"
1093  "rol r9\n"
1094  "rol r10\n"
1095  "rol r11\n"
1096  "rol r12\n"
1097  "rol r13\n"
1098  "rol r14\n"
1099  "rol r15\n"
1100  "adc r8, __zero_reg__\n"
1101  "lsl r8\n"
1102  "rol r9\n"
1103  "rol r10\n"
1104  "rol r11\n"
1105  "rol r12\n"
1106  "rol r13\n"
1107  "rol r14\n"
1108  "rol r15\n"
1109  "adc r8, __zero_reg__\n"
1110  "lsl r8\n"
1111  "rol r9\n"
1112  "rol r10\n"
1113  "rol r11\n"
1114  "rol r12\n"
1115  "rol r13\n"
1116  "rol r14\n"
1117  "rol r15\n"
1118  "adc r8, __zero_reg__\n"
1119  "st X+,r11\n"
1120  "st X+,r12\n"
1121  "st X+,r13\n"
1122  "st X+,r14\n"
1123  "st X+,r15\n"
1124  "st X+,r8\n"
1125  "st X+,r9\n"
1126  "st X+,r10\n"
1127 
1128  // B[1][2] = leftRotate3_64(A[2][0])
1129  "adiw r26,32\n"
1130  "ld r8,Z\n"
1131  "ldd r9,Z+1\n"
1132  "ldd r10,Z+2\n"
1133  "ldd r11,Z+3\n"
1134  "ldd r12,Z+4\n"
1135  "ldd r13,Z+5\n"
1136  "ldd r14,Z+6\n"
1137  "ldd r15,Z+7\n"
1138  "lsl r8\n"
1139  "rol r9\n"
1140  "rol r10\n"
1141  "rol r11\n"
1142  "rol r12\n"
1143  "rol r13\n"
1144  "rol r14\n"
1145  "rol r15\n"
1146  "adc r8, __zero_reg__\n"
1147  "lsl r8\n"
1148  "rol r9\n"
1149  "rol r10\n"
1150  "rol r11\n"
1151  "rol r12\n"
1152  "rol r13\n"
1153  "rol r14\n"
1154  "rol r15\n"
1155  "adc r8, __zero_reg__\n"
1156  "lsl r8\n"
1157  "rol r9\n"
1158  "rol r10\n"
1159  "rol r11\n"
1160  "rol r12\n"
1161  "rol r13\n"
1162  "rol r14\n"
1163  "rol r15\n"
1164  "adc r8, __zero_reg__\n"
1165  "st X+,r8\n"
1166  "st X+,r9\n"
1167  "st X+,r10\n"
1168  "st X+,r11\n"
1169  "st X+,r12\n"
1170  "st X+,r13\n"
1171  "st X+,r14\n"
1172  "st X+,r15\n"
1173 
1174  // B[2][2] = leftRotate25_64(A[2][3])
1175  "adiw r26,32\n"
1176  "ldd r8,Z+24\n"
1177  "ldd r9,Z+25\n"
1178  "ldd r10,Z+26\n"
1179  "ldd r11,Z+27\n"
1180  "ldd r12,Z+28\n"
1181  "ldd r13,Z+29\n"
1182  "ldd r14,Z+30\n"
1183  "ldd r15,Z+31\n"
1184  "lsl r8\n"
1185  "rol r9\n"
1186  "rol r10\n"
1187  "rol r11\n"
1188  "rol r12\n"
1189  "rol r13\n"
1190  "rol r14\n"
1191  "rol r15\n"
1192  "adc r8, __zero_reg__\n"
1193  "st X+,r13\n"
1194  "st X+,r14\n"
1195  "st X+,r15\n"
1196  "st X+,r8\n"
1197  "st X+,r9\n"
1198  "st X+,r10\n"
1199  "st X+,r11\n"
1200  "st X+,r12\n"
1201 
1202  // B[3][2] = leftRotate10_64(A[2][1])
1203  "adiw r26,32\n"
1204  "ldd r8,Z+8\n"
1205  "ldd r9,Z+9\n"
1206  "ldd r10,Z+10\n"
1207  "ldd r11,Z+11\n"
1208  "ldd r12,Z+12\n"
1209  "ldd r13,Z+13\n"
1210  "ldd r14,Z+14\n"
1211  "ldd r15,Z+15\n"
1212  "lsl r8\n"
1213  "rol r9\n"
1214  "rol r10\n"
1215  "rol r11\n"
1216  "rol r12\n"
1217  "rol r13\n"
1218  "rol r14\n"
1219  "rol r15\n"
1220  "adc r8, __zero_reg__\n"
1221  "lsl r8\n"
1222  "rol r9\n"
1223  "rol r10\n"
1224  "rol r11\n"
1225  "rol r12\n"
1226  "rol r13\n"
1227  "rol r14\n"
1228  "rol r15\n"
1229  "adc r8, __zero_reg__\n"
1230  "st X+,r15\n"
1231  "st X+,r8\n"
1232  "st X+,r9\n"
1233  "st X+,r10\n"
1234  "st X+,r11\n"
1235  "st X+,r12\n"
1236  "st X+,r13\n"
1237  "st X+,r14\n"
1238 
1239  // B[4][2] = leftRotate39_64(A[2][4])
1240  "adiw r26,32\n"
1241  "ldd r8,Z+32\n"
1242  "ldd r9,Z+33\n"
1243  "ldd r10,Z+34\n"
1244  "ldd r11,Z+35\n"
1245  "ldd r12,Z+36\n"
1246  "ldd r13,Z+37\n"
1247  "ldd r14,Z+38\n"
1248  "ldd r15,Z+39\n"
1249  "bst r8,0\n"
1250  "ror r15\n"
1251  "ror r14\n"
1252  "ror r13\n"
1253  "ror r12\n"
1254  "ror r11\n"
1255  "ror r10\n"
1256  "ror r9\n"
1257  "ror r8\n"
1258  "bld r15,7\n"
1259  "st X+,r11\n"
1260  "st X+,r12\n"
1261  "st X+,r13\n"
1262  "st X+,r14\n"
1263  "st X+,r15\n"
1264  "st X+,r8\n"
1265  "st X+,r9\n"
1266  "st X+,r10\n"
1267 
1268  // B[0][3] = leftRotate21_64(A[3][3])
1269  "subi r26,160\n"
1270  "sbc r27,__zero_reg__\n"
1271  "adiw r30,40\n"
1272  "ldd r8,Z+24\n"
1273  "ldd r9,Z+25\n"
1274  "ldd r10,Z+26\n"
1275  "ldd r11,Z+27\n"
1276  "ldd r12,Z+28\n"
1277  "ldd r13,Z+29\n"
1278  "ldd r14,Z+30\n"
1279  "ldd r15,Z+31\n"
1280  "bst r8,0\n"
1281  "ror r15\n"
1282  "ror r14\n"
1283  "ror r13\n"
1284  "ror r12\n"
1285  "ror r11\n"
1286  "ror r10\n"
1287  "ror r9\n"
1288  "ror r8\n"
1289  "bld r15,7\n"
1290  "bst r8,0\n"
1291  "ror r15\n"
1292  "ror r14\n"
1293  "ror r13\n"
1294  "ror r12\n"
1295  "ror r11\n"
1296  "ror r10\n"
1297  "ror r9\n"
1298  "ror r8\n"
1299  "bld r15,7\n"
1300  "bst r8,0\n"
1301  "ror r15\n"
1302  "ror r14\n"
1303  "ror r13\n"
1304  "ror r12\n"
1305  "ror r11\n"
1306  "ror r10\n"
1307  "ror r9\n"
1308  "ror r8\n"
1309  "bld r15,7\n"
1310  "st X+,r13\n"
1311  "st X+,r14\n"
1312  "st X+,r15\n"
1313  "st X+,r8\n"
1314  "st X+,r9\n"
1315  "st X+,r10\n"
1316  "st X+,r11\n"
1317  "st X+,r12\n"
1318 
1319  // B[1][3] = leftRotate45_64(A[3][1])
1320  "adiw r26,32\n"
1321  "ldd r8,Z+8\n"
1322  "ldd r9,Z+9\n"
1323  "ldd r10,Z+10\n"
1324  "ldd r11,Z+11\n"
1325  "ldd r12,Z+12\n"
1326  "ldd r13,Z+13\n"
1327  "ldd r14,Z+14\n"
1328  "ldd r15,Z+15\n"
1329  "bst r8,0\n"
1330  "ror r15\n"
1331  "ror r14\n"
1332  "ror r13\n"
1333  "ror r12\n"
1334  "ror r11\n"
1335  "ror r10\n"
1336  "ror r9\n"
1337  "ror r8\n"
1338  "bld r15,7\n"
1339  "bst r8,0\n"
1340  "ror r15\n"
1341  "ror r14\n"
1342  "ror r13\n"
1343  "ror r12\n"
1344  "ror r11\n"
1345  "ror r10\n"
1346  "ror r9\n"
1347  "ror r8\n"
1348  "bld r15,7\n"
1349  "bst r8,0\n"
1350  "ror r15\n"
1351  "ror r14\n"
1352  "ror r13\n"
1353  "ror r12\n"
1354  "ror r11\n"
1355  "ror r10\n"
1356  "ror r9\n"
1357  "ror r8\n"
1358  "bld r15,7\n"
1359  "st X+,r10\n"
1360  "st X+,r11\n"
1361  "st X+,r12\n"
1362  "st X+,r13\n"
1363  "st X+,r14\n"
1364  "st X+,r15\n"
1365  "st X+,r8\n"
1366  "st X+,r9\n"
1367 
1368  // B[2][3] = leftRotate8_64(A[3][4])
1369  "adiw r26,32\n"
1370  "ldd r8,Z+32\n"
1371  "ldd r9,Z+33\n"
1372  "ldd r10,Z+34\n"
1373  "ldd r11,Z+35\n"
1374  "ldd r12,Z+36\n"
1375  "ldd r13,Z+37\n"
1376  "ldd r14,Z+38\n"
1377  "ldd r15,Z+39\n"
1378  "st X+,r15\n"
1379  "st X+,r8\n"
1380  "st X+,r9\n"
1381  "st X+,r10\n"
1382  "st X+,r11\n"
1383  "st X+,r12\n"
1384  "st X+,r13\n"
1385  "st X+,r14\n"
1386 
1387  // B[3][3] = leftRotate15_64(A[3][2])
1388  "adiw r26,32\n"
1389  "ldd r8,Z+16\n"
1390  "ldd r9,Z+17\n"
1391  "ldd r10,Z+18\n"
1392  "ldd r11,Z+19\n"
1393  "ldd r12,Z+20\n"
1394  "ldd r13,Z+21\n"
1395  "ldd r14,Z+22\n"
1396  "ldd r15,Z+23\n"
1397  "bst r8,0\n"
1398  "ror r15\n"
1399  "ror r14\n"
1400  "ror r13\n"
1401  "ror r12\n"
1402  "ror r11\n"
1403  "ror r10\n"
1404  "ror r9\n"
1405  "ror r8\n"
1406  "bld r15,7\n"
1407  "st X+,r14\n"
1408  "st X+,r15\n"
1409  "st X+,r8\n"
1410  "st X+,r9\n"
1411  "st X+,r10\n"
1412  "st X+,r11\n"
1413  "st X+,r12\n"
1414  "st X+,r13\n"
1415 
1416  // B[4][3] = leftRotate41_64(A[3][0])
1417  "adiw r26,32\n"
1418  "ld r8,Z\n"
1419  "ldd r9,Z+1\n"
1420  "ldd r10,Z+2\n"
1421  "ldd r11,Z+3\n"
1422  "ldd r12,Z+4\n"
1423  "ldd r13,Z+5\n"
1424  "ldd r14,Z+6\n"
1425  "ldd r15,Z+7\n"
1426  "lsl r8\n"
1427  "rol r9\n"
1428  "rol r10\n"
1429  "rol r11\n"
1430  "rol r12\n"
1431  "rol r13\n"
1432  "rol r14\n"
1433  "rol r15\n"
1434  "adc r8, __zero_reg__\n"
1435  "st X+,r11\n"
1436  "st X+,r12\n"
1437  "st X+,r13\n"
1438  "st X+,r14\n"
1439  "st X+,r15\n"
1440  "st X+,r8\n"
1441  "st X+,r9\n"
1442  "st X+,r10\n"
1443 
1444  // B[0][4] = leftRotate14_64(A[4][4])
1445  "subi r26,160\n"
1446  "sbc r27,__zero_reg__\n"
1447  "adiw r30,40\n"
1448  "ldd r8,Z+32\n"
1449  "ldd r9,Z+33\n"
1450  "ldd r10,Z+34\n"
1451  "ldd r11,Z+35\n"
1452  "ldd r12,Z+36\n"
1453  "ldd r13,Z+37\n"
1454  "ldd r14,Z+38\n"
1455  "ldd r15,Z+39\n"
1456  "bst r8,0\n"
1457  "ror r15\n"
1458  "ror r14\n"
1459  "ror r13\n"
1460  "ror r12\n"
1461  "ror r11\n"
1462  "ror r10\n"
1463  "ror r9\n"
1464  "ror r8\n"
1465  "bld r15,7\n"
1466  "bst r8,0\n"
1467  "ror r15\n"
1468  "ror r14\n"
1469  "ror r13\n"
1470  "ror r12\n"
1471  "ror r11\n"
1472  "ror r10\n"
1473  "ror r9\n"
1474  "ror r8\n"
1475  "bld r15,7\n"
1476  "st X+,r14\n"
1477  "st X+,r15\n"
1478  "st X+,r8\n"
1479  "st X+,r9\n"
1480  "st X+,r10\n"
1481  "st X+,r11\n"
1482  "st X+,r12\n"
1483  "st X+,r13\n"
1484 
1485  // B[1][4] = leftRotate61_64(A[4][2])
1486  "adiw r26,32\n"
1487  "ldd r8,Z+16\n"
1488  "ldd r9,Z+17\n"
1489  "ldd r10,Z+18\n"
1490  "ldd r11,Z+19\n"
1491  "ldd r12,Z+20\n"
1492  "ldd r13,Z+21\n"
1493  "ldd r14,Z+22\n"
1494  "ldd r15,Z+23\n"
1495  "bst r8,0\n"
1496  "ror r15\n"
1497  "ror r14\n"
1498  "ror r13\n"
1499  "ror r12\n"
1500  "ror r11\n"
1501  "ror r10\n"
1502  "ror r9\n"
1503  "ror r8\n"
1504  "bld r15,7\n"
1505  "bst r8,0\n"
1506  "ror r15\n"
1507  "ror r14\n"
1508  "ror r13\n"
1509  "ror r12\n"
1510  "ror r11\n"
1511  "ror r10\n"
1512  "ror r9\n"
1513  "ror r8\n"
1514  "bld r15,7\n"
1515  "bst r8,0\n"
1516  "ror r15\n"
1517  "ror r14\n"
1518  "ror r13\n"
1519  "ror r12\n"
1520  "ror r11\n"
1521  "ror r10\n"
1522  "ror r9\n"
1523  "ror r8\n"
1524  "bld r15,7\n"
1525  "st X+,r8\n"
1526  "st X+,r9\n"
1527  "st X+,r10\n"
1528  "st X+,r11\n"
1529  "st X+,r12\n"
1530  "st X+,r13\n"
1531  "st X+,r14\n"
1532  "st X+,r15\n"
1533 
1534  // B[2][4] = leftRotate18_64(A[4][0])
1535  "adiw r26,32\n"
1536  "ld r8,Z\n"
1537  "ldd r9,Z+1\n"
1538  "ldd r10,Z+2\n"
1539  "ldd r11,Z+3\n"
1540  "ldd r12,Z+4\n"
1541  "ldd r13,Z+5\n"
1542  "ldd r14,Z+6\n"
1543  "ldd r15,Z+7\n"
1544  "lsl r8\n"
1545  "rol r9\n"
1546  "rol r10\n"
1547  "rol r11\n"
1548  "rol r12\n"
1549  "rol r13\n"
1550  "rol r14\n"
1551  "rol r15\n"
1552  "adc r8, __zero_reg__\n"
1553  "lsl r8\n"
1554  "rol r9\n"
1555  "rol r10\n"
1556  "rol r11\n"
1557  "rol r12\n"
1558  "rol r13\n"
1559  "rol r14\n"
1560  "rol r15\n"
1561  "adc r8, __zero_reg__\n"
1562  "st X+,r14\n"
1563  "st X+,r15\n"
1564  "st X+,r8\n"
1565  "st X+,r9\n"
1566  "st X+,r10\n"
1567  "st X+,r11\n"
1568  "st X+,r12\n"
1569  "st X+,r13\n"
1570 
1571  // B[3][4] = leftRotate56_64(A[4][3])
1572  "adiw r26,32\n"
1573  "ldd r8,Z+24\n"
1574  "ldd r9,Z+25\n"
1575  "ldd r10,Z+26\n"
1576  "ldd r11,Z+27\n"
1577  "ldd r12,Z+28\n"
1578  "ldd r13,Z+29\n"
1579  "ldd r14,Z+30\n"
1580  "ldd r15,Z+31\n"
1581  "st X+,r9\n"
1582  "st X+,r10\n"
1583  "st X+,r11\n"
1584  "st X+,r12\n"
1585  "st X+,r13\n"
1586  "st X+,r14\n"
1587  "st X+,r15\n"
1588  "st X+,r8\n"
1589 
1590  // B[4][4] = leftRotate2_64(A[4][1])
1591  "adiw r26,32\n"
1592  "ldd r8,Z+8\n"
1593  "ldd r9,Z+9\n"
1594  "ldd r10,Z+10\n"
1595  "ldd r11,Z+11\n"
1596  "ldd r12,Z+12\n"
1597  "ldd r13,Z+13\n"
1598  "ldd r14,Z+14\n"
1599  "ldd r15,Z+15\n"
1600  "lsl r8\n"
1601  "rol r9\n"
1602  "rol r10\n"
1603  "rol r11\n"
1604  "rol r12\n"
1605  "rol r13\n"
1606  "rol r14\n"
1607  "rol r15\n"
1608  "adc r8, __zero_reg__\n"
1609  "lsl r8\n"
1610  "rol r9\n"
1611  "rol r10\n"
1612  "rol r11\n"
1613  "rol r12\n"
1614  "rol r13\n"
1615  "rol r14\n"
1616  "rol r15\n"
1617  "adc r8, __zero_reg__\n"
1618  "st X+,r8\n"
1619  "st X+,r9\n"
1620  "st X+,r10\n"
1621  "st X+,r11\n"
1622  "st X+,r12\n"
1623  "st X+,r13\n"
1624  "st X+,r14\n"
1625  "st X+,r15\n"
1626  "subi r26,200\n"
1627  "sbc r27,__zero_reg__\n"
1628  "subi r30,160\n"
1629  "sbc r31,__zero_reg__\n"
1630 
1631  // Step mapping chi.
1632  "ldi r20,5\n"
1633  "50:\n"
1634  "ld r8,Y\n"
1635  "ldd r9,Y+8\n"
1636  "ldd r10,Y+16\n"
1637  "ldd r11,Y+24\n"
1638  "ldd r12,Y+32\n"
1639  "mov r13,r9\n"
1640  "com r13\n"
1641  "and r13,r10\n"
1642  "eor r13,r8\n"
1643  "mov r14,r10\n"
1644  "com r14\n"
1645  "and r14,r11\n"
1646  "eor r14,r9\n"
1647  "mov r15,r11\n"
1648  "com r15\n"
1649  "and r15,r12\n"
1650  "eor r15,r10\n"
1651  "mov r17,r12\n"
1652  "com r17\n"
1653  "and r17,r8\n"
1654  "eor r17,r11\n"
1655  "mov r16,r8\n"
1656  "com r16\n"
1657  "and r16,r9\n"
1658  "eor r16,r12\n"
1659  "st Z,r13\n"
1660  "std Z+8,r14\n"
1661  "std Z+16,r15\n"
1662  "std Z+24,r17\n"
1663  "std Z+32,r16\n"
1664  "ldd r8,Y+1\n"
1665  "ldd r9,Y+9\n"
1666  "ldd r10,Y+17\n"
1667  "ldd r11,Y+25\n"
1668  "ldd r12,Y+33\n"
1669  "mov r13,r9\n"
1670  "com r13\n"
1671  "and r13,r10\n"
1672  "eor r13,r8\n"
1673  "mov r14,r10\n"
1674  "com r14\n"
1675  "and r14,r11\n"
1676  "eor r14,r9\n"
1677  "mov r15,r11\n"
1678  "com r15\n"
1679  "and r15,r12\n"
1680  "eor r15,r10\n"
1681  "mov r17,r12\n"
1682  "com r17\n"
1683  "and r17,r8\n"
1684  "eor r17,r11\n"
1685  "mov r16,r8\n"
1686  "com r16\n"
1687  "and r16,r9\n"
1688  "eor r16,r12\n"
1689  "std Z+1,r13\n"
1690  "std Z+9,r14\n"
1691  "std Z+17,r15\n"
1692  "std Z+25,r17\n"
1693  "std Z+33,r16\n"
1694  "ldd r8,Y+2\n"
1695  "ldd r9,Y+10\n"
1696  "ldd r10,Y+18\n"
1697  "ldd r11,Y+26\n"
1698  "ldd r12,Y+34\n"
1699  "mov r13,r9\n"
1700  "com r13\n"
1701  "and r13,r10\n"
1702  "eor r13,r8\n"
1703  "mov r14,r10\n"
1704  "com r14\n"
1705  "and r14,r11\n"
1706  "eor r14,r9\n"
1707  "mov r15,r11\n"
1708  "com r15\n"
1709  "and r15,r12\n"
1710  "eor r15,r10\n"
1711  "mov r17,r12\n"
1712  "com r17\n"
1713  "and r17,r8\n"
1714  "eor r17,r11\n"
1715  "mov r16,r8\n"
1716  "com r16\n"
1717  "and r16,r9\n"
1718  "eor r16,r12\n"
1719  "std Z+2,r13\n"
1720  "std Z+10,r14\n"
1721  "std Z+18,r15\n"
1722  "std Z+26,r17\n"
1723  "std Z+34,r16\n"
1724  "ldd r8,Y+3\n"
1725  "ldd r9,Y+11\n"
1726  "ldd r10,Y+19\n"
1727  "ldd r11,Y+27\n"
1728  "ldd r12,Y+35\n"
1729  "mov r13,r9\n"
1730  "com r13\n"
1731  "and r13,r10\n"
1732  "eor r13,r8\n"
1733  "mov r14,r10\n"
1734  "com r14\n"
1735  "and r14,r11\n"
1736  "eor r14,r9\n"
1737  "mov r15,r11\n"
1738  "com r15\n"
1739  "and r15,r12\n"
1740  "eor r15,r10\n"
1741  "mov r17,r12\n"
1742  "com r17\n"
1743  "and r17,r8\n"
1744  "eor r17,r11\n"
1745  "mov r16,r8\n"
1746  "com r16\n"
1747  "and r16,r9\n"
1748  "eor r16,r12\n"
1749  "std Z+3,r13\n"
1750  "std Z+11,r14\n"
1751  "std Z+19,r15\n"
1752  "std Z+27,r17\n"
1753  "std Z+35,r16\n"
1754  "ldd r8,Y+4\n"
1755  "ldd r9,Y+12\n"
1756  "ldd r10,Y+20\n"
1757  "ldd r11,Y+28\n"
1758  "ldd r12,Y+36\n"
1759  "mov r13,r9\n"
1760  "com r13\n"
1761  "and r13,r10\n"
1762  "eor r13,r8\n"
1763  "mov r14,r10\n"
1764  "com r14\n"
1765  "and r14,r11\n"
1766  "eor r14,r9\n"
1767  "mov r15,r11\n"
1768  "com r15\n"
1769  "and r15,r12\n"
1770  "eor r15,r10\n"
1771  "mov r17,r12\n"
1772  "com r17\n"
1773  "and r17,r8\n"
1774  "eor r17,r11\n"
1775  "mov r16,r8\n"
1776  "com r16\n"
1777  "and r16,r9\n"
1778  "eor r16,r12\n"
1779  "std Z+4,r13\n"
1780  "std Z+12,r14\n"
1781  "std Z+20,r15\n"
1782  "std Z+28,r17\n"
1783  "std Z+36,r16\n"
1784  "ldd r8,Y+5\n"
1785  "ldd r9,Y+13\n"
1786  "ldd r10,Y+21\n"
1787  "ldd r11,Y+29\n"
1788  "ldd r12,Y+37\n"
1789  "mov r13,r9\n"
1790  "com r13\n"
1791  "and r13,r10\n"
1792  "eor r13,r8\n"
1793  "mov r14,r10\n"
1794  "com r14\n"
1795  "and r14,r11\n"
1796  "eor r14,r9\n"
1797  "mov r15,r11\n"
1798  "com r15\n"
1799  "and r15,r12\n"
1800  "eor r15,r10\n"
1801  "mov r17,r12\n"
1802  "com r17\n"
1803  "and r17,r8\n"
1804  "eor r17,r11\n"
1805  "mov r16,r8\n"
1806  "com r16\n"
1807  "and r16,r9\n"
1808  "eor r16,r12\n"
1809  "std Z+5,r13\n"
1810  "std Z+13,r14\n"
1811  "std Z+21,r15\n"
1812  "std Z+29,r17\n"
1813  "std Z+37,r16\n"
1814  "ldd r8,Y+6\n"
1815  "ldd r9,Y+14\n"
1816  "ldd r10,Y+22\n"
1817  "ldd r11,Y+30\n"
1818  "ldd r12,Y+38\n"
1819  "mov r13,r9\n"
1820  "com r13\n"
1821  "and r13,r10\n"
1822  "eor r13,r8\n"
1823  "mov r14,r10\n"
1824  "com r14\n"
1825  "and r14,r11\n"
1826  "eor r14,r9\n"
1827  "mov r15,r11\n"
1828  "com r15\n"
1829  "and r15,r12\n"
1830  "eor r15,r10\n"
1831  "mov r17,r12\n"
1832  "com r17\n"
1833  "and r17,r8\n"
1834  "eor r17,r11\n"
1835  "mov r16,r8\n"
1836  "com r16\n"
1837  "and r16,r9\n"
1838  "eor r16,r12\n"
1839  "std Z+6,r13\n"
1840  "std Z+14,r14\n"
1841  "std Z+22,r15\n"
1842  "std Z+30,r17\n"
1843  "std Z+38,r16\n"
1844  "ldd r8,Y+7\n"
1845  "ldd r9,Y+15\n"
1846  "ldd r10,Y+23\n"
1847  "ldd r11,Y+31\n"
1848  "ldd r12,Y+39\n"
1849  "mov r13,r9\n"
1850  "com r13\n"
1851  "and r13,r10\n"
1852  "eor r13,r8\n"
1853  "mov r14,r10\n"
1854  "com r14\n"
1855  "and r14,r11\n"
1856  "eor r14,r9\n"
1857  "mov r15,r11\n"
1858  "com r15\n"
1859  "and r15,r12\n"
1860  "eor r15,r10\n"
1861  "mov r17,r12\n"
1862  "com r17\n"
1863  "and r17,r8\n"
1864  "eor r17,r11\n"
1865  "mov r16,r8\n"
1866  "com r16\n"
1867  "and r16,r9\n"
1868  "eor r16,r12\n"
1869  "std Z+7,r13\n"
1870  "std Z+15,r14\n"
1871  "std Z+23,r15\n"
1872  "std Z+31,r17\n"
1873  "std Z+39,r16\n"
1874  "adiw r30,40\n"
1875  "adiw r28,40\n"
1876  "dec r20\n"
1877  "breq 51f\n"
1878  "rjmp 50b\n"
1879  "51:\n"
1880  "pop r28\n"
1881  "pop r29\n"
1882 
1883  // Done
1884  : : "x"(B), "z"(state.A)
1885  : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
1886  "r16", "r17", "r18", "r19", "r20", "r21", "memory"
1887  );
1888 #else
1889  static const uint8_t addMod5Table[9] PROGMEM = {
1890  0, 1, 2, 3, 4, 0, 1, 2, 3
1891  };
1892  #define addMod5(x, y) (pgm_read_byte(&(addMod5Table[(x) + (y)])))
1893  uint64_t D;
1894  uint8_t index, index2;
1895  for (uint8_t round = 0; round < 24; ++round) {
1896  // Step mapping theta. The specification mentions two temporary
1897  // arrays of size 5 called C and D. To save a bit of memory,
1898  // we use the first row of B to store C and compute D on the fly.
1899  for (index = 0; index < 5; ++index) {
1900  B[0][index] = state.A[0][index] ^ state.A[1][index] ^
1901  state.A[2][index] ^ state.A[3][index] ^
1902  state.A[4][index];
1903  }
1904  for (index = 0; index < 5; ++index) {
1905  D = B[0][addMod5(index, 4)] ^
1906  leftRotate1_64(B[0][addMod5(index, 1)]);
1907  for (index2 = 0; index2 < 5; ++index2)
1908  state.A[index2][index] ^= D;
1909  }
1910 
1911  // Step mapping rho and pi combined into a single step.
1912  // Rotate all lanes by a specific offset and rearrange.
1913  B[0][0] = state.A[0][0];
1914  B[1][0] = leftRotate28_64(state.A[0][3]);
1915  B[2][0] = leftRotate1_64 (state.A[0][1]);
1916  B[3][0] = leftRotate27_64(state.A[0][4]);
1917  B[4][0] = leftRotate62_64(state.A[0][2]);
1918  B[0][1] = leftRotate44_64(state.A[1][1]);
1919  B[1][1] = leftRotate20_64(state.A[1][4]);
1920  B[2][1] = leftRotate6_64 (state.A[1][2]);
1921  B[3][1] = leftRotate36_64(state.A[1][0]);
1922  B[4][1] = leftRotate55_64(state.A[1][3]);
1923  B[0][2] = leftRotate43_64(state.A[2][2]);
1924  B[1][2] = leftRotate3_64 (state.A[2][0]);
1925  B[2][2] = leftRotate25_64(state.A[2][3]);
1926  B[3][2] = leftRotate10_64(state.A[2][1]);
1927  B[4][2] = leftRotate39_64(state.A[2][4]);
1928  B[0][3] = leftRotate21_64(state.A[3][3]);
1929  B[1][3] = leftRotate45_64(state.A[3][1]);
1930  B[2][3] = leftRotate8_64 (state.A[3][4]);
1931  B[3][3] = leftRotate15_64(state.A[3][2]);
1932  B[4][3] = leftRotate41_64(state.A[3][0]);
1933  B[0][4] = leftRotate14_64(state.A[4][4]);
1934  B[1][4] = leftRotate61_64(state.A[4][2]);
1935  B[2][4] = leftRotate18_64(state.A[4][0]);
1936  B[3][4] = leftRotate56_64(state.A[4][3]);
1937  B[4][4] = leftRotate2_64 (state.A[4][1]);
1938 
1939  // Step mapping chi. Combine each lane with two other lanes in its row.
1940  for (index = 0; index < 5; ++index) {
1941  for (index2 = 0; index2 < 5; ++index2) {
1942  state.A[index2][index] =
1943  B[index2][index] ^
1944  ((~B[index2][addMod5(index, 1)]) &
1945  B[index2][addMod5(index, 2)]);
1946  }
1947  }
1948 #endif
1949 
1950  // Step mapping iota. XOR A[0][0] with the round constant.
1951  static uint64_t const RC[24] PROGMEM = {
1952  0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808AULL,
1953  0x8000000080008000ULL, 0x000000000000808BULL, 0x0000000080000001ULL,
1954  0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008AULL,
1955  0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000AULL,
1956  0x000000008000808BULL, 0x800000000000008BULL, 0x8000000000008089ULL,
1957  0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
1958  0x000000000000800AULL, 0x800000008000000AULL, 0x8000000080008081ULL,
1959  0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
1960  };
1961  state.A[0][0] ^= pgm_read_qword(RC + round);
1962  }
1963 }
size_t blockSize() const
Returns the input block size for the sponge function in bytes.
Definition: KeccakCore.h:38
void setHMACKey(const void *key, size_t len, uint8_t pad, size_t hashSize)
Sets an HMAC key for a Keccak-based hash algorithm.
Definition: KeccakCore.cpp:243
void setCapacity(size_t capacity)
Sets the capacity of the Keccak sponge function in bits.
Definition: KeccakCore.cpp:94
~KeccakCore()
Destroys this Keccak sponge function after clearing all sensitive information.
Definition: KeccakCore.cpp:66
void extract(void *data, size_t size)
Extracts data from the Keccak sponge function.
Definition: KeccakCore.cpp:194
void pad(uint8_t tag)
Pads the last block of input data to blockSize().
Definition: KeccakCore.cpp:167
size_t capacity() const
Returns the capacity of the sponge function in bits.
Definition: KeccakCore.cpp:76
KeccakCore()
Constructs a new Keccak sponge function.
Definition: KeccakCore.cpp:54
void update(const void *data, size_t size)
Updates the Keccak sponge function with more input data.
Definition: KeccakCore.cpp:133
void clear()
Clears all sensitive data from this object.
Definition: KeccakCore.cpp:225
void reset()
Resets the Keccak sponge function ready for a new session.
Definition: KeccakCore.cpp:114