mirror of
https://github.com/taigrr/arduinolibs
synced 2025-01-18 04:33:12 -08:00
893 lines
32 KiB
C
893 lines
32 KiB
C
/*
|
|
* Copyright (C) 2016 Southern Storm Software, Pty Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
// Special-purpose compiler that generates the AVR version of Speck*.
|
|
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
|
|
// Number of spaces written in front of every generated output line.
static int indent = 4;

// First registers of the two 8-register groups that hold 64-bit temporaries.
static int t1_reg = 8;      // Temporary 64-bit value (any reg).
static int t2_reg = 16;     // Temporary 64-bit value (any reg).

// Low registers of the pointer pairs used by the generated code.
static int x_reg = 26;
//static int y_reg = 28;
static int z_reg = 30;

// Register for temporary constants (must be a high reg).
static int const_reg = 24;

// Spare temporary register.
static int temp_reg = 25;
|
|
|
|
// Describes a group of eight registers that together store one 64-bit
// quantity.  Byte n of the value lives in register
// first + ((n + offset) % 8), see REGn().
typedef struct
{
    int first;      // Number of the first register in the group.
    int offset;     // Byte offset that models rotations by multiples of 8.

} Reg64;
|
|
|
|
// Indent the code and print a string.
|
|
void indent_printf(const char *format, ...)
|
|
{
|
|
va_list va;
|
|
int posn;
|
|
va_start(va, format);
|
|
for (posn = 0; posn < indent; ++posn)
|
|
putc(' ', stdout);
|
|
vfprintf(stdout, format, va);
|
|
va_end(va);
|
|
}
|
|
|
|
// Print an assembler instruction within quotes.
|
|
void insn_printf(const char *format, ...)
|
|
{
|
|
va_list va;
|
|
int posn;
|
|
va_start(va, format);
|
|
for (posn = 0; posn < indent; ++posn)
|
|
putc(' ', stdout);
|
|
putc('"', stdout);
|
|
vfprintf(stdout, format, va);
|
|
putc('\\', stdout);
|
|
putc('n', stdout);
|
|
putc('"', stdout);
|
|
putc('\n', stdout);
|
|
va_end(va);
|
|
}
|
|
|
|
// Register number that holds byte "n" of the 64-bit value described by
// "reg", taking the current multiple-of-8 rotation offset into account.
#define REGn(reg, n) ((((reg)->offset + (n)) % 8) + (reg)->first)
|
|
|
|
// Emit the instructions that rotate the 64-bit value in "reg" left by 1 bit.
void leftRotate1(const Reg64 *reg)
{
    int byte;

    // Shift the lowest byte, then carry the bit up through bytes 1..7.
    insn_printf("lsl r%d", REGn(reg, 0));
    for (byte = 1; byte < 8; ++byte)
        insn_printf("rol r%d", REGn(reg, byte));

    // Add the carry out of the top byte back into the lowest byte.
    insn_printf("adc r%d, __zero_reg__", REGn(reg, 0));
}
|
|
|
|
// Emit a left rotation by 3 bits as three consecutive 1-bit rotations.
void leftRotate3(const Reg64 *reg)
{
    int count;

    for (count = 0; count < 3; ++count)
        leftRotate1(reg);
}
|
|
|
|
// Rotate left by 8 bits.  No instructions are emitted: only the byte
// offset recorded in "reg" is adjusted.
void leftRotate8(Reg64 *reg)
{
    int shifted = reg->offset + 7;

    reg->offset = shifted % 8;
}
|
|
|
|
// Emit the instructions that rotate the 64-bit value in "reg" right by 1 bit.
void rightRotate1(const Reg64 *reg)
{
    int byte;

    // Save bit 0 of the lowest byte in the T flag.
    insn_printf("bst r%d,0", REGn(reg, 0));

    // Rotate through carry from the highest byte down to the lowest.
    for (byte = 7; byte >= 0; --byte)
        insn_printf("ror r%d", REGn(reg, byte));

    // Place the saved bit into bit 7 of the highest byte.
    insn_printf("bld r%d,7", REGn(reg, 7));
}
|
|
|
|
// Emit a right rotation by 3 bits as three consecutive 1-bit rotations.
void rightRotate3(const Reg64 *reg)
{
    int count;

    for (count = 0; count < 3; ++count)
        rightRotate1(reg);
}
|
|
|
|
// Rotate right by 8 bits.  No instructions are emitted: only the byte
// offset recorded in "reg" is adjusted.
void rightRotate8(Reg64 *reg)
{
    int shifted = reg->offset + 1;

    reg->offset = shifted % 8;
}
|
|
|
|
// Emit a 64-bit addition dst += src, one byte at a time from byte 0 up.
void add64(const Reg64 *dst, const Reg64 *src)
{
    int byte;

    for (byte = 0; byte < 8; ++byte) {
        // Byte 0 uses a plain add; the rest add with carry.
        insn_printf("%s r%d,r%d", byte == 0 ? "add" : "adc",
                    REGn(dst, byte), REGn(src, byte));
    }
}
|
|
|
|
// Emit a 64-bit subtraction dst -= src, one byte at a time from byte 0 up.
void sub64(const Reg64 *dst, const Reg64 *src)
{
    int byte;

    for (byte = 0; byte < 8; ++byte) {
        // Byte 0 uses a plain subtract; the rest subtract with carry.
        insn_printf("%s r%d,r%d", byte == 0 ? "sub" : "sbc",
                    REGn(dst, byte), REGn(src, byte));
    }
}
|
|
|
|
// Emit a 64-bit exclusive-or dst ^= src, byte by byte.
void eor64(const Reg64 *dst, const Reg64 *src)
{
    int byte;

    for (byte = 0; byte < 8; ++byte)
        insn_printf("eor r%d,r%d", REGn(dst, byte), REGn(src, byte));
}
|
|
|
|
// Emit code that XORs "reg" with the next 8 bytes read through Z
// (post-incrementing), then update the offset in "reg" to match where
// the result bytes ended up.
void eor64Schedule(Reg64 *reg)
{
    int byte;

    // XOR with the schedule.  The first schedule byte is combined in
    // __tmp_reg__; every later one is loaded into the register of the
    // previous byte and combined with the next data byte.
    insn_printf("ld __tmp_reg__,Z+");
    insn_printf("eor __tmp_reg__,r%d", REGn(reg, 0));
    for (byte = 0; byte < 7; ++byte) {
        insn_printf("ld r%d,Z+", REGn(reg, byte));
        insn_printf("eor r%d,r%d", REGn(reg, byte), REGn(reg, byte + 1));
    }
    insn_printf("mov r%d,__tmp_reg__", REGn(reg, 7));

    // The above operations also implicitly perform a right-rotation.
    // Undo it by left-shifting back into the correct position.
    leftRotate8(reg);
}
|
|
|
|
// Emit code that XORs "reg" with the 8 bytes read backwards through the
// pointer register named by "ptrReg" (pre-decrementing), then update the
// offset in "reg".
void eor64ScheduleReversePtr(Reg64 *reg, const char *ptrReg)
{
    int byte;

    // XOR with the schedule, highest byte first.  The first schedule byte
    // is combined in __tmp_reg__; every later one is loaded into the
    // register of the previous byte and combined with the next lower byte.
    insn_printf("ld __tmp_reg__,-%s", ptrReg);
    insn_printf("eor __tmp_reg__,r%d", REGn(reg, 7));
    for (byte = 7; byte > 0; --byte) {
        insn_printf("ld r%d,-%s", REGn(reg, byte), ptrReg);
        insn_printf("eor r%d,r%d", REGn(reg, byte), REGn(reg, byte - 1));
    }
    insn_printf("mov r%d,__tmp_reg__", REGn(reg, 0));

    // The above operations also implicitly perform a left-rotation.
    // Undo it by right-shifting back into the correct position.
    // We have to do this twice because the following step will
    // apply a left-rotation to put everything back where it belongs.
    rightRotate8(reg);
    rightRotate8(reg);
}
|
|
|
|
// Reverse schedule XOR that reads the schedule through the Z pointer.
void eor64ScheduleReverse(Reg64 *reg)
{
    const char *pointer_name = "Z";

    eor64ScheduleReversePtr(reg, pointer_name);
}
|
|
|
|
// Reverse schedule XOR that reads the schedule through the X pointer.
void eor64ScheduleReverseX(Reg64 *reg)
{
    const char *pointer_name = "X";

    eor64ScheduleReversePtr(reg, pointer_name);
}
|
|
|
|
// Unpack the input block and convert from big-endian to little-endian.
|
|
static void unpack_input(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 7));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 6));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 5));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 4));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 3));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 2));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 1));
|
|
insn_printf("ld r%d,X+", REGn(&xreg, 0));
|
|
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 7));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 6));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 5));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 4));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 3));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 2));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 1));
|
|
insn_printf("ld r%d,X", REGn(&yreg, 0));
|
|
}
|
|
|
|
// Emit eight post-incrementing loads through X into bytes 0..7 of "reg".
static void load_from_x(Reg64 *reg)
{
    int byte;

    for (byte = 0; byte < 8; ++byte)
        insn_printf("ld r%d,X+", REGn(reg, byte));
}
|
|
|
|
// Emit eight post-incrementing stores through X of bytes 0..7 of "reg".
static void store_to_x(Reg64 *reg)
{
    int byte;

    for (byte = 0; byte < 8; ++byte)
        insn_printf("st X+,r%d", REGn(reg, byte));
}
|
|
|
|
// Emit eight post-incrementing loads through Z into bytes 0..7 of "reg".
static void load_from_z(Reg64 *reg)
{
    int byte;

    for (byte = 0; byte < 8; ++byte)
        insn_printf("ld r%d,Z+", REGn(reg, byte));
}
|
|
|
|
// Emit eight post-incrementing stores through Z of bytes 0..7 of "reg".
static void store_to_z(Reg64 *reg)
{
    int byte;

    for (byte = 0; byte < 8; ++byte)
        insn_printf("st Z+,r%d", REGn(reg, byte));
}
|
|
|
|
// Emit pushes for all eight registers of "reg".  The rotation offset is
// reset to zero first, so the registers are pushed in order
// first .. first + 7.
static void push64(Reg64 *reg)
{
    int byte;

    reg->offset = 0;
    for (byte = 0; byte < 8; ++byte)
        insn_printf("push r%d", REGn(reg, byte));
}
|
|
|
|
// Emit pops for all eight registers of "reg", in the reverse of the order
// used by push64().  The rotation offset is reset to zero first.
static void pop64(Reg64 *reg)
{
    int byte;

    reg->offset = 0;
    for (byte = 7; byte >= 0; --byte)
        insn_printf("pop r%d", REGn(reg, byte));
}
|
|
|
|
// Main loop for Speck::encryptBlock().
|
|
static void full_enc_main_loop(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
// Top of the main loop.
|
|
insn_printf("1:");
|
|
|
|
// x = (rightRotate8_64(x) + y) ^ *s++;
|
|
rightRotate8(&xreg);
|
|
add64(&xreg, &yreg);
|
|
eor64Schedule(&xreg);
|
|
|
|
// y = leftRotate3_64(y) ^ x;
|
|
leftRotate3(&yreg);
|
|
eor64(&yreg, &xreg);
|
|
|
|
// Bottom of the main loop.
|
|
insn_printf("dec %%2");
|
|
insn_printf("breq 2f");
|
|
insn_printf("rjmp 1b");
|
|
insn_printf("2:");
|
|
}
|
|
|
|
// Main loop for Speck::decryptBlock().
|
|
static void full_dec_main_loop(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
// Top of the main loop.
|
|
insn_printf("1:");
|
|
|
|
// y = rightRotate3_64(x ^ y);
|
|
eor64(&yreg, &xreg);
|
|
rightRotate3(&yreg);
|
|
|
|
// x = leftRotate8_64((x ^ *s--) - y);
|
|
eor64ScheduleReverse(&xreg);
|
|
leftRotate8(&xreg);
|
|
sub64(&xreg, &yreg);
|
|
|
|
// Bottom of the main loop.
|
|
insn_printf("dec %%2");
|
|
insn_printf("breq 2f");
|
|
insn_printf("rjmp 1b");
|
|
insn_printf("2:");
|
|
}
|
|
|
|
// Pack the output block and convert from little-endian to big-endian.
|
|
static void pack_output(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
insn_printf("ldd r%d,%%A3", x_reg);
|
|
insn_printf("ldd r%d,%%B3", x_reg + 1);
|
|
|
|
insn_printf("st X+,r%d", REGn(&xreg, 7));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 6));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 5));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 4));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 3));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 2));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 1));
|
|
insn_printf("st X+,r%d", REGn(&xreg, 0));
|
|
|
|
insn_printf("st X+,r%d", REGn(&yreg, 7));
|
|
insn_printf("st X+,r%d", REGn(&yreg, 6));
|
|
insn_printf("st X+,r%d", REGn(&yreg, 5));
|
|
insn_printf("st X+,r%d", REGn(&yreg, 4));
|
|
insn_printf("st X+,r%d", REGn(&yreg, 3));
|
|
insn_printf("st X+,r%d", REGn(&yreg, 2));
|
|
insn_printf("st X+,r%d", REGn(&yreg, 1));
|
|
insn_printf("st X,r%d", REGn(&yreg, 0));
|
|
}
|
|
|
|
static void temp_regs(void)
|
|
{
|
|
indent_printf(": \"r%d\", \"r%d\", \"r%d\", \"r%d\", "
|
|
"\"r%d\", \"r%d\", \"r%d\", \"r%d\",\n",
|
|
t1_reg, t1_reg + 1, t1_reg + 2, t1_reg + 3,
|
|
t1_reg + 4, t1_reg + 5, t1_reg + 6, t1_reg + 7);
|
|
indent_printf(" \"r%d\", \"r%d\", \"r%d\", \"r%d\", "
|
|
"\"r%d\", \"r%d\", \"r%d\", \"r%d\", \"memory\"\n",
|
|
t2_reg, t2_reg + 1, t2_reg + 2, t2_reg + 3,
|
|
t2_reg + 4, t2_reg + 5, t2_reg + 6, t2_reg + 7);
|
|
}
|
|
|
|
static void full_setkey(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
printf("void Speck::setKey(const uint8_t *key, size_t len)\n");
|
|
printf("{\n");
|
|
indent_printf("// Automatically generated by the genspeck tool.\n");
|
|
|
|
// Validate the key length.
|
|
indent_printf("uint64_t l[4];\n");
|
|
indent_printf("uint8_t m, mb;\n");
|
|
indent_printf("if (len == 32) {\n");
|
|
indent_printf(" m = 4;\n");
|
|
indent_printf(" mb = 3 * 8;\n");
|
|
indent_printf("} else if (len == 24) {\n");
|
|
indent_printf(" m = 3;\n");
|
|
indent_printf(" mb = 2 * 8;\n");
|
|
indent_printf("} else if (len == 16) {\n");
|
|
indent_printf(" m = 2;\n");
|
|
indent_printf(" mb = 8;\n");
|
|
indent_printf("} else {\n");
|
|
indent_printf(" return false;\n");
|
|
indent_printf("}\n");
|
|
indent_printf("rounds = 30 + m;\n");
|
|
indent_printf("uint8_t r = rounds - 1;\n");
|
|
indent_printf("__asm__ __volatile__ (\n");
|
|
indent += 4;
|
|
|
|
// Copy the key into k[0] and l while converting endianness.
|
|
insn_printf("ld __tmp_reg__,-X"); // k[0] = last 8 bytes of the key
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("ld __tmp_reg__,-X");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("sbiw r%d,8", z_reg); // Set Z back to beginning of k
|
|
insn_printf("movw r%d,r%d", t1_reg + 2, z_reg); // Save Z
|
|
insn_printf("movw r%d,%%A2", z_reg); // Z = l
|
|
insn_printf("ldd r%d,%%3", t1_reg);
|
|
insn_printf("1:");
|
|
insn_printf("ld __tmp_reg__,-X"); // Copy first mb bytes from key
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("dec r%d", t1_reg);
|
|
insn_printf("brne 1b");
|
|
insn_printf("movw r%d,%%A2", x_reg); // X = l
|
|
insn_printf("movw r%d,r%d", z_reg, t1_reg + 2); // Z = k
|
|
|
|
// Expand the key to the full key schedule.
|
|
// Note: We can use %A2 and %B2 as spare temporary registers now.
|
|
insn_printf("clr %%A2"); // %A2 = li_in = 0
|
|
insn_printf("ldd %%B2,%%3"); // %B2 = li_out = mb (= (m - 1) * 8)
|
|
insn_printf("clr r%d", temp_reg); // i = 0
|
|
load_from_z(&yreg); // y = k[i]
|
|
insn_printf("2:");
|
|
|
|
// l[li_out] = (k[i] + rightRotate8_64(l[li_in])) ^ i
|
|
insn_printf("add r%d,%%A2", x_reg); // x = rightRotate8_64(l[li_in])
|
|
insn_printf("adc r%d,__zero_reg__", x_reg + 1);
|
|
xreg.offset = 7;
|
|
load_from_x(&xreg);
|
|
xreg.offset = 0;
|
|
insn_printf("sub r%d,%%A2", x_reg); // restore X to point at base of l
|
|
insn_printf("sbc r%d,__zero_reg__", x_reg + 1);
|
|
insn_printf("sbiw r%d,8", x_reg);
|
|
add64(&xreg, &yreg); // x += y
|
|
insn_printf("eor r%d,r%d", REGn(&xreg, 0), temp_reg); // x ^= i
|
|
insn_printf("add r%d,%%B2", x_reg); // l[li_out] = x
|
|
insn_printf("adc r%d,__zero_reg__", x_reg + 1);
|
|
store_to_x(&xreg);
|
|
insn_printf("sub r%d,%%B2", x_reg); // restore X to point at base of l
|
|
insn_printf("sbc r%d,__zero_reg__", x_reg + 1);
|
|
insn_printf("sbiw r%d,8", x_reg);
|
|
|
|
// k[i + 1] = leftRotate3_64(k[i]) ^ l[li_out];
|
|
leftRotate3(&yreg); // y = leftRotate3(y)
|
|
eor64(&yreg, &xreg); // y ^= x
|
|
store_to_z(&yreg); // k[i + 1] = y
|
|
|
|
// Advance li_in and li_out, wrapping around at the end of l.
|
|
insn_printf("ldi r%d,8", const_reg);
|
|
insn_printf("add %%A2,r%d", const_reg);
|
|
insn_printf("add %%B2,r%d", const_reg);
|
|
insn_printf("ldi r%d,0x1F", const_reg);
|
|
insn_printf("and %%A2,r%d", const_reg);
|
|
insn_printf("and %%B2,r%d", const_reg);
|
|
|
|
// Bottom of the loop.
|
|
insn_printf("ldd r%d,%%4", t1_reg); // r8 = rounds - 1
|
|
insn_printf("inc r%d", temp_reg); // ++i
|
|
insn_printf("cp r%d,r%d", temp_reg, t1_reg);
|
|
insn_printf("breq 3f");
|
|
insn_printf("rjmp 2b");
|
|
insn_printf("3:");
|
|
|
|
// Clean the l array. X register should still be pointing to it.
|
|
insn_printf("ldi r%d,32", const_reg);
|
|
insn_printf("4:");
|
|
insn_printf("st X+,__zero_reg__");
|
|
insn_printf("dec r%d", const_reg);
|
|
insn_printf("brne 4b");
|
|
|
|
// Declare the registers that we need.
|
|
indent_printf(": : \"z\"(k), \"x\"(key + len), \"r\"(l), \"Q\"(mb), \"Q\"(r)\n");
|
|
temp_regs();
|
|
indent_printf(", \"r%d\", \"r%d\"\n", const_reg, temp_reg);
|
|
indent -= 4;
|
|
indent_printf(");\n");
|
|
|
|
// End of function.
|
|
indent_printf("return true;\n");
|
|
printf("}\n\n");
|
|
}
|
|
|
|
static void full_enc(void)
|
|
{
|
|
printf("void Speck::encryptBlock(uint8_t *output, const uint8_t *input)\n");
|
|
printf("{\n");
|
|
indent_printf("// Automatically generated by the genspeck tool.\n");
|
|
indent_printf("__asm__ __volatile__ (\n");
|
|
indent += 4;
|
|
unpack_input();
|
|
full_enc_main_loop();
|
|
pack_output();
|
|
indent_printf(": : \"x\"(input), \"z\"(k), \"r\"(rounds), \"Q\"(output)\n");
|
|
temp_regs();
|
|
indent -= 4;
|
|
indent_printf(");\n");
|
|
printf("}\n\n");
|
|
}
|
|
|
|
static void full_dec(void)
|
|
{
|
|
printf("void Speck::decryptBlock(uint8_t *output, const uint8_t *input)\n");
|
|
printf("{\n");
|
|
indent_printf("// Automatically generated by the genspeck tool.\n");
|
|
indent_printf("__asm__ __volatile__ (\n");
|
|
indent += 4;
|
|
unpack_input();
|
|
full_dec_main_loop();
|
|
pack_output();
|
|
indent_printf(": : \"x\"(input), \"z\"(k + rounds), \"r\"(rounds), \"Q\"(output)\n");
|
|
temp_regs();
|
|
indent -= 4;
|
|
indent_printf(");\n");
|
|
printf("}\n\n");
|
|
}
|
|
|
|
static void tiny_enc(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
printf("void SpeckTiny::encryptBlock(uint8_t *output, const uint8_t *input)\n");
|
|
printf("{\n");
|
|
indent_printf("// Automatically generated by the genspeck tool.\n");
|
|
indent_printf("uint64_t l[5];\n");
|
|
indent_printf("uint8_t r = rounds;\n");
|
|
indent_printf("uint8_t mb = (r - 31) * 8;\n");
|
|
|
|
// Copy the "k" array into the "l" array. The first element is "s"
|
|
// and the rest of the elements make up the normal l[0..3] values.
|
|
indent_printf("__asm__ __volatile__ (\n");
|
|
indent += 4;
|
|
insn_printf("movw r%d,r%d", t1_reg, z_reg); // Save Z
|
|
insn_printf("ldd r%d,%%4", t2_reg);
|
|
insn_printf("ldi r%d,8", const_reg);
|
|
insn_printf("add r%d,r%d", t2_reg, const_reg);
|
|
insn_printf("1:");
|
|
insn_printf("ld __tmp_reg__,X+");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("dec r%d", t2_reg);
|
|
insn_printf("brne 1b");
|
|
insn_printf("movw r%d,r%d", z_reg, t1_reg); // Restore Z to point at l
|
|
|
|
// Unpack the input. %A2 and %B2 are free temporary registers after this.
|
|
insn_printf("movw r%d,%%A2", x_reg);
|
|
unpack_input();
|
|
|
|
// Top of the loop.
|
|
insn_printf("clr %%A2"); // %A2 = li_in = 0
|
|
insn_printf("ldd %%B2,%%4"); // %B2 = li_out = mb
|
|
insn_printf("clr r%d", temp_reg); // i = 0
|
|
insn_printf("2:");
|
|
|
|
// Adjust x and y for this round using the key schedule word s (in l[0]).
|
|
// x = (rightRotate8_64(x) + y) ^ s;
|
|
rightRotate8(&xreg);
|
|
add64(&xreg, &yreg);
|
|
eor64Schedule(&xreg);
|
|
// y = leftRotate3_64(y) ^ x;
|
|
leftRotate3(&yreg);
|
|
eor64(&yreg, &xreg);
|
|
// At this point, Z has been incremented to point at l[1] which
|
|
// is the start of the actual l[0] from the original formulation.
|
|
|
|
// If this is the last round, then we are done. There is no
|
|
// point calculating another key schedule element.
|
|
insn_printf("mov __tmp_reg__,r%d", temp_reg);
|
|
insn_printf("inc __tmp_reg__");
|
|
insn_printf("ldd r%d,%%5", const_reg);
|
|
insn_printf("cp __tmp_reg__,r%d", const_reg);
|
|
insn_printf("brne 3f");
|
|
insn_printf("rjmp 4f");
|
|
insn_printf("3:");
|
|
|
|
// Save x and y on the stack - we need the registers to
|
|
// help us compute the next key schedule element.
|
|
push64(&xreg);
|
|
push64(&yreg);
|
|
|
|
// Compute the key schedule word s for the next round.
|
|
insn_printf("sbiw r%d,8", z_reg); // Point Z back at l[0]
|
|
// l[li_out] = (s + rightRotate8_64(l[li_in])) ^ i;
|
|
insn_printf("ld r%d,Z", REGn(&yreg, 0)); // y = s
|
|
insn_printf("ldd r%d,Z+1", REGn(&yreg, 1));
|
|
insn_printf("ldd r%d,Z+2", REGn(&yreg, 2));
|
|
insn_printf("ldd r%d,Z+3", REGn(&yreg, 3));
|
|
insn_printf("ldd r%d,Z+4", REGn(&yreg, 4));
|
|
insn_printf("ldd r%d,Z+5", REGn(&yreg, 5));
|
|
insn_printf("ldd r%d,Z+6", REGn(&yreg, 6));
|
|
insn_printf("ldd r%d,Z+7", REGn(&yreg, 7));
|
|
insn_printf("add r%d,%%A2", z_reg); // Z = &(l[li_in]) - 8
|
|
insn_printf("adc r%d,__zero_reg__", z_reg + 1);
|
|
leftRotate8(&xreg); // x = rightRotate8(l[li_in])
|
|
insn_printf("ldd r%d,Z+8", REGn(&xreg, 0));
|
|
insn_printf("ldd r%d,Z+9", REGn(&xreg, 1));
|
|
insn_printf("ldd r%d,Z+10", REGn(&xreg, 2));
|
|
insn_printf("ldd r%d,Z+11", REGn(&xreg, 3));
|
|
insn_printf("ldd r%d,Z+12", REGn(&xreg, 4));
|
|
insn_printf("ldd r%d,Z+13", REGn(&xreg, 5));
|
|
insn_printf("ldd r%d,Z+14", REGn(&xreg, 6));
|
|
insn_printf("ldd r%d,Z+15", REGn(&xreg, 7));
|
|
rightRotate8(&xreg);
|
|
add64(&xreg, &yreg); // x += y
|
|
insn_printf("eor r%d,r%d", REGn(&xreg, 0), temp_reg); // x ^= i
|
|
insn_printf("sub r%d,%%A2", z_reg); // Z = &(l[li_out]) - 8
|
|
insn_printf("sbc r%d,__zero_reg__", z_reg + 1);
|
|
insn_printf("add r%d,%%B2", z_reg);
|
|
insn_printf("adc r%d,__zero_reg__", z_reg + 1);
|
|
insn_printf("std Z+8,r%d", REGn(&xreg, 0)); // l[li_out] = x
|
|
insn_printf("std Z+9,r%d", REGn(&xreg, 1));
|
|
insn_printf("std Z+10,r%d", REGn(&xreg, 2));
|
|
insn_printf("std Z+11,r%d", REGn(&xreg, 3));
|
|
insn_printf("std Z+12,r%d", REGn(&xreg, 4));
|
|
insn_printf("std Z+13,r%d", REGn(&xreg, 5));
|
|
insn_printf("std Z+14,r%d", REGn(&xreg, 6));
|
|
insn_printf("std Z+15,r%d", REGn(&xreg, 7));
|
|
insn_printf("sub r%d,%%B2", z_reg); // Restore Z to base of l array
|
|
insn_printf("sbc r%d,__zero_reg__", z_reg + 1);
|
|
// s = leftRotate3_64(s) ^ l[li_out];
|
|
leftRotate3(&yreg);
|
|
eor64(&yreg, &xreg);
|
|
insn_printf("st Z,r%d", REGn(&yreg, 0));
|
|
insn_printf("std Z+1,r%d", REGn(&yreg, 1));
|
|
insn_printf("std Z+2,r%d", REGn(&yreg, 2));
|
|
insn_printf("std Z+3,r%d", REGn(&yreg, 3));
|
|
insn_printf("std Z+4,r%d", REGn(&yreg, 4));
|
|
insn_printf("std Z+5,r%d", REGn(&yreg, 5));
|
|
insn_printf("std Z+6,r%d", REGn(&yreg, 6));
|
|
insn_printf("std Z+7,r%d", REGn(&yreg, 7));
|
|
|
|
// Advance li_in and li_out, wrapping around at the end of l.
|
|
insn_printf("ldi r%d,8", const_reg);
|
|
insn_printf("add %%A2,r%d", const_reg);
|
|
insn_printf("add %%B2,r%d", const_reg);
|
|
insn_printf("ldi r%d,0x1F", const_reg);
|
|
insn_printf("and %%A2,r%d", const_reg);
|
|
insn_printf("and %%B2,r%d", const_reg);
|
|
|
|
// Restore the original x and y.
|
|
pop64(&yreg);
|
|
pop64(&xreg);
|
|
|
|
// Bottom of the loop.
|
|
insn_printf("inc r%d", temp_reg); // i++
|
|
insn_printf("rjmp 2b");
|
|
insn_printf("4:");
|
|
|
|
// Pack the results into the output buffer.
|
|
pack_output();
|
|
|
|
// Declare the registers that we need.
|
|
indent_printf(": : \"x\"(k), \"z\"(l), \"r\"(input), \"Q\"(output), \"Q\"(mb), \"Q\"(r)\n");
|
|
temp_regs();
|
|
indent_printf(", \"r%d\", \"r%d\"\n", const_reg, temp_reg);
|
|
indent -= 4;
|
|
indent_printf(");\n");
|
|
printf("}\n\n");
|
|
}
|
|
|
|
static void small_dec(void)
|
|
{
|
|
Reg64 xreg = {t1_reg, 0};
|
|
Reg64 yreg = {t2_reg, 0};
|
|
|
|
printf("void SpeckSmall::decryptBlock(uint8_t *output, const uint8_t *input)\n");
|
|
printf("{\n");
|
|
indent_printf("// Automatically generated by the genspeck tool.\n");
|
|
indent_printf("uint64_t l[5];\n");
|
|
indent_printf("uint8_t r = rounds;\n");
|
|
indent_printf("uint8_t li_in = ((r + 3) & 0x03) * 8;\n");
|
|
indent_printf("uint8_t li_out = ((((r - 31) & 0x03) * 8) + li_in) & 0x1F;\n");
|
|
indent_printf("__asm__ __volatile__ (\n");
|
|
indent += 4;
|
|
|
|
// Copy the this->l array into the local l array. Then copy
|
|
// the "s" value from l[li_out] to l[4].
|
|
insn_printf("ldd r%d,%%4", temp_reg); // r25 = li_out
|
|
insn_printf("ldi r%d,32", const_reg); // Copy 32 bytes from this->l.
|
|
insn_printf("1:");
|
|
insn_printf("ld __tmp_reg__,X+");
|
|
insn_printf("st Z+,__tmp_reg__");
|
|
insn_printf("dec r%d", const_reg);
|
|
insn_printf("brne 1b");
|
|
insn_printf("movw r%d,r%d", x_reg, z_reg); // X = Z + 32
|
|
insn_printf("sbiw r%d,32", z_reg); // Z = &(l[li_out])
|
|
insn_printf("add r%d,r%d", z_reg, temp_reg);
|
|
insn_printf("adc r%d,__zero_reg__", z_reg + 1);
|
|
insn_printf("ld __tmp_reg__,Z"); // Copy l[li_out] to l[4]
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+1");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+2");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+3");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+4");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+5");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+6");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("ldd __tmp_reg__,Z+7");
|
|
insn_printf("st X+,__tmp_reg__");
|
|
insn_printf("sub r%d,r%d", z_reg, temp_reg); // Z = &(l[0])
|
|
insn_printf("sbc r%d,__zero_reg__", z_reg + 1);
|
|
|
|
// Unpack the input. %A2 and %B2 are free temporary registers after this.
|
|
insn_printf("movw r%d,%%A2", x_reg);
|
|
unpack_input();
|
|
|
|
// Top of the loop.
|
|
insn_printf("ldd %%A2,%%6"); // %A2 = li_in
|
|
insn_printf("mov %%B2,r%d", temp_reg); // %B2 = li_out
|
|
insn_printf("ldd r%d,%%5", temp_reg); // i = rounds - 1
|
|
insn_printf("dec r%d", temp_reg);
|
|
insn_printf("movw r%d,r%d", x_reg, z_reg); // X = Z + 40 = &(l[5])
|
|
insn_printf("adiw r%d,40", x_reg); // i.e. point to end of l[4]
|
|
insn_printf("2:");
|
|
|
|
// Adjust x and y for this round using the key schedule word s (in l[4]).
|
|
// y = rightRotate3_64(x ^ y);
|
|
eor64(&yreg, &xreg);
|
|
rightRotate3(&yreg);
|
|
// x = leftRotate8_64((x ^ s) - y);
|
|
eor64ScheduleReverseX(&xreg);
|
|
leftRotate8(&xreg);
|
|
sub64(&xreg, &yreg);
|
|
|
|
// If this is the last round, then we are done. There is no
|
|
// point calculating another key schedule element.
|
|
insn_printf("or r%d,r%d", temp_reg, temp_reg); // if (i == 0)
|
|
insn_printf("brne 3f");
|
|
insn_printf("rjmp 4f");
|
|
insn_printf("3:");
|
|
insn_printf("dec r%d", temp_reg); // --i
|
|
|
|
// Save x and y on the stack - we need the registers to
|
|
// help us compute the next key schedule element.
|
|
push64(&xreg);
|
|
push64(&yreg);
|
|
|
|
// Move li_in and li_out backwards, wrapping around at the start of l.
|
|
insn_printf("ldi r%d,24", const_reg);
|
|
insn_printf("add %%A2,r%d", const_reg);
|
|
insn_printf("add %%B2,r%d", const_reg);
|
|
insn_printf("ldi r%d,0x1F", const_reg);
|
|
insn_printf("and %%A2,r%d", const_reg);
|
|
insn_printf("and %%B2,r%d", const_reg);
|
|
|
|
// Compute the key schedule word s for the next round.
|
|
// s = rightRotate3_64(s ^ l[li_out]);
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 0)); // y = s = l[4]
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 1));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 2));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 3));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 4));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 5));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 6));
|
|
insn_printf("ld r%d,X+", REGn(&yreg, 7));
|
|
insn_printf("add r%d,%%B2", z_reg); // Z = &(l[li_out])
|
|
insn_printf("adc r%d,__zero_reg__", z_reg + 1);
|
|
insn_printf("ld r%d,Z", REGn(&xreg, 0)); // x = l[li_out]
|
|
insn_printf("ldd r%d,Z+1", REGn(&xreg, 1));
|
|
insn_printf("ldd r%d,Z+2", REGn(&xreg, 2));
|
|
insn_printf("ldd r%d,Z+3", REGn(&xreg, 3));
|
|
insn_printf("ldd r%d,Z+4", REGn(&xreg, 4));
|
|
insn_printf("ldd r%d,Z+5", REGn(&xreg, 5));
|
|
insn_printf("ldd r%d,Z+6", REGn(&xreg, 6));
|
|
insn_printf("ldd r%d,Z+7", REGn(&xreg, 7));
|
|
insn_printf("sub r%d,%%B2", z_reg); // Z = &(l[0])
|
|
insn_printf("sbc r%d,__zero_reg__", z_reg + 1);
|
|
eor64(&yreg, &xreg);
|
|
rightRotate3(&yreg);
|
|
insn_printf("st -X,r%d", REGn(&yreg, 7)); // store s back into l[4]
|
|
insn_printf("st -X,r%d", REGn(&yreg, 6));
|
|
insn_printf("st -X,r%d", REGn(&yreg, 5));
|
|
insn_printf("st -X,r%d", REGn(&yreg, 4));
|
|
insn_printf("st -X,r%d", REGn(&yreg, 3));
|
|
insn_printf("st -X,r%d", REGn(&yreg, 2));
|
|
insn_printf("st -X,r%d", REGn(&yreg, 1));
|
|
insn_printf("st -X,r%d", REGn(&yreg, 0));
|
|
insn_printf("adiw r%d,8", x_reg); // X = &(l[5])
|
|
// l[li_in] = leftRotate8_64((l[li_out] ^ i) - s);
|
|
insn_printf("eor r%d,r%d", t1_reg, temp_reg); // x ^= i
|
|
sub64(&xreg, &yreg); // x -= s
|
|
leftRotate8(&xreg); // x = leftRotate8(x)
|
|
insn_printf("add r%d,%%A2", z_reg); // Z = &(l[li_in])
|
|
insn_printf("adc r%d,__zero_reg__", z_reg + 1);
|
|
insn_printf("st Z,r%d", REGn(&xreg, 0)); // l[li_in] = x
|
|
insn_printf("std Z+1,r%d", REGn(&xreg, 1));
|
|
insn_printf("std Z+2,r%d", REGn(&xreg, 2));
|
|
insn_printf("std Z+3,r%d", REGn(&xreg, 3));
|
|
insn_printf("std Z+4,r%d", REGn(&xreg, 4));
|
|
insn_printf("std Z+5,r%d", REGn(&xreg, 5));
|
|
insn_printf("std Z+6,r%d", REGn(&xreg, 6));
|
|
insn_printf("std Z+7,r%d", REGn(&xreg, 7));
|
|
insn_printf("sub r%d,%%A2", z_reg); // Z = &(l[0])
|
|
insn_printf("sbc r%d,__zero_reg__", z_reg + 1);
|
|
|
|
// Restore the original x and y.
|
|
pop64(&yreg);
|
|
pop64(&xreg);
|
|
|
|
// Bottom of the loop.
|
|
insn_printf("rjmp 2b");
|
|
insn_printf("4:");
|
|
|
|
// Pack the results into the output buffer.
|
|
pack_output();
|
|
|
|
// Declare the registers that we need.
|
|
indent_printf(": : \"x\"(this->l), \"z\"(l), \"r\"(input), \"Q\"(output), \"Q\"(li_out), \"Q\"(r), \"Q\"(li_in)\n");
|
|
temp_regs();
|
|
indent_printf(", \"r%d\", \"r%d\"\n", const_reg, temp_reg);
|
|
indent -= 4;
|
|
indent_printf(");\n");
|
|
printf("}\n\n");
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
full_setkey();
|
|
full_enc();
|
|
full_dec();
|
|
|
|
tiny_enc();
|
|
|
|
small_dec();
|
|
return 0;
|
|
}
|