From 4078351503f2602ac2cdb953328a847204347641 Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Thu, 26 Apr 2018 06:50:39 +1000 Subject: [PATCH] AVR assembly version of Acorn128 --- doc/crypto.dox | 1 + gen/genacorn.c | 942 ++++++++++++++++++ .../CryptoLW/examples/TestAcorn/TestAcorn.ino | 2 +- libraries/CryptoLW/src/Acorn128.cpp | 16 +- libraries/CryptoLW/src/Acorn128.h | 7 + libraries/CryptoLW/src/Acorn128AVR.cpp | 556 +++++++++++ 6 files changed, 1522 insertions(+), 2 deletions(-) create mode 100644 gen/genacorn.c create mode 100644 libraries/CryptoLW/src/Acorn128AVR.cpp diff --git a/doc/crypto.dox b/doc/crypto.dox index df01f72b..87bfad40 100644 --- a/doc/crypto.dox +++ b/doc/crypto.dox @@ -119,6 +119,7 @@ Ardunino Mega 2560 running at 16 MHz are similar: EAX<AES256>97.80us97.80us1806.57us332 EAX<Speck> (256-bit key)25.89us25.88us690.63us362 EAX<SpeckTiny> (256-bit key)78.20us78.20us1269.19us122 +Acorn12820.39us20.06us4817.82us60 Hash AlgorithmHashing (per byte)Finalization State Size (bytes) SHA25643.85us2841.04us 107 diff --git a/gen/genacorn.c b/gen/genacorn.c new file mode 100644 index 00000000..692757af --- /dev/null +++ b/gen/genacorn.c @@ -0,0 +1,942 @@ +/* + * Copyright (C) 2018 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +// Special-purpose compiler that generates the AVR version of Acorn128. + +#include +#include +#include + +static int indent = 4; + +static const int temp_reg = 0; // Register number for the AVR "__tmp_reg__". + +// Information about the positions and lengths of the LFSR's. +typedef struct +{ + int start; // Bit position where the LFSR starts. + int len; // Length of the LFSR in bits. + int offsetl; // Offset of the low word of the LFSR in the state. + int offseth; // Offset of the high word of the LFSR in the state. + +} LFSR; +#define num_lfsrs 7 +static LFSR const lfsr[num_lfsrs] = { + {0, 61, 0, 4}, + {61, 46, 8, 12}, + {107, 47, 16, 14}, + {154, 39, 20, 24}, + {193, 37, 28, 26}, + {230, 59, 32, 36}, + {289, 4, 40, 40}, +}; + +// LFSR byte offset for generating 32-bit versions of the code. +static int lfsr_offset = 0; + +// Non-zero to generate the 32-bit version. +static int is_32bit_version = 0; + +// Registers that can be used for temporary values, in the best +// order to allocate them. High registers are listed first. +static int regs[] = { + 16, 17, 18, 19, 20, 21, 22, 23, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +}; +#define num_regs (sizeof(regs) / sizeof(regs[0])) + +// Which registers are currently in use? +static int reg_in_use[num_regs] = {0}; + +// Which registers did we use while generating the code? +static int reg_used[num_regs] = {0}; + +// Registers that have been allocated to specific purposes. +static int s244 = -1; +static int s235 = -1; +static int s196 = -1; +static int s160 = -1; +static int s111 = -1; +static int s66 = -1; +static int s23 = -1; +static int s12 = -1; +static int s1_l = -1; +static int s2_l = -1; +static int s3_l = -1; +static int s4_l = -1; +static int s5_l = -1; +static int s6_l = -1; +static int s7_l = -1; +static int s7_l_prev[3] = {-1, -1, -1}; +static int ks = -1; +static int f = -1; + +// Indent the code and print a string. +void indent_printf(const char *format, ...) +{ + va_list va; + int posn; + va_start(va, format); + for (posn = 0; posn < indent; ++posn) + putc(' ', stdout); + vfprintf(stdout, format, va); + va_end(va); +} + +// Print an assembler instruction within quotes. +void insn_printf(const char *format, ...) +{ + va_list va; + int posn; + va_start(va, format); + for (posn = 0; posn < indent; ++posn) + putc(' ', stdout); + putc('"', stdout); + vfprintf(stdout, format, va); + putc('\\', stdout); + putc('n', stdout); + putc('"', stdout); + putc('\n', stdout); + va_end(va); +} + +// Allocate an unused register, starting with high registers. +static void alloc_high_reg(int *reg) +{ + unsigned index; + if (*reg != -1) { + fprintf(stderr, "Temporary register wasn't previously released\n"); + exit(1); + } + for (index = 0; index < num_regs; ++index) { + if (!reg_in_use[index]) { + reg_in_use[index] = 1; + reg_used[index] = 1; + *reg = regs[index]; + if (*reg < 16) { + fprintf(stderr, "Ran out of temporary high registers\n"); + exit(1); + } + return; + } + } + fprintf(stderr, "Ran out of temporary registers\n"); + exit(1); +} + +// Allocate an unused register, starting with low registers +// because we know we won't need the value in a high reg later. +static void alloc_low_reg(int *reg) +{ + unsigned index; + if (*reg != -1) { + fprintf(stderr, "Temporary register wasn't previously released\n"); + exit(1); + } + for (index = num_regs; index > 0; --index) { + if (!reg_in_use[index - 1]) { + reg_in_use[index - 1] = 1; + reg_used[index - 1] = 1; + *reg = regs[index - 1]; + return; + } + } + fprintf(stderr, "Ran out of temporary registers\n"); + exit(1); +} + +// Release a register back to the allocation pool. +static void release_reg(int *reg) +{ + unsigned index; + for (index = 0; index < num_regs; ++index) { + if (regs[index] == *reg && reg_in_use[index]) { + reg_in_use[index] = 0; + *reg = -1; + return; + } + } + fprintf(stderr, "Released a register that was not in use\n"); + exit(1); +} + +// Check that we have a high register when we need one. +static void check_high_reg(int *reg) +{ + if ((*reg) < 16) { + fprintf(stderr, "r%d is not a high register\n", *reg); + exit(1); + } +} + +// Check that all temporary registers have been released. +static void check_regs(void) +{ + unsigned index; + for (index = 0; index < num_regs; ++index) { + if (reg_in_use[index]) { + fprintf(stderr, "Register r%d has not been released\n", + regs[index]); + exit(1); + } + } +} + +// Print the names of the temporary registers that we used. +static void temp_regs(void) +{ + unsigned index; + int first = 1; + indent_printf(": "); + for (index = 0; index < num_regs; ++index) { + if (reg_used[index]) { + if (first) { + first = 0; + printf("\"r%d\"", regs[index]); + } else { + printf(", \"r%d\"", regs[index]); + } + } + } + printf(", \"memory\"\n"); +} + +// Find the information about a specific LFSR. +static const LFSR *find_lfsr(int bit) +{ + unsigned index; + for (index = 0; index < num_lfsrs; ++index) { + if (bit >= lfsr[index].start && + bit < (lfsr[index].start + lfsr[index].len)) + return &(lfsr[index]); + } + return &(lfsr[num_lfsrs - 1]); +} + +// Gets the information for a specific LFSR from 0 to 6. +static const LFSR *get_lfsr(int num) +{ + return &(lfsr[num]); +} + +// Shift a two-register value left by a number of bits. +static void shift_left_2_regs(int reg1, int reg2, int count) +{ + while (count > 0) { + insn_printf("lsl r%d", reg2); + insn_printf("rol r%d", reg1); + --count; + } +} + +// Shift a three-register value left by a number of bits. +static void shift_left_3_regs(int reg1, int reg2, int reg3, int count) +{ + while (count > 0) { + insn_printf("lsl r%d", reg3); + insn_printf("rol r%d", reg2); + insn_printf("rol r%d", reg1); + --count; + } +} + +// Shift a five-register value left by a number of bits. +static void shift_left_5_regs + (int reg1, int reg2, int reg3, int reg4, int reg5, int count) +{ + while (count > 0) { + insn_printf("lsl r%d", reg5); + insn_printf("rol r%d", reg4); + insn_printf("rol r%d", reg3); + insn_printf("rol r%d", reg2); + insn_printf("rol r%d", reg1); + --count; + } +} + +// Shift a two-register value right by a number of bits. +static void shift_right_2_regs(int reg1, int reg2, int count) +{ + while (count > 0) { + insn_printf("lsr r%d", reg1); + insn_printf("ror r%d", reg2); + --count; + } +} + +// Shift a five-register value right by a number of bits. +static void shift_right_5_regs + (int reg1, int reg2, int reg3, int reg4, int reg5, int count) +{ + while (count > 0) { + insn_printf("lsr r%d", reg1); + insn_printf("ror r%d", reg2); + insn_printf("ror r%d", reg3); + insn_printf("ror r%d", reg4); + insn_printf("ror r%d", reg5); + --count; + } +} + +// Extracts one part from the state as a byte. +static void extract_one_part(int reg, int bit) +{ + const LFSR *lfsr = find_lfsr(bit); + int offset; + bit -= lfsr->start; + offset = lfsr->offsetl + lfsr_offset + (bit / 8); + bit %= 8; + if (bit < 4) { + insn_printf("ldd r%d,Z+%d", reg, offset); + insn_printf("ldd r%d,Z+%d", temp_reg, offset + 1); + shift_right_2_regs(temp_reg, reg, bit); + } else if (bit > 4) { + insn_printf("ldd r%d,Z+%d", temp_reg, offset); + insn_printf("ldd r%d,Z+%d", reg, offset + 1); + shift_left_2_regs(reg, temp_reg, 8 - bit); + } else { + int extra_reg = -1; + alloc_high_reg(&extra_reg); + insn_printf("ldd r%d,Z+%d", reg, offset); + insn_printf("ldd r%d,Z+%d", extra_reg, offset + 1); + insn_printf("swap r%d", reg); + insn_printf("swap r%d", extra_reg); + check_high_reg(®); + check_high_reg(&extra_reg); + insn_printf("andi r%d,0x0F", reg); // Assumes reg and extra_reg + insn_printf("andi r%d,0xF0", extra_reg); // are high registers. + insn_printf("or r%d,r%d", reg, extra_reg); + release_reg(&extra_reg); + } +} + +// Extracts two parts from the state as bytes. Both parts are +// assumed to be within the same 3-byte region within the state +// and that they don't overlap. We also assume that bit1 > bit2. +static void extract_two_parts(int reg1, int reg2, int bit1, int bit2) +{ + const LFSR *lfsr = find_lfsr(bit1); + int offset, count; + bit1 -= lfsr->start; + bit2 -= lfsr->start; + offset = lfsr->offsetl + lfsr_offset + (bit2 / 8); + insn_printf("ldd r%d,Z+%d", reg1, offset + 2); + insn_printf("ldd r%d,Z+%d", reg2, offset + 1); + insn_printf("ldd r%d,Z+%d", temp_reg, offset); + count = 8 - (bit1 % 8); + shift_left_3_regs(reg1, reg2, temp_reg, count); + count = bit1 - (bit2 + 8); + shift_left_2_regs(reg2, temp_reg, count); +} + +// Extract out various sub-parts of the state as 8-bit bytes. +// We do this by extracting two bytes around the one we want +// and then shifting it left or right until it is byte-aligned. +// Sometimes there is overlap and we can extract 3 bytes and shift. +static void extract_sub_parts(void) +{ + // LFSR6 + alloc_low_reg(&s244); + alloc_low_reg(&s235); + extract_two_parts(s244, s235, 244, 235); + + // LFSR5 + alloc_low_reg(&s196); + extract_one_part(s196, 196); + + // LFSR4 + alloc_low_reg(&s160); + extract_one_part(s160, 160); + + // LFSR3 + alloc_high_reg(&s111); + extract_one_part(s111, 111); + + // LFSR2 + alloc_low_reg(&s66); + extract_one_part(s66, 66); + + // LFSR1 + alloc_low_reg(&s23); + alloc_low_reg(&s12); + extract_two_parts(s23, s12, 23, 12); +} + +// Update the LFSR's. +static void update_lfsrs(void) +{ + int offset; + + // LFSR7: if the offset is non-zero then we still have the s7_l + // value from a previous shift_down_step() call in a register. + if (lfsr_offset == 0) { + alloc_low_reg(&s7_l); + insn_printf("ldd r%d,Z+%d", s7_l, get_lfsr(6)->offsetl + lfsr_offset); + } + insn_printf("eor r%d,r%d", s7_l, s235); + alloc_low_reg(&s6_l); + insn_printf("ldd r%d,Z+%d", s6_l, get_lfsr(5)->offsetl + lfsr_offset); + insn_printf("eor r%d,r%d", s7_l, s6_l); + + // LFSR6 + alloc_low_reg(&s5_l); + insn_printf("eor r%d,r%d", s6_l, s196); + insn_printf("ldd r%d,Z+%d", s5_l, get_lfsr(4)->offsetl + lfsr_offset); + insn_printf("eor r%d,r%d", s6_l, s5_l); + + // LFSR5 + alloc_low_reg(&s4_l); + insn_printf("eor r%d,r%d", s5_l, s160); + insn_printf("ldd r%d,Z+%d", s4_l, get_lfsr(3)->offsetl + lfsr_offset); + insn_printf("eor r%d,r%d", s5_l, s4_l); + + // LFSR4 + alloc_low_reg(&s3_l); + insn_printf("eor r%d,r%d", s4_l, s111); + insn_printf("ldd r%d,Z+%d", s3_l, get_lfsr(2)->offsetl + lfsr_offset); + insn_printf("eor r%d,r%d", s4_l, s3_l); + + // LFSR3 + alloc_low_reg(&s2_l); + insn_printf("eor r%d,r%d", s3_l, s66); + insn_printf("ldd r%d,Z+%d", s2_l, get_lfsr(1)->offsetl + lfsr_offset); + insn_printf("eor r%d,r%d", s3_l, s2_l); + + // LFSR2 + alloc_low_reg(&s1_l); + insn_printf("eor r%d,r%d", s2_l, s23); + offset = get_lfsr(0)->offsetl + lfsr_offset; + if (offset != 0) + insn_printf("ldd r%d,Z+%d", s1_l, offset); + else + insn_printf("ld r%d,Z", s1_l); + insn_printf("eor r%d,r%d", s2_l, s1_l); +} + +// Generate the next 8 keystream bits. +static void generate_keystream(void) +{ + // ks = s12 ^ state->s4_l ^ + // maj(s235, state->s2_l, state->s5_l) ^ + // ch(state->s6_l, s111, s66); + ks = s12; + s12 = -1; + insn_printf("eor r%d,r%d", ks, s4_l); + + // ks ^= maj(s235, state->s2_l, state->s5_l) + insn_printf("mov r%d,r%d", temp_reg, s235); // ks ^= (s235 & s2_l) + insn_printf("and r%d,r%d", temp_reg, s2_l); + insn_printf("eor r%d,r%d", ks, temp_reg); + insn_printf("and r%d,r%d", s235, s5_l); // ks ^= (s235 & s5_l) + insn_printf("eor r%d,r%d", ks, s235); + insn_printf("mov r%d,r%d", temp_reg, s2_l); // ks ^= (s2_l & s5_l) + insn_printf("and r%d,r%d", temp_reg, s5_l); + insn_printf("eor r%d,r%d", ks, temp_reg); + release_reg(&s235); + + // ks ^= ch(state->s6_l, s111, s66) + insn_printf("and r%d,r%d", s111, s6_l); + insn_printf("eor r%d,r%d", ks, s111); + insn_printf("mov r%d,r%d", temp_reg, s6_l); + insn_printf("com r%d", temp_reg); + insn_printf("and r%d,r%d", s66, temp_reg); + insn_printf("eor r%d,r%d", ks, s66); + release_reg(&s111); + release_reg(&s66); +} + +// Generate the next 8 non-linear feedback bits. +static void generate_feedback(int input_is_ciphertext) +{ + // f = state->s1_l ^ (~state->s3_l) ^ + // maj(s244, s23, s160) ^ (ca & s196) ^ (cb & ks); + // f ^= plaintext; + alloc_high_reg(&f); // Needs to be a high register for shift_down(). + insn_printf("mov r%d,r%d", f, s3_l); + insn_printf("com r%d", f); + insn_printf("eor r%d,r%d", f, s1_l); + release_reg(&s1_l); // Don't need the low byte of s1 any more. + + // f ^= maj(s244, s23, s160) + insn_printf("mov r%d,r%d", temp_reg, s244); // f ^= (s244 & s23) + insn_printf("and r%d,r%d", temp_reg, s23); + insn_printf("eor r%d,r%d", f, temp_reg); + insn_printf("and r%d,r%d", s23, s160); // f ^= (s23 & s160) + insn_printf("eor r%d,r%d", f, s23); + insn_printf("and r%d,r%d", s244, s160); // f ^= (s244 & s160) + insn_printf("eor r%d,r%d", f, s244); + release_reg(&s244); + release_reg(&s23); + release_reg(&s160); + + // f ^= (ca & s196). Note that when decrypting, ca is always 1. + if (!input_is_ciphertext) + insn_printf("and r%d,%%3", s196); // s196 &= ca + insn_printf("eor r%d,r%d", f, s196); // f ^= s196 + release_reg(&s196); + + // f ^= (cb & ks) ^ plaintext + // If we are processing the ciphertext, then we need to first + // decrypt the input with ks. We leave the plaintext in "ks". + // Note that when decrypting, cb is always 0. + if (!is_32bit_version) { + // Plaintext and ciphertext are 8-bit values in registers. + if (input_is_ciphertext) { + insn_printf("eor r%d,%%2", ks); // plaintext = ciphertext ^ ks + insn_printf("eor r%d,r%d", f, ks); // f ^= plaintext + } else { + insn_printf("mov r%d,r%d", temp_reg, ks); // f ^= (cb & ks) + insn_printf("and r%d,%%4", temp_reg); + insn_printf("eor r%d,r%d", f, temp_reg); + insn_printf("eor r%d,%%2", f); // f ^= plaintext + } + } else { + // Plaintext and ciphertext are 32-bit values in the stack frame. + static const char * const out_regs[] = {"%A0", "%B0", "%C0", "%D0"}; + static const char * const in_regs[] = {"%A2", "%B2", "%C2", "%D2"}; + if (input_is_ciphertext) { + insn_printf("ldd r%d,%s", temp_reg, in_regs[lfsr_offset]); + insn_printf("eor r%d,r%d", ks, temp_reg); + insn_printf("eor r%d,r%d", f, ks); + insn_printf("std %s,r%d", out_regs[lfsr_offset], ks); + } else { + insn_printf("mov r%d,r%d", temp_reg, ks); + insn_printf("and r%d,%%4", temp_reg); + insn_printf("eor r%d,r%d", f, temp_reg); + insn_printf("ldd r%d,%s", temp_reg, in_regs[lfsr_offset]); + insn_printf("eor r%d,r%d", f, temp_reg); + insn_printf("eor r%d,r%d", temp_reg, ks); + insn_printf("std %s,r%d", out_regs[lfsr_offset], temp_reg); + } + release_reg(&ks); + } +} + +// Shift a LFSR downwards by 8 bits and rotate in a register. +static void shift_down_lfsr(const LFSR *lfsr, int reg) +{ + int bit; + int offset1; + int offset2; + int extra = -1; + + // Shift all bytes but the last down. We assume that the low byte + // is already cached in a register from the update_lfsrs() function + // so we don't need to worry about saving it away now. + for (bit = 0; bit < (lfsr->len - 16); bit += 8) { + if (bit < 32) + offset2 = lfsr->offsetl + (bit / 8); + else + offset2 = lfsr->offseth + ((bit - 32) / 8); + if ((bit + 8) < 32) + offset1 = lfsr->offsetl + ((bit + 8) / 8); + else + offset1 = lfsr->offseth + ((bit + 8 - 32) / 8); + insn_printf("ldd r%d,Z+%d", temp_reg, offset1); + if (offset2 != 0) + insn_printf("std Z+%d,r%d", offset2, temp_reg); + else + insn_printf("st Z,r%d", temp_reg); + } + + // Rotate the register value from the next-higher LFSR into the high byte. + if (bit < 32) + offset2 = lfsr->offsetl + (bit / 8); + else + offset2 = lfsr->offseth + ((bit - 32) / 8); + if ((bit + 8) < 32) + offset1 = lfsr->offsetl + ((bit + 8) / 8); + else + offset1 = lfsr->offseth + ((bit + 8 - 32) / 8); + insn_printf("ldd r%d,Z+%d", temp_reg, offset1); + alloc_low_reg(&extra); + bit = lfsr->len % 8; + if (bit <= 4) { + insn_printf("clr r%d", extra); + shift_left_2_regs(extra, reg, bit); + insn_printf("or r%d,r%d", temp_reg, reg); + insn_printf("std Z+%d,r%d", offset2, temp_reg); + insn_printf("std Z+%d,r%d", offset1, extra); + } else { + insn_printf("clr r%d", extra); + shift_right_2_regs(reg, extra, 8 - bit); + insn_printf("or r%d,r%d", temp_reg, extra); + insn_printf("std Z+%d,r%d", offset2, temp_reg); + insn_printf("std Z+%d,r%d", offset1, reg); + } + release_reg(&extra); +} + +// Shift the state downwards by 8 bits. +static void shift_down(void) +{ + int extra = -1; + + // LFSR7: s7_l ^= (f << 4), state->s7 = (f >> 4) + alloc_high_reg(&extra); + check_high_reg(&f); + insn_printf("swap r%d", f); + insn_printf("mov r%d,r%d", extra, f); + insn_printf("andi r%d,0xF0", extra); // Assumes extra is a high reg. + insn_printf("eor r%d,r%d", s7_l, extra); + insn_printf("andi r%d,0x0F", f); // Assumes f is a high reg. + insn_printf("std Z+%d,r%d", get_lfsr(6)->offsetl, f); + release_reg(&f); + release_reg(&extra); + + // LFSR6 down to LFSR1 + shift_down_lfsr(get_lfsr(5), s7_l); + release_reg(&s7_l); + shift_down_lfsr(get_lfsr(4), s6_l); + release_reg(&s6_l); + shift_down_lfsr(get_lfsr(3), s5_l); + release_reg(&s5_l); + shift_down_lfsr(get_lfsr(2), s4_l); + release_reg(&s4_l); + shift_down_lfsr(get_lfsr(1), s3_l); + release_reg(&s3_l); + shift_down_lfsr(get_lfsr(0), s2_l); + release_reg(&s2_l); +} + +// Shift the state downwards by 8 bits as one step within a 32-bit word. +static void shift_down_step(void) +{ + int extra = -1; + int s7_l_next = -1; + + // LFSR7: s7_l ^= (f << 4), state->s7 = (f >> 4) + // We keep the previous s7_l in a register for now and + // make use of it during shift_down_final(). Allocate + // a new register to hold the next s7_l value. + s7_l_prev[lfsr_offset] = s7_l; + alloc_high_reg(&extra); + check_high_reg(&f); + alloc_low_reg(&s7_l_next); + insn_printf("swap r%d", f); + insn_printf("mov r%d,r%d", extra, f); + insn_printf("andi r%d,0xF0", extra); // Assumes extra is a high reg. + insn_printf("eor r%d,r%d", s7_l, extra); + insn_printf("andi r%d,0x0F", f); // Assumes f is a high reg. + insn_printf("mov r%d,r%d", s7_l_next, f); + release_reg(&f); + release_reg(&extra); + s7_l = s7_l_next; + + // Write s2 to s6 back to the state. We don't need to store s1 + // because shift_down_final() will be throwing the value away. + // The generate_feedback() function already released the register. + insn_printf("std Z+%d,r%d", get_lfsr(5)->offsetl + lfsr_offset, s6_l); + release_reg(&s6_l); + insn_printf("std Z+%d,r%d", get_lfsr(4)->offsetl + lfsr_offset, s5_l); + release_reg(&s5_l); + insn_printf("std Z+%d,r%d", get_lfsr(3)->offsetl + lfsr_offset, s4_l); + release_reg(&s4_l); + insn_printf("std Z+%d,r%d", get_lfsr(2)->offsetl + lfsr_offset, s3_l); + release_reg(&s3_l); + insn_printf("std Z+%d,r%d", get_lfsr(1)->offsetl + lfsr_offset, s2_l); + release_reg(&s2_l); +} + +// Shift the state downwards by 8 bits as the final step within a 32-bit word. +static void shift_down_final(void) +{ + int extra = -1; + int extra2 = -1; + int extra3 = -1; + int extra4 = -1; + const LFSR *lfsr; + + // LFSR7: s7_l ^= (f << 4), state->s7 = (f >> 4) + alloc_high_reg(&extra); + check_high_reg(&f); + insn_printf("swap r%d", f); + insn_printf("mov r%d,r%d", extra, f); + insn_printf("andi r%d,0xF0", extra); // Assumes extra is a high reg. + insn_printf("eor r%d,r%d", s7_l, extra); + insn_printf("andi r%d,0x0F", f); // Assumes f is a high reg. + insn_printf("std Z+%d,r%d", get_lfsr(6)->offsetl, f); + release_reg(&f); + + // We currently have the 32-bit s7 word in four registers: + // s7_l_prev[0], s7_l_prev[1], s7_l_prev[2], and s7_l. + // We also have the third byte of s2..s6 in the s2_l..s6_l regs. + // Everything else is stored within the Acorn128 state structure. + + // LFSR1: state->s1_l = state->s1_h | (state->s2_l << 29) + // state->s1_h = state->s2_l >> 3 + lfsr = get_lfsr(1); + alloc_low_reg(&extra2); + alloc_low_reg(&extra3); + alloc_low_reg(&extra4); + insn_printf("clr r%d", temp_reg); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offsetl); // load s2_l[0..2] + insn_printf("ldd r%d,Z+%d", extra2, lfsr->offsetl + 1); + insn_printf("ldd r%d,Z+%d", extra3, lfsr->offsetl + 2); + shift_right_5_regs(s2_l, extra3, extra2, extra, temp_reg, 3); + lfsr = get_lfsr(0); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); + insn_printf("st Z,r%d", extra4); // offset is zero. + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 1); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra4); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 2); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra4); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 3); + insn_printf("or r%d,r%d", extra4, temp_reg); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, temp_reg); + insn_printf("std Z+%d,r%d", lfsr->offseth, extra); + insn_printf("std Z+%d,r%d", lfsr->offseth + 1, extra2); + insn_printf("std Z+%d,r%d", lfsr->offseth + 2, extra3); + insn_printf("std Z+%d,r%d", lfsr->offseth + 3, s2_l); + release_reg(&s2_l); + + // LFSR2: state->s2_l = state->s2_h | (state->s3_l << 14) + // state->s2_h = state->s3_l >> 18 + lfsr = get_lfsr(2); + insn_printf("clr r%d", temp_reg); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offsetl); // load s3_l[0..2] + insn_printf("ldd r%d,Z+%d", extra2, lfsr->offsetl + 1); + insn_printf("ldd r%d,Z+%d", extra3, lfsr->offsetl + 2); + shift_right_5_regs(s3_l, extra3, extra2, extra, temp_reg, 2); + lfsr = get_lfsr(1); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s2_h[0] + insn_printf("std Z+%d,r%d", lfsr->offsetl, extra4); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 1); // s2_h[1] + insn_printf("or r%d,r%d", temp_reg, extra4); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, temp_reg); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra2); + insn_printf("std Z+%d,r%d", lfsr->offseth, extra3); + insn_printf("std Z+%d,r%d", lfsr->offseth + 1, s3_l); + release_reg(&s3_l); + + // LFSR3: state->s3_l = state->s3_h | (state->s4_l << 15) + // state->s3_h = state->s4_l >> 17 + lfsr = get_lfsr(3); + insn_printf("clr r%d", temp_reg); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offsetl); // load s4_l[0..2] + insn_printf("ldd r%d,Z+%d", extra2, lfsr->offsetl + 1); + insn_printf("ldd r%d,Z+%d", extra3, lfsr->offsetl + 2); + shift_right_5_regs(s4_l, extra3, extra2, extra, temp_reg, 1); + lfsr = get_lfsr(2); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s3_h[0] + insn_printf("std Z+%d,r%d", lfsr->offsetl, extra4); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 1); // s3_h[1] + insn_printf("or r%d,r%d", temp_reg, extra4); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, temp_reg); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra2); + insn_printf("std Z+%d,r%d", lfsr->offseth, extra3); + insn_printf("std Z+%d,r%d", lfsr->offseth + 1, s4_l); + release_reg(&s4_l); + + // LFSR4: state->s4_l = state->s4_h | (state->s5_l << 7) + // state->s4_h = state->s5_l >> 25 + lfsr = get_lfsr(4); + insn_printf("clr r%d", temp_reg); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offsetl); // load s5_l[0..2] + insn_printf("ldd r%d,Z+%d", extra2, lfsr->offsetl + 1); + insn_printf("ldd r%d,Z+%d", extra3, lfsr->offsetl + 2); + shift_right_5_regs(s5_l, extra3, extra2, extra, temp_reg, 1); + lfsr = get_lfsr(3); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s4_h + insn_printf("or r%d,r%d", temp_reg, extra4); + insn_printf("std Z+%d,r%d", lfsr->offsetl, temp_reg); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra2); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra3); + insn_printf("std Z+%d,r%d", lfsr->offseth, s5_l); + release_reg(&s5_l); + + // LFSR5: state->s5_l = state->s5_h | (state->s6_l << 5) + // state->s5_h = state->s6_l >> 27 + lfsr = get_lfsr(5); + insn_printf("clr r%d", temp_reg); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offsetl); // load s6_l[0..2] + insn_printf("ldd r%d,Z+%d", extra2, lfsr->offsetl + 1); + insn_printf("ldd r%d,Z+%d", extra3, lfsr->offsetl + 2); + shift_right_5_regs(s6_l, extra3, extra2, extra, temp_reg, 3); + lfsr = get_lfsr(4); + insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s5_h + insn_printf("or r%d,r%d", temp_reg, extra4); + insn_printf("std Z+%d,r%d", lfsr->offsetl, temp_reg); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra2); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra3); + insn_printf("std Z+%d,r%d", lfsr->offseth, s6_l); + release_reg(&s6_l); + + // LFSR6: state->s6_l = state->s6_h | (s7_l << 27) + // state->s6_h = s7_l >> 5 + lfsr = get_lfsr(5); + insn_printf("clr r%d", temp_reg); + shift_left_5_regs + (temp_reg, s7_l, s7_l_prev[2], s7_l_prev[1], s7_l_prev[0], 3); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth); + insn_printf("std Z+%d,r%d", lfsr->offsetl, extra); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth + 1); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth + 2); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra); + insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth + 3); + insn_printf("or r%d,r%d", extra, s7_l_prev[0]); + insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra); + insn_printf("std Z+%d,r%d", lfsr->offseth, s7_l_prev[1]); + insn_printf("std Z+%d,r%d", lfsr->offseth + 1, s7_l_prev[2]); + insn_printf("std Z+%d,r%d", lfsr->offseth + 2, s7_l); + insn_printf("std Z+%d,r%d", lfsr->offseth + 3, temp_reg); + release_reg(&s7_l_prev[0]); + release_reg(&s7_l_prev[1]); + release_reg(&s7_l_prev[2]); + release_reg(&s7_l); + + // Release temporary registers. + release_reg(&extra); + release_reg(&extra2); + release_reg(&extra3); + release_reg(&extra4); +} + +static void encrypt8(void) +{ + // Print the function header. + printf("static uint8_t acornEncrypt8\n"); + printf(" (Acorn128State *state, uint8_t plaintext, uint8_t ca, uint8_t cb)\n"); + printf("{\n"); + indent_printf("// Automatically generated by the genacorn tool.\n"); + indent_printf("uint8_t ciphertext;\n"); + indent_printf("__asm__ __volatile__ (\n"); + indent += 4; + + // Output the body of the function. + extract_sub_parts(); + update_lfsrs(); + generate_keystream(); + generate_feedback(0); + shift_down(); + + // Generate the final ciphertext. + insn_printf("mov %%0,%%2"); + insn_printf("eor %%0,r%d", ks); + release_reg(&ks); + + // Declare the registers that we need. + indent_printf(": \"=r\"(ciphertext)\n"); + indent_printf(": \"z\"(&state->s1_l), \"r\"(plaintext), \"r\"((uint8_t)ca), \"r\"((uint8_t)cb)\n"); + temp_regs(); + indent -= 4; + indent_printf(");\n"); + indent_printf("return ciphertext;\n"); + printf("}\n\n"); + check_regs(); +} + +static void decrypt8(void) +{ + // Print the function header. + printf("static uint8_t acornDecrypt8(Acorn128State *state, uint8_t ciphertext)\n"); + printf("{\n"); + indent_printf("// Automatically generated by the genacorn tool.\n"); + indent_printf("uint8_t plaintext;\n"); + indent_printf("__asm__ __volatile__ (\n"); + indent += 4; + + // Output the body of the function. + extract_sub_parts(); + update_lfsrs(); + generate_keystream(); + generate_feedback(1); + shift_down(); + + // Generate the final plaintext. + insn_printf("mov %%0,r%d", ks); + release_reg(&ks); + + // Declare the registers that we need. + indent_printf(": \"=r\"(plaintext)\n"); + indent_printf(": \"z\"(&state->s1_l), \"r\"(ciphertext)\n"); + temp_regs(); + indent -= 4; + indent_printf(");\n"); + indent_printf("return plaintext;\n"); + printf("}\n\n"); + check_regs(); +} + +static void encrypt32(void) +{ + // Print the function header. + printf("uint32_t acornEncrypt32\n"); + printf(" (Acorn128State *state, uint32_t plaintext, uint32_t ca, uint32_t cb)\n"); + printf("{\n"); + indent_printf("// Automatically generated by the genacorn tool.\n"); + indent_printf("uint32_t ciphertext;\n"); + indent_printf("__asm__ __volatile__ (\n"); + indent += 4; + + // Output the body of the function as 4 rounds for each byte in the word. + // The shift-down step is delayed until after all 4 rounds are complete. + lfsr_offset = 0; + is_32bit_version = 1; + extract_sub_parts(); + update_lfsrs(); + generate_keystream(); + generate_feedback(0); + shift_down_step(); + lfsr_offset = 1; + extract_sub_parts(); + update_lfsrs(); + generate_keystream(); + generate_feedback(0); + shift_down_step(); + lfsr_offset = 2; + extract_sub_parts(); + update_lfsrs(); + generate_keystream(); + generate_feedback(0); + shift_down_step(); + lfsr_offset = 3; + extract_sub_parts(); + update_lfsrs(); + generate_keystream(); + generate_feedback(0); + shift_down_final(); + lfsr_offset = 0; + is_32bit_version = 0; + + // Declare the registers that we need. + indent_printf(": \"=Q\"(ciphertext)\n"); + indent_printf(": \"z\"(&state->s1_l), \"Q\"(plaintext), \"r\"((uint8_t)ca), \"r\"((uint8_t)cb)\n"); + temp_regs(); + indent -= 4; + indent_printf(");\n"); + indent_printf("return ciphertext;\n"); + printf("}\n\n"); + check_regs(); +} + +int main(int argc, char *argv[]) +{ + encrypt8(); + decrypt8(); + encrypt32(); + return 0; +} diff --git a/libraries/CryptoLW/examples/TestAcorn/TestAcorn.ino b/libraries/CryptoLW/examples/TestAcorn/TestAcorn.ino index 7c6d4c39..2be29e92 100644 --- a/libraries/CryptoLW/examples/TestAcorn/TestAcorn.ino +++ b/libraries/CryptoLW/examples/TestAcorn/TestAcorn.ino @@ -147,7 +147,7 @@ TestVector testVector; Acorn128 acorn; -byte buffer[MAX_PLAINTEXT_LEN]; +byte buffer[128]; bool testCipher_N(Acorn128 *cipher, const struct TestVector *test, size_t inc) { diff --git a/libraries/CryptoLW/src/Acorn128.cpp b/libraries/CryptoLW/src/Acorn128.cpp index 4b4bb59f..f4be4af8 100644 --- a/libraries/CryptoLW/src/Acorn128.cpp +++ b/libraries/CryptoLW/src/Acorn128.cpp @@ -99,6 +99,8 @@ size_t Acorn128::tagSize() const #define CB_0_BYTE ((uint8_t)0x00) #define CB_1_BYTE ((uint8_t)0xFF) +#if defined(CRYPTO_ACORN128_DEFAULT) || defined(CRYPTO_DOC) + // maj() and ch() functions for mixing the state. #define maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) #define ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) @@ -446,13 +448,21 @@ static inline uint32_t acornDecrypt32(Acorn128State *state, uint32_t ciphertext) return plaintext; } +#elif defined(CRYPTO_ACORN128_AVR) + +// Import definitions from Acorn128AVR.cpp +extern uint32_t acornEncrypt32 + (Acorn128State *state, uint32_t plaintext, uint32_t ca, uint32_t cb); + +#endif // CRYPTO_ACORN128_AVR + /** * \brief Adds 256 bits of padding to the Acorn128 state. * * \param state The state for the Acorn128 cipher. * \param cb The cb constant for the padding block. */ -static void acornPad(Acorn128State *state, uint32_t cb) +void acornPad(Acorn128State *state, uint32_t cb) { acornEncrypt32(state, 1, CA_1, cb); acornEncrypt32(state, 0, CA_1, cb); @@ -539,6 +549,8 @@ bool Acorn128::setIV(const uint8_t *iv, size_t len) return true; } +#if defined(CRYPTO_ACORN128_DEFAULT) || defined(CRYPTO_DOC) + void Acorn128::encrypt(uint8_t *output, const uint8_t *input, size_t len) { uint32_t temp; @@ -616,6 +628,8 @@ void Acorn128::addAuthData(const void *data, size_t len) } } +#endif // CRYPTO_ACORN128_DEFAULT + void Acorn128::computeTag(void *tag, size_t len) { // Finalize the data and apply padding. diff --git a/libraries/CryptoLW/src/Acorn128.h b/libraries/CryptoLW/src/Acorn128.h index 1bca355e..8337e82e 100644 --- a/libraries/CryptoLW/src/Acorn128.h +++ b/libraries/CryptoLW/src/Acorn128.h @@ -55,6 +55,13 @@ typedef struct } Acorn128State; +// Determine which Acorn128 implementation to export to applications. +#if defined(__AVR__) +#define CRYPTO_ACORN128_AVR 1 +#else +#define CRYPTO_ACORN128_DEFAULT 1 +#endif + /** @endcond */ class Acorn128 : public AuthenticatedCipher diff --git a/libraries/CryptoLW/src/Acorn128AVR.cpp b/libraries/CryptoLW/src/Acorn128AVR.cpp new file mode 100644 index 00000000..35b84bc8 --- /dev/null +++ b/libraries/CryptoLW/src/Acorn128AVR.cpp @@ -0,0 +1,556 @@ +/* + * Copyright (C) 2018 Southern Storm Software, Pty Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "Acorn128.h" + +#if defined(CRYPTO_ACORN128_AVR) + +// Acorn128 constants for ca and cb. +#define CA_0 ((uint32_t)0x00000000) +#define CA_1 ((uint32_t)0xFFFFFFFF) +#define CB_0 ((uint32_t)0x00000000) +#define CB_1 ((uint32_t)0xFFFFFFFF) +#define CA_0_BYTE ((uint8_t)0x00) +#define CA_1_BYTE ((uint8_t)0xFF) +#define CB_0_BYTE ((uint8_t)0x00) +#define CB_1_BYTE ((uint8_t)0xFF) + +// Imports from Acorn128.cpp +extern void acornPad(Acorn128State *state, uint32_t cb); + +// Force the acornEncrypt8() and acornDecrypt8() functions to always inline. +static uint8_t acornEncrypt8 + (Acorn128State *state, uint8_t plaintext, uint8_t ca, uint8_t cb) + __attribute__((always_inline)); +static uint8_t acornDecrypt8(Acorn128State *state, uint8_t ciphertext) + __attribute__((always_inline)); + +static uint8_t acornEncrypt8 + (Acorn128State *state, uint8_t plaintext, uint8_t ca, uint8_t cb) +{ + // Automatically generated by the genacorn tool. + uint8_t ciphertext; + __asm__ __volatile__ ( + "ldd r15,Z+34\n" + "ldd r14,Z+33\n" + "ldd r0,Z+32\n" + "lsl r0\n" + "rol r14\n" + "rol r15\n" + "lsl r0\n" + "rol r14\n" + "rol r15\n" + "lsl r0\n" + "rol r14\n" + "ldd r13,Z+28\n" + "ldd r0,Z+29\n" + "lsr r0\n" + "ror r13\n" + "lsr r0\n" + "ror r13\n" + "lsr r0\n" + "ror r13\n" + "ldd r0,Z+20\n" + "ldd r12,Z+21\n" + "lsl r0\n" + "rol r12\n" + "lsl r0\n" + "rol r12\n" + "ldd r16,Z+16\n" + "ldd r17,Z+17\n" + "swap r16\n" + "swap r17\n" + "andi r16,0x0F\n" + "andi r17,0xF0\n" + "or r16,r17\n" + "ldd r0,Z+8\n" + "ldd r11,Z+9\n" + "lsl r0\n" + "rol r11\n" + "lsl r0\n" + "rol r11\n" + "lsl r0\n" + "rol r11\n" + "ldd r10,Z+3\n" + "ldd r9,Z+2\n" + "ldd r0,Z+1\n" + "lsl r0\n" + "rol r9\n" + "rol r10\n" + "lsl r0\n" + "rol r9\n" + "lsl r0\n" + "rol r9\n" + "lsl r0\n" + "rol r9\n" + "ldd r8,Z+40\n" + "eor r8,r14\n" + "ldd r7,Z+32\n" + "eor r8,r7\n" + "eor r7,r13\n" + "ldd r6,Z+28\n" + "eor r7,r6\n" + "eor r6,r12\n" + "ldd r23,Z+20\n" + "eor r6,r23\n" + "eor r23,r16\n" + "ldd r22,Z+16\n" + "eor r23,r22\n" + "eor r22,r11\n" + "ldd r21,Z+8\n" + "eor r22,r21\n" + "eor r21,r10\n" + "ld r20,Z\n" + "eor r21,r20\n" + "eor r9,r23\n" + "mov r0,r14\n" + "and r0,r21\n" + "eor r9,r0\n" + "and r14,r6\n" + "eor r9,r14\n" + "mov r0,r21\n" + "and r0,r6\n" + "eor r9,r0\n" + "and r16,r7\n" + "eor r9,r16\n" + "mov r0,r7\n" + "com r0\n" + "and r11,r0\n" + "eor r9,r11\n" + "mov r16,r22\n" + "com r16\n" + "eor r16,r20\n" + "mov r0,r15\n" + "and r0,r10\n" + "eor r16,r0\n" + "and r10,r12\n" + "eor r16,r10\n" + "and r15,r12\n" + "eor r16,r15\n" + "and r13,%3\n" + "eor r16,r13\n" + "mov r0,r9\n" + "and r0,%4\n" + "eor r16,r0\n" + "eor r16,%2\n" + "swap r16\n" + "mov r17,r16\n" + "andi r17,0xF0\n" + "eor r8,r17\n" + "andi r16,0x0F\n" + "std Z+40,r16\n" + "ldd r0,Z+33\n" + "std Z+32,r0\n" + "ldd r0,Z+34\n" + "std Z+33,r0\n" + "ldd r0,Z+35\n" + "std Z+34,r0\n" + "ldd r0,Z+36\n" + "std Z+35,r0\n" + "ldd r0,Z+37\n" + "std Z+36,r0\n" + "ldd r0,Z+38\n" + "std Z+37,r0\n" + "ldd r0,Z+39\n" + "clr r15\n" + "lsl r8\n" + "rol r15\n" + "lsl r8\n" + "rol r15\n" + "lsl r8\n" + "rol r15\n" + "or r0,r8\n" + "std Z+38,r0\n" + "std Z+39,r15\n" + "ldd r0,Z+29\n" + "std Z+28,r0\n" + "ldd r0,Z+30\n" + "std Z+29,r0\n" + "ldd r0,Z+31\n" + "std Z+30,r0\n" + "ldd r0,Z+26\n" + "clr r15\n" + "lsr r7\n" + "ror r15\n" + "lsr r7\n" + "ror r15\n" + "lsr r7\n" + "ror r15\n" + "or r0,r15\n" + "std Z+31,r0\n" + "std Z+26,r7\n" + "ldd r0,Z+21\n" + "std Z+20,r0\n" + "ldd r0,Z+22\n" + "std Z+21,r0\n" + "ldd r0,Z+23\n" + "std Z+22,r0\n" + "ldd r0,Z+24\n" + "clr r15\n" + "lsr r6\n" + "ror r15\n" + "or r0,r15\n" + "std Z+23,r0\n" + "std Z+24,r6\n" + "ldd r0,Z+17\n" + "std Z+16,r0\n" + "ldd r0,Z+18\n" + "std Z+17,r0\n" + "ldd r0,Z+19\n" + "std Z+18,r0\n" + "ldd r0,Z+14\n" + "std Z+19,r0\n" + "ldd r0,Z+15\n" + "clr r15\n" + "lsr r23\n" + "ror r15\n" + "or r0,r15\n" + "std Z+14,r0\n" + "std Z+15,r23\n" + "ldd r0,Z+9\n" + "std Z+8,r0\n" + "ldd r0,Z+10\n" + "std Z+9,r0\n" + "ldd r0,Z+11\n" + "std Z+10,r0\n" + "ldd r0,Z+12\n" + "std Z+11,r0\n" + "ldd r0,Z+13\n" + "clr r15\n" + "lsr r22\n" + "ror r15\n" + "lsr r22\n" + "ror r15\n" + "or r0,r15\n" + "std Z+12,r0\n" + "std Z+13,r22\n" + "ldd r0,Z+1\n" + "st Z,r0\n" + "ldd r0,Z+2\n" + "std Z+1,r0\n" + "ldd r0,Z+3\n" + "std Z+2,r0\n" + "ldd r0,Z+4\n" + "std Z+3,r0\n" + "ldd r0,Z+5\n" + "std Z+4,r0\n" + "ldd r0,Z+6\n" + "std Z+5,r0\n" + "ldd r0,Z+7\n" + "clr r15\n" + "lsr r21\n" + "ror r15\n" + "lsr r21\n" + "ror r15\n" + "lsr r21\n" + "ror r15\n" + "or r0,r15\n" + "std Z+6,r0\n" + "std Z+7,r21\n" + "mov %0,%2\n" + "eor %0,r9\n" + : "=r"(ciphertext) + : "z"(&state->s1_l), "r"(plaintext), "r"((uint8_t)ca), "r"((uint8_t)cb) + : "r16", "r17", "r20", "r21", "r22", "r23", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory" + ); + return ciphertext; +} + +static uint8_t acornDecrypt8(Acorn128State *state, uint8_t ciphertext) +{ + // Automatically generated by the genacorn tool. + uint8_t plaintext; + __asm__ __volatile__ ( + "ldd r15,Z+34\n" + "ldd r14,Z+33\n" + "ldd r0,Z+32\n" + "lsl r0\n" + "rol r14\n" + "rol r15\n" + "lsl r0\n" + "rol r14\n" + "rol r15\n" + "lsl r0\n" + "rol r14\n" + "ldd r13,Z+28\n" + "ldd r0,Z+29\n" + "lsr r0\n" + "ror r13\n" + "lsr r0\n" + "ror r13\n" + "lsr r0\n" + "ror r13\n" + "ldd r0,Z+20\n" + "ldd r12,Z+21\n" + "lsl r0\n" + "rol r12\n" + "lsl r0\n" + "rol r12\n" + "ldd r16,Z+16\n" + "ldd r17,Z+17\n" + "swap r16\n" + "swap r17\n" + "andi r16,0x0F\n" + "andi r17,0xF0\n" + "or r16,r17\n" + "ldd r0,Z+8\n" + "ldd r11,Z+9\n" + "lsl r0\n" + "rol r11\n" + "lsl r0\n" + "rol r11\n" + "lsl r0\n" + "rol r11\n" + "ldd r10,Z+3\n" + "ldd r9,Z+2\n" + "ldd r0,Z+1\n" + "lsl r0\n" + "rol r9\n" + "rol r10\n" + "lsl r0\n" + "rol r9\n" + "lsl r0\n" + "rol r9\n" + "lsl r0\n" + "rol r9\n" + "ldd r8,Z+40\n" + "eor r8,r14\n" + "ldd r7,Z+32\n" + "eor r8,r7\n" + "eor r7,r13\n" + "ldd r6,Z+28\n" + "eor r7,r6\n" + "eor r6,r12\n" + "ldd r23,Z+20\n" + "eor r6,r23\n" + "eor r23,r16\n" + "ldd r22,Z+16\n" + "eor r23,r22\n" + "eor r22,r11\n" + "ldd r21,Z+8\n" + "eor r22,r21\n" + "eor r21,r10\n" + "ld r20,Z\n" + "eor r21,r20\n" + "eor r9,r23\n" + "mov r0,r14\n" + "and r0,r21\n" + "eor r9,r0\n" + "and r14,r6\n" + "eor r9,r14\n" + "mov r0,r21\n" + "and r0,r6\n" + "eor r9,r0\n" + "and r16,r7\n" + "eor r9,r16\n" + "mov r0,r7\n" + "com r0\n" + "and r11,r0\n" + "eor r9,r11\n" + "mov r16,r22\n" + "com r16\n" + "eor r16,r20\n" + "mov r0,r15\n" + "and r0,r10\n" + "eor r16,r0\n" + "and r10,r12\n" + "eor r16,r10\n" + "and r15,r12\n" + "eor r16,r15\n" + "eor r16,r13\n" + "eor r9,%2\n" + "eor r16,r9\n" + "swap r16\n" + "mov r17,r16\n" + "andi r17,0xF0\n" + "eor r8,r17\n" + "andi r16,0x0F\n" + "std Z+40,r16\n" + "ldd r0,Z+33\n" + "std Z+32,r0\n" + "ldd r0,Z+34\n" + "std Z+33,r0\n" + "ldd r0,Z+35\n" + "std Z+34,r0\n" + "ldd r0,Z+36\n" + "std Z+35,r0\n" + "ldd r0,Z+37\n" + "std Z+36,r0\n" + "ldd r0,Z+38\n" + "std Z+37,r0\n" + "ldd r0,Z+39\n" + "clr r15\n" + "lsl r8\n" + "rol r15\n" + "lsl r8\n" + "rol r15\n" + "lsl r8\n" + "rol r15\n" + "or r0,r8\n" + "std Z+38,r0\n" + "std Z+39,r15\n" + "ldd r0,Z+29\n" + "std Z+28,r0\n" + "ldd r0,Z+30\n" + "std Z+29,r0\n" + "ldd r0,Z+31\n" + "std Z+30,r0\n" + "ldd r0,Z+26\n" + "clr r15\n" + "lsr r7\n" + "ror r15\n" + "lsr r7\n" + "ror r15\n" + "lsr r7\n" + "ror r15\n" + "or r0,r15\n" + "std Z+31,r0\n" + "std Z+26,r7\n" + "ldd r0,Z+21\n" + "std Z+20,r0\n" + "ldd r0,Z+22\n" + "std Z+21,r0\n" + "ldd r0,Z+23\n" + "std Z+22,r0\n" + "ldd r0,Z+24\n" + "clr r15\n" + "lsr r6\n" + "ror r15\n" + "or r0,r15\n" + "std Z+23,r0\n" + "std Z+24,r6\n" + "ldd r0,Z+17\n" + "std Z+16,r0\n" + "ldd r0,Z+18\n" + "std Z+17,r0\n" + "ldd r0,Z+19\n" + "std Z+18,r0\n" + "ldd r0,Z+14\n" + "std Z+19,r0\n" + "ldd r0,Z+15\n" + "clr r15\n" + "lsr r23\n" + "ror r15\n" + "or r0,r15\n" + "std Z+14,r0\n" + "std Z+15,r23\n" + "ldd r0,Z+9\n" + "std Z+8,r0\n" + "ldd r0,Z+10\n" + "std Z+9,r0\n" + "ldd r0,Z+11\n" + "std Z+10,r0\n" + "ldd r0,Z+12\n" + "std Z+11,r0\n" + "ldd r0,Z+13\n" + "clr r15\n" + "lsr r22\n" + "ror r15\n" + "lsr r22\n" + "ror r15\n" + "or r0,r15\n" + "std Z+12,r0\n" + "std Z+13,r22\n" + "ldd r0,Z+1\n" + "st Z,r0\n" + "ldd r0,Z+2\n" + "std Z+1,r0\n" + "ldd r0,Z+3\n" + "std Z+2,r0\n" + "ldd r0,Z+4\n" + "std Z+3,r0\n" + "ldd r0,Z+5\n" + "std Z+4,r0\n" + "ldd r0,Z+6\n" + "std Z+5,r0\n" + "ldd r0,Z+7\n" + "clr r15\n" + "lsr r21\n" + "ror r15\n" + "lsr r21\n" + "ror r15\n" + "lsr r21\n" + "ror r15\n" + "or r0,r15\n" + "std Z+6,r0\n" + "std Z+7,r21\n" + "mov %0,r9\n" + : "=r"(plaintext) + : "z"(&state->s1_l), "r"(ciphertext) + : "r16", "r17", "r20", "r21", "r22", "r23", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory" + ); + return plaintext; +} + +uint32_t acornEncrypt32 + (Acorn128State *state, uint32_t plaintext, uint32_t ca, uint32_t cb) +{ + uint32_t ciphertext; + ciphertext = acornEncrypt8 + (state, (uint8_t)plaintext, (uint8_t)ca, (uint8_t)cb); + ciphertext |= ((uint32_t)(acornEncrypt8 + (state, (uint8_t)(plaintext >> 8), (uint8_t)ca, (uint8_t)cb))) << 8; + ciphertext |= ((uint32_t)(acornEncrypt8 + (state, (uint8_t)(plaintext >> 16), (uint8_t)ca, (uint8_t)cb))) << 16; + ciphertext |= ((uint32_t)(acornEncrypt8 + (state, (uint8_t)(plaintext >> 24), (uint8_t)ca, (uint8_t)cb))) << 24; + return ciphertext; +} + +void Acorn128::encrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + uint32_t temp; + if (!state.authDone) { + acornPad(&state, CB_1); + state.authDone = 1; + } + while (len > 0) { + *output++ = acornEncrypt8(&state, *input++, CA_1_BYTE, CB_0_BYTE); + --len; + } +} + +void Acorn128::decrypt(uint8_t *output, const uint8_t *input, size_t len) +{ + uint32_t temp; + if (!state.authDone) { + acornPad(&state, CB_1); + state.authDone = 1; + } + while (len > 0) { + *output++ = acornDecrypt8(&state, *input++); + --len; + } +} + +void Acorn128::addAuthData(const void *data, size_t len) +{ + if (state.authDone) + return; + const uint8_t *input = (const uint8_t *)data; + while (len > 0) { + acornEncrypt8(&state, *input++, CA_1_BYTE, CB_1_BYTE); + --len; + } +} + +#endif // CRYPTO_ACORN128_AVR