1
0
mirror of https://github.com/taigrr/arduinolibs synced 2025-01-18 04:33:12 -08:00

AVR assembly version of Acorn128

This commit is contained in:
Rhys Weatherley 2018-04-26 06:50:39 +10:00
parent 91bffb9d1f
commit 4078351503
6 changed files with 1522 additions and 2 deletions

View File

@ -119,6 +119,7 @@ Arduino Mega 2560 running at 16 MHz are similar:
<tr><td>EAX&lt;AES256&gt;</td><td align="right">97.80us</td><td align="right">97.80us</td><td align="right">1806.57us</td><td align="right">332</td></tr> <tr><td>EAX&lt;AES256&gt;</td><td align="right">97.80us</td><td align="right">97.80us</td><td align="right">1806.57us</td><td align="right">332</td></tr>
<tr><td>EAX&lt;Speck&gt; (256-bit key)</td><td align="right">25.89us</td><td align="right">25.88us</td><td align="right">690.63us</td><td align="right">362</td></tr> <tr><td>EAX&lt;Speck&gt; (256-bit key)</td><td align="right">25.89us</td><td align="right">25.88us</td><td align="right">690.63us</td><td align="right">362</td></tr>
<tr><td>EAX&lt;SpeckTiny&gt; (256-bit key)</td><td align="right">78.20us</td><td align="right">78.20us</td><td align="right">1269.19us</td><td align="right">122</td></tr> <tr><td>EAX&lt;SpeckTiny&gt; (256-bit key)</td><td align="right">78.20us</td><td align="right">78.20us</td><td align="right">1269.19us</td><td align="right">122</td></tr>
<tr><td>Acorn128</td><td align="right">20.39us</td><td align="right">20.06us</td><td align="right">4817.82us</td><td align="right">60</td></tr>
<tr><td colspan="5"> </td></tr> <tr><td colspan="5"> </td></tr>
<tr><td>Hash Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td> </td><td>State Size (bytes)</td></tr> <tr><td>Hash Algorithm</td><td align="right">Hashing (per byte)</td><td align="right">Finalization</td><td> </td><td>State Size (bytes)</td></tr>
<tr><td>SHA256</td><td align="right">43.85us</td><td align="right">2841.04us</td><td align="right"> </td><td align="right">107</td></tr> <tr><td>SHA256</td><td align="right">43.85us</td><td align="right">2841.04us</td><td align="right"> </td><td align="right">107</td></tr>

942
gen/genacorn.c Normal file
View File

@ -0,0 +1,942 @@
/*
* Copyright (C) 2018 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
// Special-purpose compiler that generates the AVR version of Acorn128.
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
// Number of spaces that indent_printf() and insn_printf() emit before
// each line of generated output.
static int indent = 4;
static const int temp_reg = 0; // Register number for the AVR "__tmp_reg__".
// Information about the positions and lengths of the LFSR's.
// The offsets are byte displacements from the Z pointer in the
// generated "ldd"/"std" instructions.
typedef struct
{
int start; // Bit position where the LFSR starts.
int len; // Length of the LFSR in bits.
int offsetl; // Offset of the low word of the LFSR in the state.
int offseth; // Offset of the high word of the LFSR in the state.
} LFSR;
#define num_lfsrs 7
// One entry per LFSR in ascending bit order: {start, len, offsetl, offseth}.
// Note that the high word does not always follow the low word
// (see the entries starting at bits 107 and 193).
static LFSR const lfsr[num_lfsrs] = {
{0, 61, 0, 4},
{61, 46, 8, 12},
{107, 47, 16, 14},
{154, 39, 20, 24},
{193, 37, 28, 26},
{230, 59, 32, 36},
{289, 4, 40, 40},
};
// LFSR byte offset for generating 32-bit versions of the code.
// encrypt32() steps this from 0 to 3, once per byte of the word,
// and resets it to 0 when it is done.
static int lfsr_offset = 0;
// Non-zero to generate the 32-bit version.
// Only set while encrypt32() is generating its function body.
static int is_32bit_version = 0;
// Registers that can be used for temporary values, in the best
// order to allocate them. High registers are listed first.
// alloc_high_reg() scans this list from the front and
// alloc_low_reg() scans it from the back.
static int regs[] = {
16, 17, 18, 19, 20, 21, 22, 23, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define num_regs (sizeof(regs) / sizeof(regs[0]))
// Which registers are currently in use?
// Indexed in parallel with regs[].
static int reg_in_use[num_regs] = {0};
// Which registers did we use while generating the code?
// Indexed in parallel with regs[]; set on allocation and never cleared,
// so it accumulates across all generated functions.
static int reg_used[num_regs] = {0};
// Registers that have been allocated to specific purposes.
// Each variable holds an AVR register number, or -1 when no register
// is currently assigned (the allocators insist on -1 before allocating).
// sNNN: byte extracted from the state starting at bit NNN
// (filled in by extract_sub_parts()).
static int s244 = -1;
static int s235 = -1;
static int s196 = -1;
static int s160 = -1;
static int s111 = -1;
static int s66 = -1;
static int s23 = -1;
static int s12 = -1;
// sN_l: current low byte of LFSR N (loaded by update_lfsrs()).
static int s1_l = -1;
static int s2_l = -1;
static int s3_l = -1;
static int s4_l = -1;
static int s5_l = -1;
static int s6_l = -1;
static int s7_l = -1;
// Earlier s7_l registers kept alive by shift_down_step() until
// shift_down_final() consumes them; indexed by lfsr_offset.
static int s7_l_prev[3] = {-1, -1, -1};
// Keystream byte (set up by generate_keystream()).
static int ks = -1;
// Feedback byte (allocated by generate_feedback()).
static int f = -1;
// Write the current indentation to stdout, followed by the
// printf-style formatted text.
void indent_printf(const char *format, ...)
{
    va_list args;
    int remaining = indent;

    while (remaining-- > 0)
        fputc(' ', stdout);

    va_start(args, format);
    vfprintf(stdout, format, args);
    va_end(args);
}
// Emit one assembler instruction as an indented, quoted string literal.
// The formatted text is followed by a literal backslash-n inside the
// quotes, and then a real newline after the closing quote.
void insn_printf(const char *format, ...)
{
    va_list args;
    int remaining;

    for (remaining = indent; remaining > 0; --remaining)
        fputc(' ', stdout);
    fputc('"', stdout);

    va_start(args, format);
    vfprintf(stdout, format, args);
    va_end(args);

    fputs("\\n\"\n", stdout);
}
// Claim the first free register in regs[] order (high registers come
// first) and store its number in *reg. The caller must pass in -1.
// Exits with an error if nothing is free, or if the first free
// register turns out not to be a high register (r16 and up).
static void alloc_high_reg(int *reg)
{
    unsigned slot = 0;

    if (*reg != -1) {
        fprintf(stderr, "Temporary register wasn't previously released\n");
        exit(1);
    }

    // Skip over everything that is already taken.
    while (slot < num_regs && reg_in_use[slot])
        ++slot;
    if (slot >= num_regs) {
        fprintf(stderr, "Ran out of temporary registers\n");
        exit(1);
    }

    reg_in_use[slot] = 1;
    reg_used[slot] = 1;
    *reg = regs[slot];
    if (regs[slot] < 16) {
        fprintf(stderr, "Ran out of temporary high registers\n");
        exit(1);
    }
}
// Claim the last free register in regs[] order (low registers are at
// the end of the list) and store its number in *reg. Use this when the
// value never needs to live in a high register. The caller must pass
// in -1; exits with an error if no register is free.
static void alloc_low_reg(int *reg)
{
    unsigned slot = num_regs;

    if (*reg != -1) {
        fprintf(stderr, "Temporary register wasn't previously released\n");
        exit(1);
    }

    // Walk backwards until slot - 1 names a free entry.
    while (slot > 0 && reg_in_use[slot - 1])
        --slot;
    if (slot == 0) {
        fprintf(stderr, "Ran out of temporary registers\n");
        exit(1);
    }

    reg_in_use[slot - 1] = 1;
    reg_used[slot - 1] = 1;
    *reg = regs[slot - 1];
}
// Return the register named by *reg to the pool and reset *reg to -1.
// Exits with an error if that register is not currently allocated.
static void release_reg(int *reg)
{
    unsigned slot;

    for (slot = 0; slot < num_regs; ++slot) {
        if (regs[slot] != *reg || !reg_in_use[slot])
            continue;
        reg_in_use[slot] = 0;
        *reg = -1;
        return;
    }

    fprintf(stderr, "Released a register that was not in use\n");
    exit(1);
}
// Abort code generation unless *reg names a high register (r16 and up).
static void check_high_reg(int *reg)
{
    const int number = *reg;

    if (number >= 16)
        return;
    fprintf(stderr, "r%d is not a high register\n", number);
    exit(1);
}
// Verify that no temporary register is still allocated; report the
// first one that is and exit with an error.
static void check_regs(void)
{
    unsigned slot = 0;

    while (slot < num_regs) {
        if (reg_in_use[slot]) {
            fprintf(stderr, "Register r%d has not been released\n",
                    regs[slot]);
            exit(1);
        }
        ++slot;
    }
}
// Print the clobber list for the generated inline assembly: every
// register that has been handed out by the allocators so far,
// followed by "memory". A running separator is used so that the list
// stays well-formed even when no register has been used at all.
static void temp_regs(void)
{
    unsigned index;
    const char *separator = "";

    indent_printf(": ");
    for (index = 0; index < num_regs; ++index) {
        if (!reg_used[index])
            continue;
        printf("%s\"r%d\"", separator, regs[index]);
        separator = ", ";
    }
    printf("%s\"memory\"\n", separator);
}
// Find the information about a specific LFSR.
static const LFSR *find_lfsr(int bit)
{
unsigned index;
for (index = 0; index < num_lfsrs; ++index) {
if (bit >= lfsr[index].start &&
bit < (lfsr[index].start + lfsr[index].len))
return &(lfsr[index]);
}
return &(lfsr[num_lfsrs - 1]);
}
// Return the table entry for LFSR index "num" (0 to 6).
// No range check is performed on "num".
static const LFSR *get_lfsr(int num)
{
    return lfsr + num;
}
// Emit "count" one-bit left shifts of the 16-bit value reg1:reg2,
// where reg2 is the low byte and reg1 receives the carry.
static void shift_left_2_regs(int reg1, int reg2, int count)
{
    int step;

    for (step = 0; step < count; ++step) {
        insn_printf("lsl r%d", reg2);
        insn_printf("rol r%d", reg1);
    }
}
// Emit "count" one-bit left shifts of the 24-bit value reg1:reg2:reg3,
// where reg3 is the low byte and reg1 is the high byte.
static void shift_left_3_regs(int reg1, int reg2, int reg3, int count)
{
    int step;

    for (step = 0; step < count; ++step) {
        insn_printf("lsl r%d", reg3);
        insn_printf("rol r%d", reg2);
        insn_printf("rol r%d", reg1);
    }
}
// Emit "count" one-bit left shifts of the 40-bit value
// reg1:reg2:reg3:reg4:reg5, where reg5 is the low byte and
// reg1 is the high byte.
static void shift_left_5_regs
    (int reg1, int reg2, int reg3, int reg4, int reg5, int count)
{
    int step;

    for (step = 0; step < count; ++step) {
        insn_printf("lsl r%d", reg5);
        insn_printf("rol r%d", reg4);
        insn_printf("rol r%d", reg3);
        insn_printf("rol r%d", reg2);
        insn_printf("rol r%d", reg1);
    }
}
// Emit "count" one-bit right shifts of the 16-bit value reg1:reg2,
// where reg1 is the high byte and reg2 receives the carry.
static void shift_right_2_regs(int reg1, int reg2, int count)
{
    int step;

    for (step = 0; step < count; ++step) {
        insn_printf("lsr r%d", reg1);
        insn_printf("ror r%d", reg2);
    }
}
// Emit "count" one-bit right shifts of the 40-bit value
// reg1:reg2:reg3:reg4:reg5, where reg1 is the high byte and
// reg5 is the low byte.
static void shift_right_5_regs
    (int reg1, int reg2, int reg3, int reg4, int reg5, int count)
{
    int step;

    for (step = 0; step < count; ++step) {
        insn_printf("lsr r%d", reg1);
        insn_printf("ror r%d", reg2);
        insn_printf("ror r%d", reg3);
        insn_printf("ror r%d", reg4);
        insn_printf("ror r%d", reg5);
    }
}
// Extracts one part from the state as a byte.
static void extract_one_part(int reg, int bit)
{
const LFSR *lfsr = find_lfsr(bit);
int offset;
bit -= lfsr->start;
offset = lfsr->offsetl + lfsr_offset + (bit / 8);
bit %= 8;
if (bit < 4) {
insn_printf("ldd r%d,Z+%d", reg, offset);
insn_printf("ldd r%d,Z+%d", temp_reg, offset + 1);
shift_right_2_regs(temp_reg, reg, bit);
} else if (bit > 4) {
insn_printf("ldd r%d,Z+%d", temp_reg, offset);
insn_printf("ldd r%d,Z+%d", reg, offset + 1);
shift_left_2_regs(reg, temp_reg, 8 - bit);
} else {
int extra_reg = -1;
alloc_high_reg(&extra_reg);
insn_printf("ldd r%d,Z+%d", reg, offset);
insn_printf("ldd r%d,Z+%d", extra_reg, offset + 1);
insn_printf("swap r%d", reg);
insn_printf("swap r%d", extra_reg);
check_high_reg(&reg);
check_high_reg(&extra_reg);
insn_printf("andi r%d,0x0F", reg); // Assumes reg and extra_reg
insn_printf("andi r%d,0xF0", extra_reg); // are high registers.
insn_printf("or r%d,r%d", reg, extra_reg);
release_reg(&extra_reg);
}
}
// Extracts two byte-sized parts of the state that sit inside the same
// 3-byte window. The parts must not overlap and bit1 must be greater
// than bit2. The three bytes are loaded once (reg1 = high byte,
// reg2 = middle byte, temporary register = low byte); the whole triple
// is shifted left to align reg1 on bit1, then the lower pair is
// shifted further to align reg2 on bit2.
static void extract_two_parts(int reg1, int reg2, int bit1, int bit2)
{
    const LFSR *info = find_lfsr(bit1);
    const int rel1 = bit1 - info->start;
    const int rel2 = bit2 - info->start;
    const int base = info->offsetl + lfsr_offset + (rel2 / 8);

    insn_printf("ldd r%d,Z+%d", reg1, base + 2);
    insn_printf("ldd r%d,Z+%d", reg2, base + 1);
    insn_printf("ldd r%d,Z+%d", temp_reg, base);

    shift_left_3_regs(reg1, reg2, temp_reg, 8 - (rel1 % 8));
    shift_left_2_regs(reg2, temp_reg, rel1 - (rel2 + 8));
}
// Extract out various sub-parts of the state as 8-bit bytes.
// We do this by extracting two bytes around the one we want
// and then shifting it left or right until it is byte-aligned.
// Sometimes there is overlap and we can extract 3 bytes and shift.
//
// On return the registers s244, s235, s196, s160, s111, s66, s23 and
// s12 are allocated and hold the extracted bytes. The allocation
// order below determines which physical register each one gets.
static void extract_sub_parts(void)
{
// LFSR6
alloc_low_reg(&s244);
alloc_low_reg(&s235);
extract_two_parts(s244, s235, 244, 235);
// LFSR5
alloc_low_reg(&s196);
extract_one_part(s196, 196);
// LFSR4
alloc_low_reg(&s160);
extract_one_part(s160, 160);
// LFSR3
// Bit 111 is 4 bits into its LFSR (which starts at bit 107), so
// extract_one_part() takes its nibble-swap path, which needs a high reg.
alloc_high_reg(&s111);
extract_one_part(s111, 111);
// LFSR2
alloc_low_reg(&s66);
extract_one_part(s66, 66);
// LFSR1
alloc_low_reg(&s23);
alloc_low_reg(&s12);
extract_two_parts(s23, s12, 23, 12);
}
// Update the LFSR's.
//
// Loads the current low byte of each LFSR (at offsetl + lfsr_offset)
// into s1_l..s7_l and XORs the feedback taps into s2_l..s7_l.
// For each LFSR N from 7 down to 2 the emitted code computes
// sN_l ^= <extracted tap> ^ s(N-1)_l. s1_l is only loaded.
// All of s1_l..s7_l remain allocated when this function returns.
static void update_lfsrs(void)
{
int offset;
// LFSR7: if the offset is non-zero then we still have the s7_l
// value from a previous shift_down_step() call in a register.
if (lfsr_offset == 0) {
alloc_low_reg(&s7_l);
insn_printf("ldd r%d,Z+%d", s7_l, get_lfsr(6)->offsetl + lfsr_offset);
}
insn_printf("eor r%d,r%d", s7_l, s235);
alloc_low_reg(&s6_l);
insn_printf("ldd r%d,Z+%d", s6_l, get_lfsr(5)->offsetl + lfsr_offset);
insn_printf("eor r%d,r%d", s7_l, s6_l);
// LFSR6
alloc_low_reg(&s5_l);
insn_printf("eor r%d,r%d", s6_l, s196);
insn_printf("ldd r%d,Z+%d", s5_l, get_lfsr(4)->offsetl + lfsr_offset);
insn_printf("eor r%d,r%d", s6_l, s5_l);
// LFSR5
alloc_low_reg(&s4_l);
insn_printf("eor r%d,r%d", s5_l, s160);
insn_printf("ldd r%d,Z+%d", s4_l, get_lfsr(3)->offsetl + lfsr_offset);
insn_printf("eor r%d,r%d", s5_l, s4_l);
// LFSR4
alloc_low_reg(&s3_l);
insn_printf("eor r%d,r%d", s4_l, s111);
insn_printf("ldd r%d,Z+%d", s3_l, get_lfsr(2)->offsetl + lfsr_offset);
insn_printf("eor r%d,r%d", s4_l, s3_l);
// LFSR3
alloc_low_reg(&s2_l);
insn_printf("eor r%d,r%d", s3_l, s66);
insn_printf("ldd r%d,Z+%d", s2_l, get_lfsr(1)->offsetl + lfsr_offset);
insn_printf("eor r%d,r%d", s3_l, s2_l);
// LFSR2
alloc_low_reg(&s1_l);
insn_printf("eor r%d,r%d", s2_l, s23);
offset = get_lfsr(0)->offsetl + lfsr_offset;
// A zero displacement is emitted as the plain "ld" form.
if (offset != 0)
insn_printf("ldd r%d,Z+%d", s1_l, offset);
else
insn_printf("ld r%d,Z", s1_l);
insn_printf("eor r%d,r%d", s2_l, s1_l);
}
// Generate the next 8 keystream bits.
//
// The register that held s12 is taken over as "ks", so s12 is set to
// -1 without being released. The contents of s235, s111 and s66 are
// destroyed while computing the result and those registers are
// released before returning. "ks" stays allocated for the caller.
static void generate_keystream(void)
{
// ks = s12 ^ state->s4_l ^
// maj(s235, state->s2_l, state->s5_l) ^
// ch(state->s6_l, s111, s66);
ks = s12;
s12 = -1;
insn_printf("eor r%d,r%d", ks, s4_l);
// ks ^= maj(s235, state->s2_l, state->s5_l)
insn_printf("mov r%d,r%d", temp_reg, s235); // ks ^= (s235 & s2_l)
insn_printf("and r%d,r%d", temp_reg, s2_l);
insn_printf("eor r%d,r%d", ks, temp_reg);
insn_printf("and r%d,r%d", s235, s5_l); // ks ^= (s235 & s5_l)
insn_printf("eor r%d,r%d", ks, s235);
insn_printf("mov r%d,r%d", temp_reg, s2_l); // ks ^= (s2_l & s5_l)
insn_printf("and r%d,r%d", temp_reg, s5_l);
insn_printf("eor r%d,r%d", ks, temp_reg);
release_reg(&s235);
// ks ^= ch(state->s6_l, s111, s66)
insn_printf("and r%d,r%d", s111, s6_l); // ks ^= (s6_l & s111)
insn_printf("eor r%d,r%d", ks, s111);
insn_printf("mov r%d,r%d", temp_reg, s6_l); // ks ^= (~s6_l & s66)
insn_printf("com r%d", temp_reg);
insn_printf("and r%d,r%d", s66, temp_reg);
insn_printf("eor r%d,r%d", ks, s66);
release_reg(&s111);
release_reg(&s66);
}
// Generate the next 8 non-linear feedback bits.
//
// input_is_ciphertext - non-zero when the input operand is ciphertext
// (decryption); zero when it is plaintext (encryption).
//
// Allocates "f" as a high register and leaves the feedback byte in it.
// Releases s1_l, s244, s23, s160 and s196. In the 8-bit version "ks"
// stays allocated for the caller; in the 32-bit version the output
// byte is stored and "ks" is released here.
//
// The asm operand numbers follow the constraint lists printed by the
// callers: %0 = output, %2 = input byte/word, %3 = ca, %4 = cb.
static void generate_feedback(int input_is_ciphertext)
{
// f = state->s1_l ^ (~state->s3_l) ^
// maj(s244, s23, s160) ^ (ca & s196) ^ (cb & ks);
// f ^= plaintext;
alloc_high_reg(&f); // Needs to be a high register for shift_down().
insn_printf("mov r%d,r%d", f, s3_l);
insn_printf("com r%d", f);
insn_printf("eor r%d,r%d", f, s1_l);
release_reg(&s1_l); // Don't need the low byte of s1 any more.
// f ^= maj(s244, s23, s160)
insn_printf("mov r%d,r%d", temp_reg, s244); // f ^= (s244 & s23)
insn_printf("and r%d,r%d", temp_reg, s23);
insn_printf("eor r%d,r%d", f, temp_reg);
insn_printf("and r%d,r%d", s23, s160); // f ^= (s23 & s160)
insn_printf("eor r%d,r%d", f, s23);
insn_printf("and r%d,r%d", s244, s160); // f ^= (s244 & s160)
insn_printf("eor r%d,r%d", f, s244);
release_reg(&s244);
release_reg(&s23);
release_reg(&s160);
// f ^= (ca & s196). Note that when decrypting, ca is always 1.
if (!input_is_ciphertext)
insn_printf("and r%d,%%3", s196); // s196 &= ca
insn_printf("eor r%d,r%d", f, s196); // f ^= s196
release_reg(&s196);
// f ^= (cb & ks) ^ plaintext
// If we are processing the ciphertext, then we need to first
// decrypt the input with ks. We leave the plaintext in "ks".
// Note that when decrypting, cb is always 0.
if (!is_32bit_version) {
// Plaintext and ciphertext are 8-bit values in registers.
if (input_is_ciphertext) {
insn_printf("eor r%d,%%2", ks); // plaintext = ciphertext ^ ks
insn_printf("eor r%d,r%d", f, ks); // f ^= plaintext
} else {
insn_printf("mov r%d,r%d", temp_reg, ks); // f ^= (cb & ks)
insn_printf("and r%d,%%4", temp_reg);
insn_printf("eor r%d,r%d", f, temp_reg);
insn_printf("eor r%d,%%2", f); // f ^= plaintext
}
} else {
// Plaintext and ciphertext are 32-bit values in the stack frame.
// lfsr_offset selects which byte of the word is being processed.
static const char * const out_regs[] = {"%A0", "%B0", "%C0", "%D0"};
static const char * const in_regs[] = {"%A2", "%B2", "%C2", "%D2"};
if (input_is_ciphertext) {
insn_printf("ldd r%d,%s", temp_reg, in_regs[lfsr_offset]);
insn_printf("eor r%d,r%d", ks, temp_reg); // plaintext = ciphertext ^ ks
insn_printf("eor r%d,r%d", f, ks); // f ^= plaintext
insn_printf("std %s,r%d", out_regs[lfsr_offset], ks);
} else {
insn_printf("mov r%d,r%d", temp_reg, ks); // f ^= (cb & ks)
insn_printf("and r%d,%%4", temp_reg);
insn_printf("eor r%d,r%d", f, temp_reg);
insn_printf("ldd r%d,%s", temp_reg, in_regs[lfsr_offset]);
insn_printf("eor r%d,r%d", f, temp_reg); // f ^= plaintext
insn_printf("eor r%d,r%d", temp_reg, ks); // ciphertext = plaintext ^ ks
insn_printf("std %s,r%d", out_regs[lfsr_offset], temp_reg);
}
release_reg(&ks);
}
}
// Translate a bit position within an LFSR into the Z displacement of
// the byte that holds it: the first 32 bits live in the low word and
// the remainder in the high word.
static int state_offset_for_bit(const LFSR *lfsr, int bit)
{
    if (bit < 32)
        return lfsr->offsetl + (bit / 8);
    return lfsr->offseth + ((bit - 32) / 8);
}
// Shift a LFSR downwards by 8 bits and rotate in a register.
//
// lfsr - the LFSR to shift.
// reg  - register holding the 8 bits to insert at the top of the LFSR;
//        its contents are destroyed.
//
// The low byte of the LFSR is simply overwritten: it is assumed to be
// cached in a register by update_lfsrs() already.
static void shift_down_lfsr(const LFSR *lfsr, int reg)
{
    int bit;
    int dest;
    int src;
    int partial;
    int spill = -1;

    // Move every byte except the last one down by one position.
    for (bit = 0; bit < (lfsr->len - 16); bit += 8) {
        dest = state_offset_for_bit(lfsr, bit);
        src = state_offset_for_bit(lfsr, bit + 8);
        insn_printf("ldd r%d,Z+%d", temp_reg, src);
        if (dest == 0)
            insn_printf("st Z,r%d", temp_reg);
        else
            insn_printf("std Z+%d,r%d", dest, temp_reg);
    }

    // The final (partial) byte moves down and is merged with the
    // incoming register value, which straddles the last two bytes.
    dest = state_offset_for_bit(lfsr, bit);
    src = state_offset_for_bit(lfsr, bit + 8);
    insn_printf("ldd r%d,Z+%d", temp_reg, src);
    alloc_low_reg(&spill);
    partial = lfsr->len % 8;
    insn_printf("clr r%d", spill);
    if (partial <= 4) {
        // Cheaper to shift left: the overflow lands in "spill".
        shift_left_2_regs(spill, reg, partial);
        insn_printf("or r%d,r%d", temp_reg, reg);
        insn_printf("std Z+%d,r%d", dest, temp_reg);
        insn_printf("std Z+%d,r%d", src, spill);
    } else {
        // Cheaper to shift right: the underflow lands in "spill".
        shift_right_2_regs(reg, spill, 8 - partial);
        insn_printf("or r%d,r%d", temp_reg, spill);
        insn_printf("std Z+%d,r%d", dest, temp_reg);
        insn_printf("std Z+%d,r%d", src, reg);
    }
    release_reg(&spill);
}
// Shift the state downwards by 8 bits.
//
// Used by the 8-bit code paths. Splits the feedback byte "f" between
// the top of LFSR7 and s7_l, then shifts LFSR6 down to LFSR1, feeding
// each one the updated low byte of the LFSR above it. Releases f and
// s2_l..s7_l along the way.
static void shift_down(void)
{
int extra = -1;
// LFSR7: s7_l ^= (f << 4), state->s7 = (f >> 4)
alloc_high_reg(&extra);
check_high_reg(&f);
// After the nibble swap, the high nibble of f holds (f << 4) and
// the low nibble holds (f >> 4).
insn_printf("swap r%d", f);
insn_printf("mov r%d,r%d", extra, f);
insn_printf("andi r%d,0xF0", extra); // Assumes extra is a high reg.
insn_printf("eor r%d,r%d", s7_l, extra);
insn_printf("andi r%d,0x0F", f); // Assumes f is a high reg.
insn_printf("std Z+%d,r%d", get_lfsr(6)->offsetl, f);
release_reg(&f);
release_reg(&extra);
// LFSR6 down to LFSR1
shift_down_lfsr(get_lfsr(5), s7_l);
release_reg(&s7_l);
shift_down_lfsr(get_lfsr(4), s6_l);
release_reg(&s6_l);
shift_down_lfsr(get_lfsr(3), s5_l);
release_reg(&s5_l);
shift_down_lfsr(get_lfsr(2), s4_l);
release_reg(&s4_l);
shift_down_lfsr(get_lfsr(1), s3_l);
release_reg(&s3_l);
shift_down_lfsr(get_lfsr(0), s2_l);
release_reg(&s2_l);
}
// Shift the state downwards by 8 bits as one step within a 32-bit word.
//
// No bytes are actually moved here. The updated low bytes of
// s2..s6 are written back at offsetl + lfsr_offset, and the updated
// s7_l byte is parked in s7_l_prev[lfsr_offset] (still allocated)
// for shift_down_final() to use. A fresh register becomes the new s7_l.
static void shift_down_step(void)
{
int extra = -1;
int s7_l_next = -1;
// LFSR7: s7_l ^= (f << 4), state->s7 = (f >> 4)
// We keep the previous s7_l in a register for now and
// make use of it during shift_down_final(). Allocate
// a new register to hold the next s7_l value.
s7_l_prev[lfsr_offset] = s7_l;
alloc_high_reg(&extra);
check_high_reg(&f);
alloc_low_reg(&s7_l_next);
insn_printf("swap r%d", f);
insn_printf("mov r%d,r%d", extra, f);
insn_printf("andi r%d,0xF0", extra); // Assumes extra is a high reg.
insn_printf("eor r%d,r%d", s7_l, extra);
insn_printf("andi r%d,0x0F", f); // Assumes f is a high reg.
insn_printf("mov r%d,r%d", s7_l_next, f);
release_reg(&f);
release_reg(&extra);
// From here on s7_l names the new register; the old one is only
// reachable through s7_l_prev[lfsr_offset].
s7_l = s7_l_next;
// Write s2 to s6 back to the state. We don't need to store s1
// because shift_down_final() will be throwing the value away.
// The generate_feedback() function already released the register.
insn_printf("std Z+%d,r%d", get_lfsr(5)->offsetl + lfsr_offset, s6_l);
release_reg(&s6_l);
insn_printf("std Z+%d,r%d", get_lfsr(4)->offsetl + lfsr_offset, s5_l);
release_reg(&s5_l);
insn_printf("std Z+%d,r%d", get_lfsr(3)->offsetl + lfsr_offset, s4_l);
release_reg(&s4_l);
insn_printf("std Z+%d,r%d", get_lfsr(2)->offsetl + lfsr_offset, s3_l);
release_reg(&s3_l);
insn_printf("std Z+%d,r%d", get_lfsr(1)->offsetl + lfsr_offset, s2_l);
release_reg(&s2_l);
}
// Load the three low bytes of an LFSR's low word from the state and
// shift the 40-bit value top:b2:b1:b0:temp_reg right by "count" bits.
// temp_reg starts out as zero, so afterwards it holds the bits that
// were shifted out of the bottom of the word.
static void load_shifted_low_word
    (const LFSR *src, int top, int b2, int b1, int b0, int count)
{
    insn_printf("clr r%d", temp_reg);
    insn_printf("ldd r%d,Z+%d", b0, src->offsetl);
    insn_printf("ldd r%d,Z+%d", b1, src->offsetl + 1);
    insn_printf("ldd r%d,Z+%d", b2, src->offsetl + 2);
    shift_right_5_regs(top, b2, b1, b0, temp_reg, count);
}
// Shift the state downwards by 8 bits as the final step within a 32-bit word.
//
// This is where the 32-bit shift of the whole state is performed:
// for each LFSR the old high word becomes the new low word, merged with
// the low bits of the next-higher LFSR's updated low word, and the
// remaining bits of that word become the new high word.
//
// On entry s2_l..s6_l hold byte 3 of the corresponding updated low
// words (bytes 0..2 were written back by shift_down_step()), and the
// updated s7 word is in s7_l_prev[0..2] and s7_l. All of these, plus
// "f", are released before returning.
static void shift_down_final(void)
{
    int extra = -1;
    int extra2 = -1;
    int extra3 = -1;
    int extra4 = -1;
    const LFSR *lfsr;

    // LFSR7: s7_l ^= (f << 4), state->s7 = (f >> 4)
    alloc_high_reg(&extra);
    check_high_reg(&f);
    insn_printf("swap r%d", f);
    insn_printf("mov r%d,r%d", extra, f);
    insn_printf("andi r%d,0xF0", extra); // Assumes extra is a high reg.
    insn_printf("eor r%d,r%d", s7_l, extra);
    insn_printf("andi r%d,0x0F", f); // Assumes f is a high reg.
    insn_printf("std Z+%d,r%d", get_lfsr(6)->offsetl, f);
    release_reg(&f);

    // We currently have the 32-bit s7 word in four registers:
    // s7_l_prev[0], s7_l_prev[1], s7_l_prev[2], and s7_l.
    // We also have the third byte of s2..s6 in the s2_l..s6_l regs.
    // Everything else is stored within the Acorn128 state structure.

    // LFSR1: state->s1_l = state->s1_h | (state->s2_l << 29)
    //        state->s1_h = state->s2_l >> 3
    alloc_low_reg(&extra2);
    alloc_low_reg(&extra3);
    alloc_low_reg(&extra4);
    load_shifted_low_word(get_lfsr(1), s2_l, extra3, extra2, extra, 3);
    lfsr = get_lfsr(0);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth);
    insn_printf("st Z,r%d", extra4); // offset is zero.
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 1);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra4);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 2);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra4);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 3);
    // Merge the top byte of s1_h with the bits shifted out of s2_l and
    // store the merged byte, so that neither half is dropped.
    insn_printf("or r%d,r%d", temp_reg, extra4);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, temp_reg);
    insn_printf("std Z+%d,r%d", lfsr->offseth, extra);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 1, extra2);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 2, extra3);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 3, s2_l);
    release_reg(&s2_l);

    // LFSR2: state->s2_l = state->s2_h | (state->s3_l << 14)
    //        state->s2_h = state->s3_l >> 18
    load_shifted_low_word(get_lfsr(2), s3_l, extra3, extra2, extra, 2);
    lfsr = get_lfsr(1);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s2_h[0]
    insn_printf("std Z+%d,r%d", lfsr->offsetl, extra4);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 1); // s2_h[1]
    insn_printf("or r%d,r%d", temp_reg, extra4);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, temp_reg);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra2);
    insn_printf("std Z+%d,r%d", lfsr->offseth, extra3);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 1, s3_l);
    release_reg(&s3_l);

    // LFSR3: state->s3_l = state->s3_h | (state->s4_l << 15)
    //        state->s3_h = state->s4_l >> 17
    load_shifted_low_word(get_lfsr(3), s4_l, extra3, extra2, extra, 1);
    lfsr = get_lfsr(2);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s3_h[0]
    insn_printf("std Z+%d,r%d", lfsr->offsetl, extra4);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth + 1); // s3_h[1]
    insn_printf("or r%d,r%d", temp_reg, extra4);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, temp_reg);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra2);
    insn_printf("std Z+%d,r%d", lfsr->offseth, extra3);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 1, s4_l);
    release_reg(&s4_l);

    // LFSR4: state->s4_l = state->s4_h | (state->s5_l << 7)
    //        state->s4_h = state->s5_l >> 25
    load_shifted_low_word(get_lfsr(4), s5_l, extra3, extra2, extra, 1);
    lfsr = get_lfsr(3);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s4_h
    insn_printf("or r%d,r%d", temp_reg, extra4);
    insn_printf("std Z+%d,r%d", lfsr->offsetl, temp_reg);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra2);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra3);
    insn_printf("std Z+%d,r%d", lfsr->offseth, s5_l);
    release_reg(&s5_l);

    // LFSR5: state->s5_l = state->s5_h | (state->s6_l << 5)
    //        state->s5_h = state->s6_l >> 27
    load_shifted_low_word(get_lfsr(5), s6_l, extra3, extra2, extra, 3);
    lfsr = get_lfsr(4);
    insn_printf("ldd r%d,Z+%d", extra4, lfsr->offseth); // s5_h
    insn_printf("or r%d,r%d", temp_reg, extra4);
    insn_printf("std Z+%d,r%d", lfsr->offsetl, temp_reg);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra2);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra3);
    insn_printf("std Z+%d,r%d", lfsr->offseth, s6_l);
    release_reg(&s6_l);

    // LFSR6: state->s6_l = state->s6_h | (s7_l << 27)
    //        state->s6_h = s7_l >> 5
    // The s7 word is already in registers, so it is shifted left in
    // place with temp_reg catching the bits that overflow the top.
    lfsr = get_lfsr(5);
    insn_printf("clr r%d", temp_reg);
    shift_left_5_regs
        (temp_reg, s7_l, s7_l_prev[2], s7_l_prev[1], s7_l_prev[0], 3);
    insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth);
    insn_printf("std Z+%d,r%d", lfsr->offsetl, extra);
    insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth + 1);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 1, extra);
    insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth + 2);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 2, extra);
    insn_printf("ldd r%d,Z+%d", extra, lfsr->offseth + 3);
    insn_printf("or r%d,r%d", extra, s7_l_prev[0]);
    insn_printf("std Z+%d,r%d", lfsr->offsetl + 3, extra);
    insn_printf("std Z+%d,r%d", lfsr->offseth, s7_l_prev[1]);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 1, s7_l_prev[2]);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 2, s7_l);
    insn_printf("std Z+%d,r%d", lfsr->offseth + 3, temp_reg);
    release_reg(&s7_l_prev[0]);
    release_reg(&s7_l_prev[1]);
    release_reg(&s7_l_prev[2]);
    release_reg(&s7_l);

    // Release temporary registers.
    release_reg(&extra);
    release_reg(&extra2);
    release_reg(&extra3);
    release_reg(&extra4);
}
// Generate the source for acornEncrypt8(): one 8-bit encryption step
// written as a single inline assembly statement.
static void encrypt8(void)
{
    // Function header and start of the asm statement.
    printf("static uint8_t acornEncrypt8\n"
           " (Acorn128State *state, uint8_t plaintext, uint8_t ca, uint8_t cb)\n"
           "{\n");
    indent_printf("// Automatically generated by the genacorn tool.\n");
    indent_printf("uint8_t ciphertext;\n");
    indent_printf("__asm__ __volatile__ (\n");
    indent += 4;

    // Body: one full round on the state.
    extract_sub_parts();
    update_lfsrs();
    generate_keystream();
    generate_feedback(0);
    shift_down();

    // ciphertext = plaintext ^ ks
    insn_printf("mov %%0,%%2");
    insn_printf("eor %%0,r%d", ks);
    release_reg(&ks);

    // Operand constraints and clobber list.
    indent_printf(": \"=r\"(ciphertext)\n");
    indent_printf(": \"z\"(&state->s1_l), \"r\"(plaintext), \"r\"((uint8_t)ca), \"r\"((uint8_t)cb)\n");
    temp_regs();
    indent -= 4;

    // Function trailer.
    indent_printf(");\n");
    indent_printf("return ciphertext;\n");
    printf("}\n\n");

    check_regs();
}
// Generate the source for acornDecrypt8(): one 8-bit decryption step
// written as a single inline assembly statement.
static void decrypt8(void)
{
    // Function header and start of the asm statement.
    printf("static uint8_t acornDecrypt8(Acorn128State *state, uint8_t ciphertext)\n"
           "{\n");
    indent_printf("// Automatically generated by the genacorn tool.\n");
    indent_printf("uint8_t plaintext;\n");
    indent_printf("__asm__ __volatile__ (\n");
    indent += 4;

    // Body: one full round on the state, with ciphertext as the input.
    extract_sub_parts();
    update_lfsrs();
    generate_keystream();
    generate_feedback(1);
    shift_down();

    // generate_feedback(1) left the plaintext in "ks".
    insn_printf("mov %%0,r%d", ks);
    release_reg(&ks);

    // Operand constraints and clobber list.
    indent_printf(": \"=r\"(plaintext)\n");
    indent_printf(": \"z\"(&state->s1_l), \"r\"(ciphertext)\n");
    temp_regs();
    indent -= 4;

    // Function trailer.
    indent_printf(");\n");
    indent_printf("return plaintext;\n");
    printf("}\n\n");

    check_regs();
}
// Generate the source for acornEncrypt32(): four byte-wide rounds in
// one inline assembly statement, with the actual shifting of the state
// deferred until the last round.
static void encrypt32(void)
{
    // Function header and start of the asm statement.
    printf("uint32_t acornEncrypt32\n"
           " (Acorn128State *state, uint32_t plaintext, uint32_t ca, uint32_t cb)\n"
           "{\n");
    indent_printf("// Automatically generated by the genacorn tool.\n");
    indent_printf("uint32_t ciphertext;\n");
    indent_printf("__asm__ __volatile__ (\n");
    indent += 4;

    // One round per byte of the word. The first three rounds only
    // write back their low bytes; the fourth performs the full shift.
    is_32bit_version = 1;
    for (lfsr_offset = 0; lfsr_offset < 4; ++lfsr_offset) {
        extract_sub_parts();
        update_lfsrs();
        generate_keystream();
        generate_feedback(0);
        if (lfsr_offset < 3)
            shift_down_step();
        else
            shift_down_final();
    }
    lfsr_offset = 0;
    is_32bit_version = 0;

    // Operand constraints and clobber list.
    indent_printf(": \"=Q\"(ciphertext)\n");
    indent_printf(": \"z\"(&state->s1_l), \"Q\"(plaintext), \"r\"((uint8_t)ca), \"r\"((uint8_t)cb)\n");
    temp_regs();
    indent -= 4;

    // Function trailer.
    indent_printf(");\n");
    indent_printf("return ciphertext;\n");
    printf("}\n\n");

    check_regs();
}
// Entry point: writes the three generated functions to stdout.
// Command-line arguments are ignored.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    encrypt8();
    decrypt8();
    encrypt32();
    return 0;
}

View File

@ -147,7 +147,7 @@ TestVector testVector;
Acorn128 acorn; Acorn128 acorn;
byte buffer[MAX_PLAINTEXT_LEN]; byte buffer[128];
bool testCipher_N(Acorn128 *cipher, const struct TestVector *test, size_t inc) bool testCipher_N(Acorn128 *cipher, const struct TestVector *test, size_t inc)
{ {

View File

@ -99,6 +99,8 @@ size_t Acorn128::tagSize() const
#define CB_0_BYTE ((uint8_t)0x00) #define CB_0_BYTE ((uint8_t)0x00)
#define CB_1_BYTE ((uint8_t)0xFF) #define CB_1_BYTE ((uint8_t)0xFF)
#if defined(CRYPTO_ACORN128_DEFAULT) || defined(CRYPTO_DOC)
// maj() and ch() functions for mixing the state. // maj() and ch() functions for mixing the state.
#define maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) #define maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) #define ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
@ -446,13 +448,21 @@ static inline uint32_t acornDecrypt32(Acorn128State *state, uint32_t ciphertext)
return plaintext; return plaintext;
} }
#elif defined(CRYPTO_ACORN128_AVR)
// Import definitions from Acorn128AVR.cpp
extern uint32_t acornEncrypt32
(Acorn128State *state, uint32_t plaintext, uint32_t ca, uint32_t cb);
#endif // CRYPTO_ACORN128_AVR
/** /**
* \brief Adds 256 bits of padding to the Acorn128 state. * \brief Adds 256 bits of padding to the Acorn128 state.
* *
* \param state The state for the Acorn128 cipher. * \param state The state for the Acorn128 cipher.
* \param cb The cb constant for the padding block. * \param cb The cb constant for the padding block.
*/ */
static void acornPad(Acorn128State *state, uint32_t cb) void acornPad(Acorn128State *state, uint32_t cb)
{ {
acornEncrypt32(state, 1, CA_1, cb); acornEncrypt32(state, 1, CA_1, cb);
acornEncrypt32(state, 0, CA_1, cb); acornEncrypt32(state, 0, CA_1, cb);
@ -539,6 +549,8 @@ bool Acorn128::setIV(const uint8_t *iv, size_t len)
return true; return true;
} }
#if defined(CRYPTO_ACORN128_DEFAULT) || defined(CRYPTO_DOC)
void Acorn128::encrypt(uint8_t *output, const uint8_t *input, size_t len) void Acorn128::encrypt(uint8_t *output, const uint8_t *input, size_t len)
{ {
uint32_t temp; uint32_t temp;
@ -616,6 +628,8 @@ void Acorn128::addAuthData(const void *data, size_t len)
} }
} }
#endif // CRYPTO_ACORN128_DEFAULT
void Acorn128::computeTag(void *tag, size_t len) void Acorn128::computeTag(void *tag, size_t len)
{ {
// Finalize the data and apply padding. // Finalize the data and apply padding.

View File

@ -55,6 +55,13 @@ typedef struct
} Acorn128State; } Acorn128State;
// Determine which Acorn128 implementation to export to applications.
#if defined(__AVR__)
#define CRYPTO_ACORN128_AVR 1
#else
#define CRYPTO_ACORN128_DEFAULT 1
#endif
/** @endcond */ /** @endcond */
class Acorn128 : public AuthenticatedCipher class Acorn128 : public AuthenticatedCipher

View File

@ -0,0 +1,556 @@
/*
* Copyright (C) 2018 Southern Storm Software, Pty Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "Acorn128.h"
#if defined(CRYPTO_ACORN128_AVR)
// Acorn128 constants for ca and cb.
#define CA_0 ((uint32_t)0x00000000)
#define CA_1 ((uint32_t)0xFFFFFFFF)
#define CB_0 ((uint32_t)0x00000000)
#define CB_1 ((uint32_t)0xFFFFFFFF)
#define CA_0_BYTE ((uint8_t)0x00)
#define CA_1_BYTE ((uint8_t)0xFF)
#define CB_0_BYTE ((uint8_t)0x00)
#define CB_1_BYTE ((uint8_t)0xFF)
// Imports from Acorn128.cpp
extern void acornPad(Acorn128State *state, uint32_t cb);
// Force the acornEncrypt8() and acornDecrypt8() functions to always inline.
// GCC documents always_inline for functions that are declared inline and
// can warn ("always_inline function might not be inlinable") when the
// keyword is missing, so both helpers are declared "static inline" here.
// In C++ the inline specifier on these declarations also applies to the
// definitions that follow.
static inline uint8_t acornEncrypt8
    (Acorn128State *state, uint8_t plaintext, uint8_t ca, uint8_t cb)
    __attribute__((always_inline));
static inline uint8_t acornDecrypt8(Acorn128State *state, uint8_t ciphertext)
    __attribute__((always_inline));
// Runs one 8-bit step of the cipher state update and encrypts one byte.
//
// state     - cipher state; the assembly addresses it through Z, which is
//             loaded with &state->s1_l, using byte offsets 0..40.
//             NOTE(review): the offsets assume the field layout declared for
//             Acorn128State in Acorn128.h - confirm before changing either.
// plaintext - input byte; XORed into the feedback and into the result.
// ca        - byte mask ANDed with one extracted state byte (r13) before it
//             is mixed into the feedback.
// cb        - byte mask ANDed with the keystream byte (r9) before it is
//             mixed into the feedback.
//
// Returns plaintext XOR keystream byte.
//
// Operand map: %0 = ciphertext, %2 = plaintext, %3 = ca, %4 = cb.  r0 is
// used as scratch without being listed as a clobber (avr-gcc reserves r0 as
// its temporary register).  The output %0 is only written by the final two
// instructions, after every input operand has been consumed.
static uint8_t acornEncrypt8
(Acorn128State *state, uint8_t plaintext, uint8_t ca, uint8_t cb)
{
// Automatically generated by the genacorn tool.
uint8_t ciphertext;
__asm__ __volatile__ (
// --- Extract unaligned byte-wide windows from the state ---
// Bytes 32..34: shift the 24-bit value left so that r14 holds a window
// shifted by 3 bits and r15 a window shifted by 2 bits.
"ldd r15,Z+34\n"
"ldd r14,Z+33\n"
"ldd r0,Z+32\n"
"lsl r0\n"
"rol r14\n"
"rol r15\n"
"lsl r0\n"
"rol r14\n"
"rol r15\n"
"lsl r0\n"
"rol r14\n"
// Bytes 28..29: r13 = low byte of the 16-bit value shifted right by 3.
"ldd r13,Z+28\n"
"ldd r0,Z+29\n"
"lsr r0\n"
"ror r13\n"
"lsr r0\n"
"ror r13\n"
"lsr r0\n"
"ror r13\n"
// Bytes 20..21: r12 = high byte of the 16-bit value shifted left by 2.
"ldd r0,Z+20\n"
"ldd r12,Z+21\n"
"lsl r0\n"
"rol r12\n"
"lsl r0\n"
"rol r12\n"
// Bytes 16..17: r16 = 16-bit value shifted right by 4, done with nibble
// swaps (high nibble of byte 16 joined with low nibble of byte 17).
"ldd r16,Z+16\n"
"ldd r17,Z+17\n"
"swap r16\n"
"swap r17\n"
"andi r16,0x0F\n"
"andi r17,0xF0\n"
"or r16,r17\n"
// Bytes 8..9: r11 = high byte of the 16-bit value shifted left by 3.
"ldd r0,Z+8\n"
"ldd r11,Z+9\n"
"lsl r0\n"
"rol r11\n"
"lsl r0\n"
"rol r11\n"
"lsl r0\n"
"rol r11\n"
// Bytes 1..3: r9 ends up shifted left by 4 bits, r10 by 1 bit.
"ldd r10,Z+3\n"
"ldd r9,Z+2\n"
"ldd r0,Z+1\n"
"lsl r0\n"
"rol r9\n"
"rol r10\n"
"lsl r0\n"
"rol r9\n"
"lsl r0\n"
"rol r9\n"
"lsl r0\n"
"rol r9\n"
// --- Linear update: XOR chain across the low bytes of each section ---
// r8  = [40] ^ r14 ^ [32]      r7  = [32] ^ r13 ^ [28]
// r6  = [28] ^ r12 ^ [20]      r23 = [20] ^ r16 ^ [16]
// r22 = [16] ^ r11 ^ [8]       r21 = [8]  ^ r10 ^ [0]
// r20 = [0]
"ldd r8,Z+40\n"
"eor r8,r14\n"
"ldd r7,Z+32\n"
"eor r8,r7\n"
"eor r7,r13\n"
"ldd r6,Z+28\n"
"eor r7,r6\n"
"eor r6,r12\n"
"ldd r23,Z+20\n"
"eor r6,r23\n"
"eor r23,r16\n"
"ldd r22,Z+16\n"
"eor r23,r22\n"
"eor r22,r11\n"
"ldd r21,Z+8\n"
"eor r22,r21\n"
"eor r21,r10\n"
"ld r20,Z\n"
"eor r21,r20\n"
// --- Keystream byte, accumulated in r9 ---
// r9 ^= r23 ^ (r14&r21) ^ (r14&r6) ^ (r21&r6) ^ (r16&r7) ^ (r11&~r7)
// i.e. a majority term over (r14, r21, r6) and a choose term keyed by r7.
"eor r9,r23\n"
"mov r0,r14\n"
"and r0,r21\n"
"eor r9,r0\n"
"and r14,r6\n"
"eor r9,r14\n"
"mov r0,r21\n"
"and r0,r6\n"
"eor r9,r0\n"
"and r16,r7\n"
"eor r9,r16\n"
"mov r0,r7\n"
"com r0\n"
"and r11,r0\n"
"eor r9,r11\n"
// --- Feedback byte, accumulated in r16 ---
// r16 = ~r22 ^ r20 ^ (r15&r10) ^ (r10&r12) ^ (r15&r12)
//       ^ (r13 & ca) ^ (keystream & cb) ^ plaintext
"mov r16,r22\n"
"com r16\n"
"eor r16,r20\n"
"mov r0,r15\n"
"and r0,r10\n"
"eor r16,r0\n"
"and r10,r12\n"
"eor r16,r10\n"
"and r15,r12\n"
"eor r16,r15\n"
"and r13,%3\n"
"eor r16,r13\n"
"mov r0,r9\n"
"and r0,%4\n"
"eor r16,r0\n"
"eor r16,%2\n"
// Split the feedback byte: its low nibble is XORed into the top nibble of
// r8, and its high nibble becomes the new value of byte 40.
"swap r16\n"
"mov r17,r16\n"
"andi r17,0xF0\n"
"eor r8,r17\n"
"andi r16,0x0F\n"
"std Z+40,r16\n"
// --- Shift every section down by one byte and insert the updated byte ---
// Bytes 32..39: move 33..38 down; r8 is inserted shifted left by 3 bits,
// straddling bytes 38 (ORed with old byte 39) and 39.
"ldd r0,Z+33\n"
"std Z+32,r0\n"
"ldd r0,Z+34\n"
"std Z+33,r0\n"
"ldd r0,Z+35\n"
"std Z+34,r0\n"
"ldd r0,Z+36\n"
"std Z+35,r0\n"
"ldd r0,Z+37\n"
"std Z+36,r0\n"
"ldd r0,Z+38\n"
"std Z+37,r0\n"
"ldd r0,Z+39\n"
"clr r15\n"
"lsl r8\n"
"rol r15\n"
"lsl r8\n"
"rol r15\n"
"lsl r8\n"
"rol r15\n"
"or r0,r8\n"
"std Z+38,r0\n"
"std Z+39,r15\n"
// Bytes 28..31 and 26: move 29..31 down; the low 3 bits of r7 go into the
// top of the new byte 31 (ORed with old byte 26), r7 >> 3 becomes byte 26.
"ldd r0,Z+29\n"
"std Z+28,r0\n"
"ldd r0,Z+30\n"
"std Z+29,r0\n"
"ldd r0,Z+31\n"
"std Z+30,r0\n"
"ldd r0,Z+26\n"
"clr r15\n"
"lsr r7\n"
"ror r15\n"
"lsr r7\n"
"ror r15\n"
"lsr r7\n"
"ror r15\n"
"or r0,r15\n"
"std Z+31,r0\n"
"std Z+26,r7\n"
// Bytes 20..24: move 21..23 down; bit 0 of r6 goes into the top bit of the
// new byte 23 (ORed with old byte 24), r6 >> 1 becomes byte 24.
"ldd r0,Z+21\n"
"std Z+20,r0\n"
"ldd r0,Z+22\n"
"std Z+21,r0\n"
"ldd r0,Z+23\n"
"std Z+22,r0\n"
"ldd r0,Z+24\n"
"clr r15\n"
"lsr r6\n"
"ror r15\n"
"or r0,r15\n"
"std Z+23,r0\n"
"std Z+24,r6\n"
// Bytes 16..19 and 14..15: move 17..19 down and byte 14 into 19; bit 0 of
// r23 goes into the top bit of the new byte 14 (ORed with old byte 15),
// r23 >> 1 becomes byte 15.
"ldd r0,Z+17\n"
"std Z+16,r0\n"
"ldd r0,Z+18\n"
"std Z+17,r0\n"
"ldd r0,Z+19\n"
"std Z+18,r0\n"
"ldd r0,Z+14\n"
"std Z+19,r0\n"
"ldd r0,Z+15\n"
"clr r15\n"
"lsr r23\n"
"ror r15\n"
"or r0,r15\n"
"std Z+14,r0\n"
"std Z+15,r23\n"
// Bytes 8..13: move 9..12 down; the low 2 bits of r22 go into the top of
// the new byte 12 (ORed with old byte 13), r22 >> 2 becomes byte 13.
"ldd r0,Z+9\n"
"std Z+8,r0\n"
"ldd r0,Z+10\n"
"std Z+9,r0\n"
"ldd r0,Z+11\n"
"std Z+10,r0\n"
"ldd r0,Z+12\n"
"std Z+11,r0\n"
"ldd r0,Z+13\n"
"clr r15\n"
"lsr r22\n"
"ror r15\n"
"lsr r22\n"
"ror r15\n"
"or r0,r15\n"
"std Z+12,r0\n"
"std Z+13,r22\n"
// Bytes 0..7: move 1..6 down; the low 3 bits of r21 go into the top of the
// new byte 6 (ORed with old byte 7), r21 >> 3 becomes byte 7.
"ldd r0,Z+1\n"
"st Z,r0\n"
"ldd r0,Z+2\n"
"std Z+1,r0\n"
"ldd r0,Z+3\n"
"std Z+2,r0\n"
"ldd r0,Z+4\n"
"std Z+3,r0\n"
"ldd r0,Z+5\n"
"std Z+4,r0\n"
"ldd r0,Z+6\n"
"std Z+5,r0\n"
"ldd r0,Z+7\n"
"clr r15\n"
"lsr r21\n"
"ror r15\n"
"lsr r21\n"
"ror r15\n"
"lsr r21\n"
"ror r15\n"
"or r0,r15\n"
"std Z+6,r0\n"
"std Z+7,r21\n"
// --- Result: ciphertext = plaintext ^ keystream ---
"mov %0,%2\n"
"eor %0,r9\n"
: "=r"(ciphertext)
: "z"(&state->s1_l), "r"(plaintext), "r"((uint8_t)ca), "r"((uint8_t)cb)
: "r16", "r17", "r20", "r21", "r22", "r23", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory"
);
return ciphertext;
}
// Runs one 8-bit step of the cipher state update and decrypts one byte.
//
// state      - cipher state; the assembly addresses it through Z, which is
//              loaded with &state->s1_l, using byte offsets 0..40.
//              NOTE(review): the offsets assume the field layout declared for
//              Acorn128State in Acorn128.h - confirm before changing either.
// ciphertext - input byte.
//
// Returns ciphertext XOR keystream byte (the recovered plaintext).
//
// The instruction stream matches acornEncrypt8() except in the feedback
// step: the extracted byte in r13 is mixed in unmasked, no keystream term
// is mixed in, and the recovered plaintext is what gets XORed into the
// feedback.  That equals the encrypt path with ca = 0xFF and cb = 0x00.
//
// Operand map: %0 = plaintext, %2 = ciphertext.  r0 is used as scratch
// without being listed as a clobber (avr-gcc reserves r0 as its temporary
// register).  The output %0 is only written by the final instruction.
static uint8_t acornDecrypt8(Acorn128State *state, uint8_t ciphertext)
{
// Automatically generated by the genacorn tool.
uint8_t plaintext;
__asm__ __volatile__ (
// --- Extract unaligned byte-wide windows from the state ---
// Bytes 32..34: shift the 24-bit value left so that r14 holds a window
// shifted by 3 bits and r15 a window shifted by 2 bits.
"ldd r15,Z+34\n"
"ldd r14,Z+33\n"
"ldd r0,Z+32\n"
"lsl r0\n"
"rol r14\n"
"rol r15\n"
"lsl r0\n"
"rol r14\n"
"rol r15\n"
"lsl r0\n"
"rol r14\n"
// Bytes 28..29: r13 = low byte of the 16-bit value shifted right by 3.
"ldd r13,Z+28\n"
"ldd r0,Z+29\n"
"lsr r0\n"
"ror r13\n"
"lsr r0\n"
"ror r13\n"
"lsr r0\n"
"ror r13\n"
// Bytes 20..21: r12 = high byte of the 16-bit value shifted left by 2.
"ldd r0,Z+20\n"
"ldd r12,Z+21\n"
"lsl r0\n"
"rol r12\n"
"lsl r0\n"
"rol r12\n"
// Bytes 16..17: r16 = 16-bit value shifted right by 4, done with nibble
// swaps (high nibble of byte 16 joined with low nibble of byte 17).
"ldd r16,Z+16\n"
"ldd r17,Z+17\n"
"swap r16\n"
"swap r17\n"
"andi r16,0x0F\n"
"andi r17,0xF0\n"
"or r16,r17\n"
// Bytes 8..9: r11 = high byte of the 16-bit value shifted left by 3.
"ldd r0,Z+8\n"
"ldd r11,Z+9\n"
"lsl r0\n"
"rol r11\n"
"lsl r0\n"
"rol r11\n"
"lsl r0\n"
"rol r11\n"
// Bytes 1..3: r9 ends up shifted left by 4 bits, r10 by 1 bit.
"ldd r10,Z+3\n"
"ldd r9,Z+2\n"
"ldd r0,Z+1\n"
"lsl r0\n"
"rol r9\n"
"rol r10\n"
"lsl r0\n"
"rol r9\n"
"lsl r0\n"
"rol r9\n"
"lsl r0\n"
"rol r9\n"
// --- Linear update: XOR chain across the low bytes of each section ---
// r8  = [40] ^ r14 ^ [32]      r7  = [32] ^ r13 ^ [28]
// r6  = [28] ^ r12 ^ [20]      r23 = [20] ^ r16 ^ [16]
// r22 = [16] ^ r11 ^ [8]       r21 = [8]  ^ r10 ^ [0]
// r20 = [0]
"ldd r8,Z+40\n"
"eor r8,r14\n"
"ldd r7,Z+32\n"
"eor r8,r7\n"
"eor r7,r13\n"
"ldd r6,Z+28\n"
"eor r7,r6\n"
"eor r6,r12\n"
"ldd r23,Z+20\n"
"eor r6,r23\n"
"eor r23,r16\n"
"ldd r22,Z+16\n"
"eor r23,r22\n"
"eor r22,r11\n"
"ldd r21,Z+8\n"
"eor r22,r21\n"
"eor r21,r10\n"
"ld r20,Z\n"
"eor r21,r20\n"
// --- Keystream byte, accumulated in r9 ---
// r9 ^= r23 ^ (r14&r21) ^ (r14&r6) ^ (r21&r6) ^ (r16&r7) ^ (r11&~r7)
// i.e. a majority term over (r14, r21, r6) and a choose term keyed by r7.
"eor r9,r23\n"
"mov r0,r14\n"
"and r0,r21\n"
"eor r9,r0\n"
"and r14,r6\n"
"eor r9,r14\n"
"mov r0,r21\n"
"and r0,r6\n"
"eor r9,r0\n"
"and r16,r7\n"
"eor r9,r16\n"
"mov r0,r7\n"
"com r0\n"
"and r11,r0\n"
"eor r9,r11\n"
// --- Feedback byte, accumulated in r16 ---
// r16 = ~r22 ^ r20 ^ (r15&r10) ^ (r10&r12) ^ (r15&r12) ^ r13 ^ plaintext
// where plaintext = keystream ^ ciphertext is formed in r9 first.
"mov r16,r22\n"
"com r16\n"
"eor r16,r20\n"
"mov r0,r15\n"
"and r0,r10\n"
"eor r16,r0\n"
"and r10,r12\n"
"eor r16,r10\n"
"and r15,r12\n"
"eor r16,r15\n"
"eor r16,r13\n"
"eor r9,%2\n"
"eor r16,r9\n"
// Split the feedback byte: its low nibble is XORed into the top nibble of
// r8, and its high nibble becomes the new value of byte 40.
"swap r16\n"
"mov r17,r16\n"
"andi r17,0xF0\n"
"eor r8,r17\n"
"andi r16,0x0F\n"
"std Z+40,r16\n"
// --- Shift every section down by one byte and insert the updated byte ---
// Bytes 32..39: move 33..38 down; r8 is inserted shifted left by 3 bits,
// straddling bytes 38 (ORed with old byte 39) and 39.
"ldd r0,Z+33\n"
"std Z+32,r0\n"
"ldd r0,Z+34\n"
"std Z+33,r0\n"
"ldd r0,Z+35\n"
"std Z+34,r0\n"
"ldd r0,Z+36\n"
"std Z+35,r0\n"
"ldd r0,Z+37\n"
"std Z+36,r0\n"
"ldd r0,Z+38\n"
"std Z+37,r0\n"
"ldd r0,Z+39\n"
"clr r15\n"
"lsl r8\n"
"rol r15\n"
"lsl r8\n"
"rol r15\n"
"lsl r8\n"
"rol r15\n"
"or r0,r8\n"
"std Z+38,r0\n"
"std Z+39,r15\n"
// Bytes 28..31 and 26: move 29..31 down; the low 3 bits of r7 go into the
// top of the new byte 31 (ORed with old byte 26), r7 >> 3 becomes byte 26.
"ldd r0,Z+29\n"
"std Z+28,r0\n"
"ldd r0,Z+30\n"
"std Z+29,r0\n"
"ldd r0,Z+31\n"
"std Z+30,r0\n"
"ldd r0,Z+26\n"
"clr r15\n"
"lsr r7\n"
"ror r15\n"
"lsr r7\n"
"ror r15\n"
"lsr r7\n"
"ror r15\n"
"or r0,r15\n"
"std Z+31,r0\n"
"std Z+26,r7\n"
// Bytes 20..24: move 21..23 down; bit 0 of r6 goes into the top bit of the
// new byte 23 (ORed with old byte 24), r6 >> 1 becomes byte 24.
"ldd r0,Z+21\n"
"std Z+20,r0\n"
"ldd r0,Z+22\n"
"std Z+21,r0\n"
"ldd r0,Z+23\n"
"std Z+22,r0\n"
"ldd r0,Z+24\n"
"clr r15\n"
"lsr r6\n"
"ror r15\n"
"or r0,r15\n"
"std Z+23,r0\n"
"std Z+24,r6\n"
// Bytes 16..19 and 14..15: move 17..19 down and byte 14 into 19; bit 0 of
// r23 goes into the top bit of the new byte 14 (ORed with old byte 15),
// r23 >> 1 becomes byte 15.
"ldd r0,Z+17\n"
"std Z+16,r0\n"
"ldd r0,Z+18\n"
"std Z+17,r0\n"
"ldd r0,Z+19\n"
"std Z+18,r0\n"
"ldd r0,Z+14\n"
"std Z+19,r0\n"
"ldd r0,Z+15\n"
"clr r15\n"
"lsr r23\n"
"ror r15\n"
"or r0,r15\n"
"std Z+14,r0\n"
"std Z+15,r23\n"
// Bytes 8..13: move 9..12 down; the low 2 bits of r22 go into the top of
// the new byte 12 (ORed with old byte 13), r22 >> 2 becomes byte 13.
"ldd r0,Z+9\n"
"std Z+8,r0\n"
"ldd r0,Z+10\n"
"std Z+9,r0\n"
"ldd r0,Z+11\n"
"std Z+10,r0\n"
"ldd r0,Z+12\n"
"std Z+11,r0\n"
"ldd r0,Z+13\n"
"clr r15\n"
"lsr r22\n"
"ror r15\n"
"lsr r22\n"
"ror r15\n"
"or r0,r15\n"
"std Z+12,r0\n"
"std Z+13,r22\n"
// Bytes 0..7: move 1..6 down; the low 3 bits of r21 go into the top of the
// new byte 6 (ORed with old byte 7), r21 >> 3 becomes byte 7.
"ldd r0,Z+1\n"
"st Z,r0\n"
"ldd r0,Z+2\n"
"std Z+1,r0\n"
"ldd r0,Z+3\n"
"std Z+2,r0\n"
"ldd r0,Z+4\n"
"std Z+3,r0\n"
"ldd r0,Z+5\n"
"std Z+4,r0\n"
"ldd r0,Z+6\n"
"std Z+5,r0\n"
"ldd r0,Z+7\n"
"clr r15\n"
"lsr r21\n"
"ror r15\n"
"lsr r21\n"
"ror r15\n"
"lsr r21\n"
"ror r15\n"
"or r0,r15\n"
"std Z+6,r0\n"
"std Z+7,r21\n"
// --- Result: r9 already holds keystream ^ ciphertext ---
"mov %0,r9\n"
: "=r"(plaintext)
: "z"(&state->s1_l), "r"(ciphertext)
: "r16", "r17", "r20", "r21", "r22", "r23", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory"
);
return plaintext;
}
// Encrypts a 32-bit word by feeding it through the 8-bit primitive one
// byte at a time, least significant byte first, and reassembling the
// output bytes in the same positions.  Only the low byte of ca and cb is
// passed on to the 8-bit primitive.
uint32_t acornEncrypt32
    (Acorn128State *state, uint32_t plaintext, uint32_t ca, uint32_t cb)
{
    const uint8_t caByte = (uint8_t)ca;
    const uint8_t cbByte = (uint8_t)cb;

    // Each call mutates the state, so the four steps are kept as separate
    // statements to pin their order.
    const uint8_t out0 =
        acornEncrypt8(state, (uint8_t)plaintext, caByte, cbByte);
    const uint8_t out1 =
        acornEncrypt8(state, (uint8_t)(plaintext >> 8), caByte, cbByte);
    const uint8_t out2 =
        acornEncrypt8(state, (uint8_t)(plaintext >> 16), caByte, cbByte);
    const uint8_t out3 =
        acornEncrypt8(state, (uint8_t)(plaintext >> 24), caByte, cbByte);

    return ((uint32_t)out0)
         | (((uint32_t)out1) << 8)
         | (((uint32_t)out2) << 16)
         | (((uint32_t)out3) << 24);
}
// AVR implementation of Acorn128::encrypt().
//
// Encrypts len bytes from input into output one byte at a time, advancing
// the cipher state with each byte.  output and input are both walked
// forwards; nothing is written when len is zero.
void Acorn128::encrypt(uint8_t *output, const uint8_t *input, size_t len)
{
    // The first payload call closes the authenticated-data phase: pad the
    // state once with cb = CB_1 and remember that it has been done.
    if (!state.authDone) {
        acornPad(&state, CB_1);
        state.authDone = 1;
    }

    // Payload bytes are processed with ca = 0xFF and cb = 0x00.
    while (len > 0) {
        *output++ = acornEncrypt8(&state, *input++, CA_1_BYTE, CB_0_BYTE);
        --len;
    }
}
// AVR implementation of Acorn128::decrypt().
//
// Decrypts len bytes from input into output one byte at a time, advancing
// the cipher state with each byte.  output and input are both walked
// forwards; nothing is written when len is zero.
void Acorn128::decrypt(uint8_t *output, const uint8_t *input, size_t len)
{
    // The first payload call closes the authenticated-data phase: pad the
    // state once with cb = CB_1 and remember that it has been done.
    if (!state.authDone) {
        acornPad(&state, CB_1);
        state.authDone = 1;
    }

    // acornDecrypt8() takes no ca/cb arguments; its masks are fixed.
    while (len > 0) {
        *output++ = acornDecrypt8(&state, *input++);
        --len;
    }
}
// AVR implementation of Acorn128::addAuthData().
//
// Absorbs len bytes of authenticated data into the cipher state.  The
// call is ignored once state.authDone has been set.  The bytes produced
// by the encryption step are discarded; only the state update matters.
void Acorn128::addAuthData(const void *data, size_t len)
{
    if (!state.authDone) {
        const uint8_t *cursor = (const uint8_t *)data;
        // Authenticated data is processed with ca = 0xFF and cb = 0xFF.
        for (size_t remaining = len; remaining != 0; --remaining)
            acornEncrypt8(&state, *cursor++, CA_1_BYTE, CB_1_BYTE);
    }
}
#endif // CRYPTO_ACORN128_AVR