1
0
mirror of https://github.com/taigrr/arduinolibs synced 2025-01-18 04:33:12 -08:00

Fix AVR asm issues with Speck

This commit is contained in:
Rhys Weatherley 2017-11-01 16:11:51 +10:00
parent d9f3caa106
commit b53f57225d

View File

@ -143,11 +143,11 @@ bool Speck::setKey(const uint8_t *key, size_t len)
); );
// Expand the key to the full key schedule. // Expand the key to the full key schedule.
uint8_t li_in = 0;
uint8_t li_out = m - 1;
for (uint8_t i = 0; i < (rounds - 1); ++i) {
__asm__ __volatile__ ( __asm__ __volatile__ (
"1:\n"
// l[li_out] = (k[i] + rightRotate8_64(l[li_in])) ^ i; // l[li_out] = (k[i] + rightRotate8_64(l[li_in])) ^ i;
"add %A1,%2\n" // X = &(l[li_in])
"adc %B1,__zero_reg__\n"
"ld r15,X+\n" // x = rightRotate8_64(l[li_in]) "ld r15,X+\n" // x = rightRotate8_64(l[li_in])
"ld r8,X+\n" "ld r8,X+\n"
"ld r9,X+\n" "ld r9,X+\n"
@ -175,19 +175,11 @@ bool Speck::setKey(const uint8_t *key, size_t len)
"adc r14,r22\n" "adc r14,r22\n"
"adc r15,r23\n" "adc r15,r23\n"
"eor r8,%4\n" // x ^= i "eor r8,%3\n" // x ^= i
// X = X - li_in + li_out // k[i + 1] = leftRotate3_64(k[i]) ^ l[li_out];
"ldi r24,8\n" // li_in = li_in + 1 "movw r26,%A2\n" // l[li_out] = x
"add %2,r24\n" "st X+,r8\n"
"sub %A1,%2\n" // return X to its initial value
"sbc %B1,__zero_reg__\n"
"ldi r25,0x1f\n"
"and %2,r25\n" // li_in = li_in % 4
"add %A1,%3\n" // X = &(l[li_out])
"adc %B1,__zero_reg__\n"
"st X+,r8\n" // l[li_out] = x
"st X+,r9\n" "st X+,r9\n"
"st X+,r10\n" "st X+,r10\n"
"st X+,r11\n" "st X+,r11\n"
@ -196,12 +188,6 @@ bool Speck::setKey(const uint8_t *key, size_t len)
"st X+,r14\n" "st X+,r14\n"
"st X+,r15\n" "st X+,r15\n"
"add %3,r24\n" // li_out = li_out + 1
"sub %A1,%3\n" // return X to its initial value
"sbc %B1,__zero_reg__\n"
"and %3,r25\n" // li_out = li_out % 4
// k[i + 1] = leftRotate3_64(k[i]) ^ l[li_out];
"lsl r16\n" // y = leftRotate1_64(y) "lsl r16\n" // y = leftRotate1_64(y)
"rol r17\n" "rol r17\n"
"rol r18\n" "rol r18\n"
@ -241,31 +227,27 @@ bool Speck::setKey(const uint8_t *key, size_t len)
"eor r22,r14\n" "eor r22,r14\n"
"eor r23,r15\n" "eor r23,r15\n"
"st Z,r16\n" // k[i + 1] = y "st Z+,r16\n" // k[i + 1] = y
"std Z+1,r17\n" "st Z+,r17\n"
"std Z+2,r18\n" "st Z+,r18\n"
"std Z+3,r19\n" "st Z+,r19\n"
"std Z+4,r20\n" "st Z+,r20\n"
"std Z+5,r21\n" "st Z+,r21\n"
"std Z+6,r22\n" "st Z+,r22\n"
"std Z+7,r23\n" "st Z+,r23\n"
// Loop : : "z"(&(k[i])), "x"(&(l[li_in])),
"inc %4\n" // ++i "r"(&(l[li_out])),
"dec %5\n" // --rounds "r"(i)
"breq 2f\n"
"rjmp 1b\n"
"2:\n"
: : "z"(k), "x"(l),
"r"((uint8_t)0), // initial value of li_in
"r"((uint8_t)((m - 1) * 8)), // initial value of li_out
"r"(0), // initial value of i
"r"(rounds - 1)
: "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25" "r24", "r25"
); );
if ((++li_in) >= m)
li_in = 0;
if ((++li_out) >= m)
li_out = 0;
}
#else #else
uint64_t l[4]; uint64_t l[4];
uint8_t m; uint8_t m;
@ -335,41 +317,32 @@ void Speck::encryptBlock(uint8_t *output, const uint8_t *input)
__asm__ __volatile__ ( __asm__ __volatile__ (
"1:\n" "1:\n"
// x = (rightRotate8_64(x) + y) ^ *s++; // x = (rightRotate8_64(x) + y) ^ *s++;
"mov __tmp_reg__,%A0\n" // x = rightRotate8_64(x) "add %B0,%A2\n" // x = rightRotate8_64(x), x += y
"mov %A0,%B0\n" "adc %C0,%B2\n" // Note: right rotate is implicit.
"mov %B0,%C0\n" "adc %D0,%C2\n"
"mov %C0,%D0\n" "adc %A1,%D2\n"
"mov %D0,%A1\n" "adc %B1,%A3\n"
"mov %A1,%B1\n" "adc %C1,%B3\n"
"mov %B1,%C1\n" "adc %D1,%C3\n"
"mov %C1,%D1\n" "adc %A0,%D3\n"
"mov %D1,__tmp_reg__\n"
"add %A0,%A2\n" // x += y
"adc %B0,%B2\n"
"adc %C0,%C2\n"
"adc %D0,%D2\n"
"adc %A1,%A3\n"
"adc %B1,%B3\n"
"adc %C1,%C3\n"
"adc %D1,%D3\n"
"ld __tmp_reg__,Z+\n" // x ^= *s++ "ld __tmp_reg__,Z+\n" // x ^= *s++
"eor %A0,__tmp_reg__\n" "eor __tmp_reg__,%B0\n" // Also fully apply the right rotate.
"ld __tmp_reg__,Z+\n" "ld %B0,Z+\n"
"eor %B0,__tmp_reg__\n" "eor %B0,%C0\n"
"ld __tmp_reg__,Z+\n" "ld %C0,Z+\n"
"eor %C0,__tmp_reg__\n" "eor %C0,%D0\n"
"ld __tmp_reg__,Z+\n" "ld %D0,Z+\n"
"eor %D0,__tmp_reg__\n" "eor %D0,%A1\n"
"ld __tmp_reg__,Z+\n" "ld %A1,Z+\n"
"eor %A1,__tmp_reg__\n" "eor %A1,%B1\n"
"ld __tmp_reg__,Z+\n" "ld %B1,Z+\n"
"eor %B1,__tmp_reg__\n" "eor %B1,%C1\n"
"ld __tmp_reg__,Z+\n" "ld %C1,Z+\n"
"eor %C1,__tmp_reg__\n" "eor %C1,%D1\n"
"ld __tmp_reg__,Z+\n" "ld %D1,Z+\n"
"eor %D1,__tmp_reg__\n" "eor %D1,%A0\n"
"mov %A0,__tmp_reg__\n"
// y = leftRotate3_64(y) ^ x; // y = leftRotate3_64(y) ^ x;
"lsl %A2\n" // y = leftRotate1_64(y) "lsl %A2\n" // y = leftRotate1_64(y)
@ -530,41 +503,32 @@ void Speck::decryptBlock(uint8_t *output, const uint8_t *input)
// x = leftRotate8_64((x ^ *s--) - y); // x = leftRotate8_64((x ^ *s--) - y);
"ld __tmp_reg__,-Z\n" // x ^= *s-- "ld __tmp_reg__,-Z\n" // x ^= *s--
"eor %D1,__tmp_reg__\n" "eor __tmp_reg__,%D1\n" // Note: also implicitly left-rotates regs
"ld __tmp_reg__,-Z\n" "ld %D1,-Z\n"
"eor %C1,__tmp_reg__\n" "eor %D1,%C1\n"
"ld __tmp_reg__,-Z\n" "ld %C1,-Z\n"
"eor %B1,__tmp_reg__\n" "eor %C1,%B1\n"
"ld __tmp_reg__,-Z\n" "ld %B1,-Z\n"
"eor %A1,__tmp_reg__\n" "eor %B1,%A1\n"
"ld __tmp_reg__,-Z\n" "ld %A1,-Z\n"
"eor %D0,__tmp_reg__\n" "eor %A1,%D0\n"
"ld __tmp_reg__,-Z\n" "ld %D0,-Z\n"
"eor %C0,__tmp_reg__\n" "eor %D0,%C0\n"
"ld __tmp_reg__,-Z\n" "ld %C0,-Z\n"
"eor %B0,__tmp_reg__\n" "eor %C0,%B0\n"
"ld __tmp_reg__,-Z\n" "ld %B0,-Z\n"
"eor %A0,__tmp_reg__\n" "eor %B0,%A0\n"
"sub %A0,%A2\n" // x -= y
"sbc %B0,%B2\n"
"sbc %C0,%C2\n"
"sbc %D0,%D2\n"
"sbc %A1,%A3\n"
"sbc %B1,%B3\n"
"sbc %C1,%C3\n"
"sbc %D1,%D3\n"
"mov __tmp_reg__,%D1\n" // x = lefRotate8_64(x)
"mov %D1,%C1\n"
"mov %C1,%B1\n"
"mov %B1,%A1\n"
"mov %A1,%D0\n"
"mov %D0,%C0\n"
"mov %C0,%B0\n"
"mov %B0,%A0\n"
"mov %A0,__tmp_reg__\n" "mov %A0,__tmp_reg__\n"
"sub %B0,%A2\n" // x -= y
"sbc %C0,%B2\n" // Note: regs are already left-rotated
"sbc %D0,%C2\n"
"sbc %A1,%D2\n"
"sbc %B1,%A3\n"
"sbc %C1,%B3\n"
"sbc %D1,%C3\n"
"sbc %A0,%D3\n"
// Loop // Loop
"dec %5\n" // --round "dec %5\n" // --round
"breq 2f\n" "breq 2f\n"