1
0
mirror of https://github.com/taigrr/arduinolibs synced 2025-01-18 04:33:12 -08:00

Recursive batcher84 to save code size

This commit is contained in:
Rhys Weatherley 2016-08-23 18:52:46 +10:00
parent 6c4ec0cb23
commit ca67bdbae0
2 changed files with 51 additions and 123 deletions

View File

@ -215,8 +215,8 @@ All figures are for the Arduino Due running at 84 MHz:
<tr><td>NewHope::keygen(), Ref</td><td align="right">29ms</td><td colspan="3">Generate key pair for Alice, Ref version</td></tr>
<tr><td>NewHope::sharedb(), Ref</td><td align="right">40ms</td><td colspan="3">Generate shared secret and public key for Bob, Ref version</td></tr>
<tr><td>NewHope::shareda(), Ref</td><td align="right">9ms</td><td colspan="3">Generate shared secret for Alice, Ref version</td></tr>
<tr><td>NewHope::keygen(), Torref</td><td align="right">40ms</td><td colspan="3">Generate key pair for Alice, Torref version</td></tr>
<tr><td>NewHope::sharedb(), Torref</td><td align="right">52ms</td><td colspan="3">Generate shared secret and public key for Bob, Torref version</td></tr>
<tr><td>NewHope::keygen(), Torref</td><td align="right">42ms</td><td colspan="3">Generate key pair for Alice, Torref version</td></tr>
<tr><td>NewHope::sharedb(), Torref</td><td align="right">53ms</td><td colspan="3">Generate shared secret and public key for Bob, Torref version</td></tr>
<tr><td>NewHope::shareda(), Torref</td><td align="right">9ms</td><td colspan="3">Generate shared secret for Alice, Torref version</td></tr>
</table>
*/

View File

@ -776,132 +776,60 @@ static void decode_b_2nd_half(uint16_t *c, const unsigned char *r)
x[16*(i)] ^= t;\
x[16*(j)] ^= t;
static void batcher84(uint16_t *x)
// Code size efficient (but slower) version of the Batcher sort.
// https://en.wikipedia.org/wiki/Batcher_odd%E2%80%93even_mergesort
static void oddeven_merge(uint16_t *x, unsigned lo, unsigned hi, unsigned r)
{
static uint8_t const swap_table[] = {
0, 1, 2, 3, 0, 2, 1, 3, 1, 2, 4, 5, 6, 7, 4, 6,
5, 7, 5, 6, 0, 4, 2, 6, 2, 4, 1, 5, 3, 7, 3, 5,
1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 8, 10, 9, 11, 9, 10,
12, 13, 14, 15, 12, 14, 13, 15, 13, 14, 8, 12, 10, 14, 10, 12,
9, 13, 11, 15, 11, 13, 9, 10, 11, 12, 13, 14, 0, 8, 4, 12,
4, 8, 2, 10, 6, 14, 6, 10, 2, 4, 6, 8, 10, 12, 1, 9,
5, 13, 5, 9, 3, 11, 7, 15, 7, 11, 3, 5, 7, 9, 11, 13,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
18, 19, 16, 18, 17, 19, 17, 18, 20, 21, 22, 23, 20, 22, 21, 23,
21, 22, 16, 20, 18, 22, 18, 20, 17, 21, 19, 23, 19, 21, 17, 18,
19, 20, 21, 22, 24, 25, 26, 27, 24, 26, 25, 27, 25, 26, 28, 29,
30, 31, 28, 30, 29, 31, 29, 30, 24, 28, 26, 30, 26, 28, 25, 29,
27, 31, 27, 29, 25, 26, 27, 28, 29, 30, 16, 24, 20, 28, 20, 24,
18, 26, 22, 30, 22, 26, 18, 20, 22, 24, 26, 28, 17, 25, 21, 29,
21, 25, 19, 27, 23, 31, 23, 27, 19, 21, 23, 25, 27, 29, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 16, 8, 24,
8, 16, 4, 20, 12, 28, 12, 20, 4, 8, 12, 16, 20, 24, 2, 18,
10, 26, 10, 18, 6, 22, 14, 30, 14, 22, 6, 10, 14, 18, 22, 26,
2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 1, 17,
9, 25, 9, 17, 5, 21, 13, 29, 13, 21, 5, 9, 13, 17, 21, 25,
3, 19, 11, 27, 11, 19, 7, 23, 15, 31, 15, 23, 7, 11, 15, 19,
23, 27, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33,
34, 35, 32, 34, 33, 35, 33, 34, 36, 37, 38, 39, 36, 38, 37, 39,
37, 38, 32, 36, 34, 38, 34, 36, 33, 37, 35, 39, 35, 37, 33, 34,
35, 36, 37, 38, 40, 41, 42, 43, 40, 42, 41, 43, 41, 42, 44, 45,
46, 47, 44, 46, 45, 47, 45, 46, 40, 44, 42, 46, 42, 44, 41, 45,
43, 47, 43, 45, 41, 42, 43, 44, 45, 46, 32, 40, 36, 44, 36, 40,
34, 42, 38, 46, 38, 42, 34, 36, 38, 40, 42, 44, 33, 41, 37, 45,
37, 41, 35, 43, 39, 47, 39, 43, 35, 37, 39, 41, 43, 45, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51,
48, 50, 49, 51, 49, 50, 52, 53, 54, 55, 52, 54, 53, 55, 53, 54,
48, 52, 50, 54, 50, 52, 49, 53, 51, 55, 51, 53, 49, 50, 51, 52,
53, 54, 56, 57, 58, 59, 56, 58, 57, 59, 57, 58, 60, 61, 62, 63,
60, 62, 61, 63, 61, 62, 56, 60, 58, 62, 58, 60, 57, 61, 59, 63,
59, 61, 57, 58, 59, 60, 61, 62, 48, 56, 52, 60, 52, 56, 50, 58,
54, 62, 54, 58, 50, 52, 54, 56, 58, 60, 49, 57, 53, 61, 53, 57,
51, 59, 55, 63, 55, 59, 51, 53, 55, 57, 59, 61, 49, 50, 51, 52,
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 32, 48, 40, 56, 40, 48,
36, 52, 44, 60, 44, 52, 36, 40, 44, 48, 52, 56, 34, 50, 42, 58,
42, 50, 38, 54, 46, 62, 46, 54, 38, 42, 46, 50, 54, 58, 34, 36,
38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 33, 49, 41, 57,
41, 49, 37, 53, 45, 61, 45, 53, 37, 41, 45, 49, 53, 57, 35, 51,
43, 59, 43, 51, 39, 55, 47, 63, 47, 55, 39, 43, 47, 51, 55, 59,
35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0, 32, 16, 48,
16, 32, 8, 40, 24, 56, 24, 40, 8, 16, 24, 32, 40, 48, 4, 36,
20, 52, 20, 36, 12, 44, 28, 60, 28, 44, 12, 20, 28, 36, 44, 52,
4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 2, 34,
18, 50, 18, 34, 10, 42, 26, 58, 26, 42, 10, 18, 26, 34, 42, 50,
6, 38, 22, 54, 22, 38, 14, 46, 30, 62, 30, 46, 14, 22, 30, 38,
46, 54, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58,
2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 1, 33,
17, 49, 17, 33, 9, 41, 25, 57, 25, 41, 9, 17, 25, 33, 41, 49,
5, 37, 21, 53, 21, 37, 13, 45, 29, 61, 29, 45, 13, 21, 29, 37,
45, 53, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57,
3, 35, 19, 51, 19, 35, 11, 43, 27, 59, 27, 43, 11, 19, 27, 35,
43, 51, 7, 39, 23, 55, 23, 39, 15, 47, 31, 63, 31, 47, 15, 23,
31, 39, 47, 55, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51,
55, 59, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29,
31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61,
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 65,
66, 67, 64, 66, 65, 67, 65, 66, 68, 69, 70, 71, 68, 70, 69, 71,
69, 70, 64, 68, 66, 70, 66, 68, 65, 69, 67, 71, 67, 69, 65, 66,
67, 68, 69, 70, 72, 73, 74, 75, 72, 74, 73, 75, 73, 74, 76, 77,
78, 79, 76, 78, 77, 79, 77, 78, 72, 76, 74, 78, 74, 76, 73, 77,
75, 79, 75, 77, 73, 74, 75, 76, 77, 78, 64, 72, 68, 76, 68, 72,
66, 74, 70, 78, 70, 74, 66, 68, 70, 72, 74, 76, 65, 73, 69, 77,
69, 73, 67, 75, 71, 79, 71, 75, 67, 69, 71, 73, 75, 77, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83,
80, 82, 81, 83, 81, 82, 81, 82, 81, 82, 64, 80, 72, 80, 68, 72,
76, 80, 66, 82, 74, 82, 70, 74, 78, 82, 66, 68, 70, 72, 74, 76,
78, 80, 65, 81, 73, 81, 69, 73, 77, 81, 67, 83, 75, 83, 71, 75,
79, 83, 67, 69, 71, 73, 75, 77, 79, 81, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 72, 80, 68, 72,
76, 80, 74, 82, 70, 74, 78, 82, 66, 68, 70, 72, 74, 76, 78, 80,
73, 81, 69, 73, 77, 81, 75, 83, 71, 75, 79, 83, 67, 69, 71, 73,
75, 77, 79, 81, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
77, 78, 79, 80, 81, 82, 0, 64, 32, 64, 16, 80, 48, 80, 16, 32,
48, 64, 8, 72, 40, 72, 24, 40, 56, 72, 8, 16, 24, 32, 40, 48,
56, 64, 72, 80, 4, 68, 36, 68, 20, 36, 52, 68, 12, 76, 44, 76,
28, 44, 60, 76, 12, 20, 28, 36, 44, 52, 60, 68, 4, 8, 12, 16,
20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80,
2, 66, 34, 66, 18, 82, 50, 82, 18, 34, 50, 66, 10, 74, 42, 74,
26, 42, 58, 74, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 6, 70,
38, 70, 22, 38, 54, 70, 14, 78, 46, 78, 30, 46, 62, 78, 14, 22,
30, 38, 46, 54, 62, 70, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42,
46, 50, 54, 58, 62, 66, 70, 74, 78, 82, 2, 4, 6, 8, 10, 12,
14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44,
46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
78, 80, 1, 65, 33, 65, 17, 81, 49, 81, 17, 33, 49, 65, 9, 73,
41, 73, 25, 41, 57, 73, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81,
5, 69, 37, 69, 21, 37, 53, 69, 13, 77, 45, 77, 29, 45, 61, 77,
13, 21, 29, 37, 45, 53, 61, 69, 5, 9, 13, 17, 21, 25, 29, 33,
37, 41, 45, 49, 53, 57, 61, 65, 69, 73, 77, 81, 3, 67, 35, 67,
19, 83, 51, 83, 19, 35, 51, 67, 11, 75, 43, 75, 27, 43, 59, 75,
11, 19, 27, 35, 43, 51, 59, 67, 75, 83, 7, 71, 39, 71, 23, 39,
55, 71, 15, 79, 47, 79, 31, 47, 63, 79, 15, 23, 31, 39, 47, 55,
63, 71, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59,
63, 67, 71, 75, 79, 83, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21,
23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53,
55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82
};
unsigned index, i, j;
unsigned step = r * 2;
unsigned i;
int32_t c;
uint16_t t;
for (index = 0; index < sizeof(swap_table); index += 2) {
i = swap_table[index];
j = swap_table[index + 1];
compare_and_swap(x, i, j);
if (lo >= 84)
return;
if (step < (hi - lo)) {
if ((step * 2) >= (hi - lo) && hi < 84) {
// The next recursion down is a leaf, so unroll a little.
compare_and_swap(x, lo, lo + step);
compare_and_swap(x, lo + r, lo + r + step);
compare_and_swap(x, lo + r, lo + step);
return;
}
oddeven_merge(x, lo, hi, step);
oddeven_merge(x, lo + r, hi, step);
for (i = lo + r; i < (hi - r) && (i + r) < 84; i += step) {
compare_and_swap(x, i, i + r);
}
} else if ((lo + r) < 84) {
compare_and_swap(x, lo, lo + r);
}
}
static void oddeven_merge_sort_range(uint16_t *x, unsigned lo, unsigned hi)
{
if (lo == hi || lo >= 84)
return;
unsigned mid = lo + ((hi - lo) / 2);
if ((hi - lo) == 3 && hi < 84) {
// Optimization for sub lists of size 4. Unroll the comparisons.
int32_t c;
uint16_t t;
compare_and_swap(x, lo , lo + 1);
compare_and_swap(x, lo + 2, lo + 3);
compare_and_swap(x, lo , lo + 2);
compare_and_swap(x, lo + 1, lo + 3);
compare_and_swap(x, lo + 1, lo + 2);
return;
}
oddeven_merge_sort_range(x, lo, mid);
oddeven_merge_sort_range(x, mid + 1, hi);
oddeven_merge(x, lo, hi, 1);
}
static void batcher84(uint16_t *x)
{
// Batcher sort is defined over a power of two list size but 84
// is not a power of two. Round up to the next power of two and
// then ignore any swap with an index that is out of range.
oddeven_merge_sort_range(x, 0, 127);
}
static int discardtopoly(uint16_t *x)
{