mirror of
https://github.com/taigrr/arduinolibs
synced 2025-01-18 04:33:12 -08:00
236 lines
7.4 KiB
C
236 lines
7.4 KiB
C
/*
|
|
* Copyright (C) 2016 Southern Storm Software, Pty Ltd.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
// Generates the Terminal::isWideCharacter() function from the data at:
|
|
// http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
|
|
// http://www.unicode.org/reports/tr11/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Highest code point handled by this tool, and the number of code points
// in the range 0..MAX_UNICODE inclusive.
#define MAX_UNICODE 0x10FFFF
#define NUM_UNICODE (MAX_UNICODE + 1)

// Bitmap with one bit per code point: bit (code % 8) of byte (code / 8)
// is set when the code point is currently marked as wide.
static unsigned char *masks;
|
|
|
|
static void mark_range(long code1, long code2)
|
|
{
|
|
while (code1 <= code2) {
|
|
if (code1 > MAX_UNICODE)
|
|
break;
|
|
masks[code1 / 8] |= (1 << (code1 % 8));
|
|
++code1;
|
|
}
|
|
}
|
|
|
|
static void unmark_range(long code1, long code2)
|
|
{
|
|
while (code1 <= code2) {
|
|
if (code1 > MAX_UNICODE)
|
|
break;
|
|
masks[code1 / 8] &= ~(1 << (code1 % 8));
|
|
++code1;
|
|
}
|
|
}
|
|
|
|
static void dump_ranges(void)
|
|
{
|
|
long code;
|
|
int index, sum;
|
|
unsigned char *prevptr = 0;
|
|
unsigned char *ptr;
|
|
int dotdot = 0;
|
|
for (code = 0; code <= MAX_UNICODE; code += 0x100) {
|
|
ptr = masks + (code / 8);
|
|
sum = 0;
|
|
for (index = 0; index < 32; ++index)
|
|
sum += ptr[index];
|
|
if (sum == 0 || sum == (0xFF * 32)) {
|
|
if (prevptr && !memcmp(ptr, prevptr, 32)) {
|
|
if (!dotdot) {
|
|
dotdot = 1;
|
|
printf("..\n");
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
dotdot = 0;
|
|
printf("%06lX: ", code);
|
|
for (index = 0; index < 32; ++index)
|
|
printf("%02X", ptr[index]);
|
|
printf("\n");
|
|
prevptr = ptr;
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
static void print_lookup_table(const char *name, long first, long last)
|
|
{
|
|
long index, size;
|
|
unsigned char *ptr = masks + first / 8;
|
|
size = (last - first + 1) / 8;
|
|
printf(" static unsigned char const %s[%ld] PROGMEM = {\n", name, size);
|
|
for (index = 0; index < size; ++index) {
|
|
if ((index % 8) == 0)
|
|
printf(" ");
|
|
printf("0x%02X", ptr[index]);
|
|
if (index < (size - 1)) {
|
|
if ((index % 8) == 7)
|
|
printf(",\n");
|
|
else
|
|
printf(", ");
|
|
} else {
|
|
printf("\n");
|
|
}
|
|
}
|
|
printf(" };\n");
|
|
}
|
|
|
|
static void recognizer(void)
|
|
{
|
|
long code;
|
|
int first = 1;
|
|
|
|
printf("bool Terminal::isWideCharacter(long code)\n{\n");
|
|
printf(" // This function was automatically generated by genwcwidth.c\n");
|
|
print_lookup_table("range3000", 0x3000, 0x30FF);
|
|
print_lookup_table("rangeFE00", 0xFE00, 0xFFFF);
|
|
printf(" unsigned c;\n");
|
|
|
|
// Bail out early for Latin character sets.
|
|
printf(" if (code < 0x2300) {\n");
|
|
printf(" return false;\n");
|
|
|
|
// Densely populated ranges.
|
|
printf(" } else if (code >= 0x3000 && code <= 0x30FF) {\n");
|
|
printf(" c = (unsigned)(code - 0x3000);\n");
|
|
printf(" return (pgm_read_byte(range3000 + (c / 8)) & (1 << (c %% 8))) != 0;\n");
|
|
printf(" } else if (code >= 0xFE00 && code <= 0xFFFF) {\n");
|
|
printf(" c = (unsigned)(code - 0xFE00);\n");
|
|
printf(" return (pgm_read_byte(rangeFE00 + (c / 8)) & (1 << (c %% 8))) != 0;\n");
|
|
|
|
// Deal with the main wide character ranges.
|
|
printf(" } else if (code >= 0x3400 && code <= 0x4DBF) {\n");
|
|
printf(" return true;\n");
|
|
printf(" } else if (code >= 0x4E00 && code <= 0x9FFF) {\n");
|
|
printf(" return true;\n");
|
|
printf(" } else if (code >= 0xF900 && code <= 0xFAFF) {\n");
|
|
printf(" return true;\n");
|
|
printf(" } else if (code >= 0x20000 && code <= 0x2FFFD) {\n");
|
|
printf(" return true;\n");
|
|
printf(" } else if (code >= 0x30000 && code <= 0x3FFFD) {\n");
|
|
printf(" return true;\n");
|
|
printf(" } else if (");
|
|
|
|
// Deal with the left-overs.
|
|
unmark_range(0x3000, 0x30FF);
|
|
unmark_range(0xFE00, 0xFFFF);
|
|
for (code = 0; code <= MAX_UNICODE; ++code) {
|
|
if (masks[code / 8] & (1 << (code % 8))) {
|
|
if (!first)
|
|
printf(" ||\n ");
|
|
else
|
|
first = 0;
|
|
printf("code == 0x%04lX", code);
|
|
}
|
|
}
|
|
printf(") {\n");
|
|
printf(" return true;\n");
|
|
printf(" }\n");
|
|
|
|
printf(" return false;\n");
|
|
printf("}\n");
|
|
}
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
FILE *file;
|
|
char buffer[BUFSIZ];
|
|
|
|
// Allocate memory for the "is this a wide character?" mask array.
|
|
masks = calloc(NUM_UNICODE / 8, sizeof(unsigned char));
|
|
if (!masks) {
|
|
fprintf(stderr, "out of memory\n");
|
|
return 1;
|
|
}
|
|
|
|
// Load the contents of "EastAsianWidth.txt".
|
|
if (argc < 2) {
|
|
fprintf(stderr, "Usage: %s EastAsianWidth.txt\n", argv[0]);
|
|
return 1;
|
|
}
|
|
if ((file = fopen(argv[1], "r")) == NULL) {
|
|
perror(argv[1]);
|
|
return 1;
|
|
}
|
|
while (fgets(buffer, sizeof(buffer), file)) {
|
|
if ((buffer[0] >= '0' && buffer[0] <= '9') ||
|
|
(buffer[0] >= 'A' && buffer[0] <= 'F')) {
|
|
long code1 = 0;
|
|
long code2 = 0;
|
|
char *endptr = NULL;
|
|
code1 = strtol(buffer, &endptr, 16);
|
|
if (endptr[0] == '.' && endptr[1] == '.') {
|
|
endptr += 2;
|
|
code2 = strtol(buffer, &endptr, 16);
|
|
} else {
|
|
code2 = code1;
|
|
}
|
|
if (endptr[0] == ';') {
|
|
// Recognise 'W' and 'F' as wide characters. It is possible
|
|
// that 'A' (ambiguous) characters may also be wide but only
|
|
// in East Asian contexts, which we assume we're not for now.
|
|
if (endptr[1] == 'W' || endptr[1] == 'F') {
|
|
mark_range(code1, code2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
fclose(file);
|
|
|
|
// Some special ranges that are implicitly all-wide even if the
|
|
// code points aren't currently allocated by the Unicode standard.
|
|
mark_range(0x3400, 0x4DBF);
|
|
mark_range(0x4E00, 0x9FFF);
|
|
mark_range(0xF900, 0xFAFF);
|
|
mark_range(0x20000, 0x2FFFD);
|
|
mark_range(0x30000, 0x3FFFD);
|
|
|
|
// Dump the ranges.
|
|
dump_ranges();
|
|
|
|
// Unmark the special ranges to make it easier to find the left-overs.
|
|
unmark_range(0x3400, 0x4DBF);
|
|
unmark_range(0x4E00, 0x9FFF);
|
|
unmark_range(0xF900, 0xFAFF);
|
|
unmark_range(0x20000, 0x2FFFD);
|
|
unmark_range(0x30000, 0x3FFFD);
|
|
|
|
// Create the recognition tree.
|
|
recognizer();
|
|
|
|
// Clean up and exit.
|
|
free(masks);
|
|
return 0;
|
|
}
|