#include #include #include #include #include #include #include #include #include #if HAWK_SIZEOF_WCHAR_T == HAWK_SIZEOF_INT16_T #define MAX_CHAR 0xFFFF #else /*#define MAX_CHAR 0xE01EF*/ #define MAX_CHAR 0x10FFFF #endif #define CASE_PAGE_SIZE 512 #define MAX_CASE_PAGE_COUNT ((MAX_CHAR + CASE_PAGE_SIZE) / CASE_PAGE_SIZE) /* * short is enough as the diff does not exceed * the maixmum value of the short type. */ typedef long int wcdiff_t; typedef struct case_page_t case_page_t; struct case_page_t { size_t no; wcdiff_t upper[CASE_PAGE_SIZE]; wcdiff_t lower[CASE_PAGE_SIZE]; case_page_t* next; }; size_t case_page_count = 0; case_page_t* case_pages = NULL; size_t case_map_count = 0; case_page_t* case_maps[MAX_CASE_PAGE_COUNT]; void make_case_page (hawk_uci_t start, hawk_uci_t end) { hawk_uci_t code, c; wcdiff_t upper[CASE_PAGE_SIZE]; wcdiff_t lower[CASE_PAGE_SIZE]; case_page_t* page; memset (upper, 0, sizeof(upper)); memset (lower, 0, sizeof(lower)); for (code = start; code <= end; code++) { c = code - start; upper[c] = (wcdiff_t)code - (wcdiff_t)towupper(code); lower[c] = (wcdiff_t)towlower(code) - (wcdiff_t)code; } for (page = case_pages; page != NULL; page = page->next) { if (memcmp(upper, page->upper, sizeof(upper)) == 0 && memcmp(lower, page->lower, sizeof(lower)) == 0) { case_maps[case_map_count++] = page; return; } } page = (case_page_t*)malloc(sizeof(case_page_t)); page->no = case_page_count++; memcpy (page->upper, upper, sizeof(upper)); memcpy (page->lower, lower, sizeof(lower)); page->next = case_pages; case_pages = page; case_maps[case_map_count++] = page; } void emit_case_page (case_page_t* page, int page_seq) { size_t i; wcdiff_t upper, lower; printf ("static uch_case_page_t uch_case_page_%04X[%u] =\n{\n", (unsigned int)page->no, (unsigned int)CASE_PAGE_SIZE); for (i = 0; i < CASE_PAGE_SIZE; i++) { upper = page->upper[i]; lower = page->lower[i]; if (i != 0) printf (",\n"); printf ("\t"); #if 0 if (upper > SHRT_MAX || upper < SHRT_MIN || lower > SHRT_MAX || lower < SHRT_MIN) { fprintf (stderr, "WARNING: page %u, index %u: value out of range - upper %ld lower %ld\n", (unsigned int)page->no, (unsigned int)i, (long int)upper, (long int)lower); } #endif printf ("{%ld, %ld}", (long int)upper, (long int)lower); } printf ("\n};\n"); } void emit_case_map () { size_t i; printf ("static uch_case_page_t* uch_case_map[%u] =\n{\n", (unsigned int)case_map_count); for (i = 0; i < case_map_count; i++) { if (i != 0) printf (",\n"); printf ("\t /* 0x%lX-0x%lX */ ", (unsigned long int)(i * CASE_PAGE_SIZE), (unsigned long int)((i + 1) * CASE_PAGE_SIZE - 1)); printf ("uch_case_page_%04X", (int)case_maps[i]->no); } printf ("\n};\n"); } static void emit_case_macros (void) { printf ("/* generated by tools/uni-case.c */\n\n"); printf ("#define UCH_CASE_MAX 0x%lX\n\n", (unsigned long)MAX_CHAR); printf ("typedef struct uch_case_page_t uch_case_page_t;\n\n"); printf ("struct uch_case_page_t\n"); printf ("{\n"); printf (" hawk_int32_t upper;\n"); printf (" hawk_int32_t lower;\n"); printf ("};\n\n"); printf ("\n"); } int main () { hawk_uci_t code; case_page_t* page; char* locale; int page_seq = 0; locale = setlocale (LC_ALL, ""); if (locale == NULL || (strstr(locale, ".utf8") == NULL && strstr(locale, ".UTF8") == NULL && strstr(locale, ".utf-8") == NULL && strstr(locale, ".UTF-8") == NULL)) { fprintf (stderr, "error: the locale should be utf-8 compatible\n"); return -1; } for (code = 0; code < MAX_CHAR; code += CASE_PAGE_SIZE) { make_case_page (code, code + CASE_PAGE_SIZE - 1); } emit_case_macros (); for (page = case_pages; page != NULL; page = page->next) { emit_case_page (page, page_seq); printf ("\n"); page_seq++; } emit_case_map (); return 0; }