qse/tools/uni-case.c

178 lines
3.9 KiB
C
Raw Permalink Normal View History

#include <qse/types.h>
#include <locale.h>
#include <wchar.h>
#include <wctype.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
/*
* MAX_CHAR is the highest code point covered by the generated tables:
* 0xFFFF when QSE_SIZEOF_WCHAR_T equals QSE_SIZEOF_SHORT, 0x10FFFF otherwise.
*/
#if QSE_SIZEOF_WCHAR_T == QSE_SIZEOF_SHORT
#define MAX_CHAR 0xFFFF
#else
/*#define MAX_CHAR 0xE01EF*/
#define MAX_CHAR 0x10FFFF
#endif
/* number of consecutive code points described by a single page */
#define CASE_PAGE_SIZE 512
/* number of pages needed to cover the code points 0..MAX_CHAR */
#define MAX_CASE_PAGE_COUNT ((MAX_CHAR + CASE_PAGE_SIZE) / CASE_PAGE_SIZE)
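/*
* A short is enough for the emitted tables as the diff is not expected
* to exceed the range of the short type. The wider long int is used
* while generating so that a value outside that range can still be
* detected and reported when the pages are emitted.
*/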
typedef long int wcdiff_t;
typedef struct case_page_t case_page_t;
/*
* One page of case-conversion differences for CASE_PAGE_SIZE consecutive
* code points. Pages are chained into a singly linked list through 'next'.
*/
struct case_page_t
{
/* number assigned when the page is created; used in the emitted array name */
size_t no;
/* code - towupper(code) for each code point in the page */
wcdiff_t upper[CASE_PAGE_SIZE];
/* towlower(code) - code for each code point in the page */
wcdiff_t lower[CASE_PAGE_SIZE];
/* next page in the list, or NULL at the end */
case_page_t* next;
};
/* number of distinct pages created so far; also the number given to the next new page */
size_t case_page_count = 0;
/* head of the linked list of distinct pages; new pages are inserted at the head */
case_page_t* case_pages = NULL;
/* number of entries of case_maps that have been filled in */
size_t case_map_count = 0;
/* one entry per call to make_case_page, in call order, pointing to the page for that range */
case_page_t* case_maps[MAX_CASE_PAGE_COUNT];
/*
 * Computes the case-conversion differences for the code points
 * start..end (inclusive) and records the resulting page as the next
 * entry of case_maps.
 *
 * For each code point the upper table receives code - towupper(code)
 * and the lower table receives towlower(code) - code. If an existing
 * page in case_pages holds exactly the same two tables, that page is
 * reused; otherwise a new page is allocated, numbered with
 * case_page_count and inserted at the head of case_pages.
 *
 * The range must not span more than CASE_PAGE_SIZE code points and
 * case_maps must still have a free slot. On a violated precondition or
 * an allocation failure an error is written to stderr and the program
 * exits with EXIT_FAILURE, since no usable table can be generated.
 */
void make_case_page (qse_wcint_t start, qse_wcint_t end)
{
	qse_wcint_t code, c;
	wcdiff_t upper[CASE_PAGE_SIZE];
	wcdiff_t lower[CASE_PAGE_SIZE];
	case_page_t* page;

	/* the local tables hold CASE_PAGE_SIZE entries only */
	if (end < start || (end - start) >= CASE_PAGE_SIZE)
	{
		fprintf (stderr, "error: invalid case page range\n");
		exit (EXIT_FAILURE);
	}

	/* both the reuse path and the new-page path append to case_maps */
	if (case_map_count >= MAX_CASE_PAGE_COUNT)
	{
		fprintf (stderr, "error: too many case pages\n");
		exit (EXIT_FAILURE);
	}

	memset (upper, 0, sizeof(upper));
	memset (lower, 0, sizeof(lower));

	for (code = start; code <= end; code++)
	{
		c = code - start;
		upper[c] = (wcdiff_t)code - (wcdiff_t)towupper(code);
		lower[c] = (wcdiff_t)towlower(code) - (wcdiff_t)code;
	}

	/* reuse an identical page if one has already been created */
	for (page = case_pages; page != NULL; page = page->next)
	{
		if (memcmp (upper, page->upper, sizeof(upper)) == 0 &&
		    memcmp (lower, page->lower, sizeof(lower)) == 0)
		{
			case_maps[case_map_count++] = page;
			return;
		}
	}

	page = malloc (sizeof(*page));
	if (page == NULL)
	{
		fprintf (stderr, "error: out of memory\n");
		exit (EXIT_FAILURE);
	}

	page->no = case_page_count++;
	memcpy (page->upper, upper, sizeof(upper));
	memcpy (page->lower, lower, sizeof(lower));

	page->next = case_pages;
	case_pages = page;

	case_maps[case_map_count++] = page;
}
/*
 * Writes one page to stdout as a C array definition named
 * case_page_XXXX, where XXXX is the page number in hexadecimal.
 * Every element is printed as {upper, lower}.
 *
 * An element whose upper or lower value lies outside the range
 * SHRT_MIN..SHRT_MAX is reported on stderr; the value is still
 * printed unchanged.
 *
 * page_seq is accepted but not used.
 */
void emit_case_page (case_page_t* page, int page_seq)
{
	const unsigned int page_no = (unsigned int)page->no;
	size_t idx = 0;

	(void)page_seq;

	printf ("static case_page_t case_page_%04X[%u] =\n{\n",
		page_no, (unsigned int)CASE_PAGE_SIZE);

	while (idx < CASE_PAGE_SIZE)
	{
		const wcdiff_t up = page->upper[idx];
		const wcdiff_t lo = page->lower[idx];
		const int fits_short =
			up >= SHRT_MIN && up <= SHRT_MAX &&
			lo >= SHRT_MIN && lo <= SHRT_MAX;

		/* elements are separated by a comma and a line break */
		if (idx != 0) printf (",\n");
		printf ("\t");

		if (!fits_short)
		{
			fprintf (stderr, "WARNING: page %u, index %u: value out of range - upper %ld lower %ld\n",
				page_no, (unsigned int)idx, (long int)up, (long int)lo);
		}

		printf ("{%ld, %ld}", (long int)up, (long int)lo);
		idx++;
	}

	printf ("\n};\n");
}
/*
 * Writes the top-level lookup table to stdout as a C array named
 * case_map with case_map_count entries. Each entry is the name of the
 * page array (case_page_XXXX) recorded in case_maps for that slot,
 * preceded by a comment giving the code point range of the slot.
 */
void emit_case_map (void)
{
	size_t i;

	printf ("static case_page_t* case_map[%u] =\n{\n", (unsigned int)case_map_count);

	for (i = 0; i < case_map_count; i++)
	{
		/*
		 * size_t values must not be passed for %X directly; convert
		 * them to the exact type each conversion specifier expects.
		 */
		unsigned long first = (unsigned long)i * CASE_PAGE_SIZE;
		unsigned long last = first + CASE_PAGE_SIZE - 1;

		if (i != 0) printf (",\n");
		printf ("\t /* 0x%lX-0x%lX */ ", first, last);
		printf ("case_page_%04X", (unsigned int)case_maps[i]->no);
	}

	printf ("\n};\n");
}
/*
 * Writes the preamble of the generated file to stdout: a banner
 * comment, the CASE_MAX macro holding MAX_CHAR in hexadecimal, and the
 * definition of the case_page_t element type used by the emitted tables.
 */
static void emit_case_macros (void)
{
	fputs ("/* generated by tools/uni-case.c */\n\n", stdout);

	printf ("#define CASE_MAX 0x%lX\n\n", (unsigned long)MAX_CHAR);

	fputs ("typedef struct case_page_t case_page_t;\n\n", stdout);
	fputs ("struct case_page_t {\n", stdout);

	/* use a 16-bit integer to use less memory. */
	fputs (" qse_int16_t upper;\n", stdout);
	fputs (" qse_int16_t lower;\n", stdout);

	fputs ("};\n\n", stdout);
	fputs ("\n", stdout);
}
/*
 * Generates the case-conversion tables and writes them to stdout.
 *
 * The locale is taken from the environment and its name must contain
 * one of ".utf8", ".UTF8", ".utf-8" or ".UTF-8"; otherwise an error is
 * printed to stderr and -1 is returned. The code points from 0 up to
 * MAX_CHAR are then processed one page at a time, after which the
 * preamble, every distinct page and the page map are emitted in that
 * order. Returns 0 on success.
 */
int main ()
{
	qse_wcint_t first;
	case_page_t* cur;
	char* name;
	int seq = 0;
	int is_utf8;

	name = setlocale (LC_ALL, "");
	is_utf8 = name != NULL &&
		(strstr(name, ".utf8") != NULL || strstr(name, ".UTF8") != NULL ||
		 strstr(name, ".utf-8") != NULL || strstr(name, ".UTF-8") != NULL);
	if (!is_utf8)
	{
		fprintf (stderr, "error: the locale should be utf-8 compatible\n");
		return -1;
	}

	/* build one page per CASE_PAGE_SIZE code points */
	first = 0;
	while (first < MAX_CHAR)
	{
		make_case_page (first, first + CASE_PAGE_SIZE - 1);
		first += CASE_PAGE_SIZE;
	}

	emit_case_macros ();

	/* emit every distinct page, each followed by an empty line */
	cur = case_pages;
	while (cur != NULL)
	{
		emit_case_page (cur, seq);
		printf ("\n");
		seq++;
		cur = cur->next;
	}

	emit_case_map ();
	return 0;
}