qse/tools/uni-trait.c

283 lines
5.7 KiB
C
Raw Normal View History

#include <qse/types.h>
#include <locale.h>
#include <wchar.h>
#include <wctype.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/*
 * MAX_CHAR is the upper bound of the character codes this tool classifies.
 * It is 0xFFFF when QSE_SIZEOF_WCHAR_T equals QSE_SIZEOF_SHORT and 0x10FFFF
 * otherwise. (Both size macros are presumably supplied by <qse/types.h> -
 * confirm against that header.)
 */
#if QSE_SIZEOF_WCHAR_T == QSE_SIZEOF_SHORT
#define MAX_CHAR 0xFFFF
#else
/*#define MAX_CHAR 0xE01EF*/
#define MAX_CHAR 0x10FFFF
#endif
/* number of consecutive character codes described by one trait page */
#define TRAIT_PAGE_SIZE 256
/*
 * number of pages needed to cover codes 0..MAX_CHAR, i.e. (MAX_CHAR + 1)
 * divided by TRAIT_PAGE_SIZE, rounded up. it sizes the trait_maps array.
 */
#define MAX_TRAIT_PAGE_COUNT ((MAX_CHAR + TRAIT_PAGE_SIZE) / TRAIT_PAGE_SIZE)
/*
 * A trait page holds the trait bitmasks of TRAIT_PAGE_SIZE consecutive
 * character codes. Pages are chained into a singly linked list.
 */
typedef struct trait_page_t trait_page_t;
struct trait_page_t
{
/* sequence number assigned when the page is created; it becomes the
 * hexadecimal suffix of the emitted array name (trait_page_XXXX) */
size_t no;
/* TRAIT_XXX bitmask of each code in the page, indexed by (code - page start) */
short traits[TRAIT_PAGE_SIZE];
/* next page in the list; NULL terminates the list */
trait_page_t* next;
};
/* number of distinct trait pages created so far; also the next page number */
size_t trait_page_count = 0;
/* head of the list of distinct trait pages; new pages are pushed at the front */
trait_page_t* trait_pages = NULL;
/* number of entries of trait_maps filled in so far */
size_t trait_map_count = 0;
/* trait_maps[n] points to the page describing the n-th block of
 * TRAIT_PAGE_SIZE codes; several entries may share one page */
trait_page_t* trait_maps[MAX_TRAIT_PAGE_COUNT];
/*
 * Bit flags describing the character classes a code belongs to.
 * Each flag occupies its own bit so that flags can be OR'ed together
 * into one trait mask. Bit 7 (0x0080) is not assigned to any flag.
 */
enum
{
	TRAIT_UPPER  = 0x0001,
	TRAIT_LOWER  = 0x0002,
	TRAIT_ALPHA  = 0x0004,
	TRAIT_DIGIT  = 0x0008,
	TRAIT_XDIGIT = 0x0010,
	TRAIT_ALNUM  = 0x0020,
	TRAIT_SPACE  = 0x0040,
	/* 0x0080 is skipped */
	TRAIT_PRINT  = 0x0100,
	TRAIT_GRAPH  = 0x0200,
	TRAIT_CNTRL  = 0x0400,
	TRAIT_PUNCT  = 0x0800,
	TRAIT_BLANK  = 0x1000
};
/*
 * Builds the trait mask of a single character code.
 *
 * Every wide-character classification function listed in the table below
 * is applied to 'code' under the current locale; the TRAIT_XXX flag paired
 * with each function that reports a match is OR'ed into the result.
 *
 * Returns the combined mask, or 0 if no classification matches.
 * No ASCII/PHONO/IDEOG/ENGLI classification is performed.
 */
int get_trait (qse_wcint_t code)
{
	static const struct
	{
		int (*test) (wint_t);
		int flag;
	} classes[] =
	{
		{ iswupper,  TRAIT_UPPER  },
		{ iswlower,  TRAIT_LOWER  },
		{ iswalpha,  TRAIT_ALPHA  },
		{ iswdigit,  TRAIT_DIGIT  },
		{ iswxdigit, TRAIT_XDIGIT },
		{ iswalnum,  TRAIT_ALNUM  },
		{ iswspace,  TRAIT_SPACE  },
		{ iswprint,  TRAIT_PRINT  },
		{ iswgraph,  TRAIT_GRAPH  },
		{ iswcntrl,  TRAIT_CNTRL  },
		{ iswpunct,  TRAIT_PUNCT  },
		{ iswblank,  TRAIT_BLANK  }
	};
	int mask = 0;
	size_t k;

	for (k = 0; k < sizeof(classes) / sizeof(classes[0]); k++) {
		if (classes[k].test(code)) mask |= classes[k].flag;
	}

	return mask;
}
/*
 * Computes the traits of the codes 'start'..'end' (inclusive) and records
 * the page describing them as the next entry of trait_maps.
 *
 * If an existing page on the trait_pages list carries exactly the same
 * traits, that page is reused. Otherwise a new page is allocated, numbered
 * with the current trait_page_count and pushed at the front of trait_pages.
 *
 * The range must not span more than TRAIT_PAGE_SIZE codes. Slots past
 * 'end' stay zero.
 *
 * The function terminates the program with an error message on stderr if
 * trait_maps is already full or if memory cannot be allocated.
 */
void make_trait_page (qse_wcint_t start, qse_wcint_t end)
{
	qse_wcint_t code;
	short traits[TRAIT_PAGE_SIZE];
	trait_page_t* page;

	/* never write past the end of the fixed-size map table */
	if (trait_map_count >= MAX_TRAIT_PAGE_COUNT) {
		fprintf (stderr, "error: too many trait pages\n");
		exit (EXIT_FAILURE);
	}

	memset (traits, 0, sizeof(traits));
	for (code = start; code <= end; code++) {
		traits[code - start] = get_trait(code);
	}

	/* share an existing page if one has identical content */
	for (page = trait_pages; page != NULL; page = page->next) {
		if (memcmp (traits, page->traits, sizeof(traits)) == 0) {
			trait_maps[trait_map_count++] = page;
			return;
		}
	}

	page = malloc (sizeof(*page));
	if (page == NULL) {
		fprintf (stderr, "error: out of memory\n");
		exit (EXIT_FAILURE);
	}

	page->no = trait_page_count++;
	memcpy (page->traits, traits, sizeof(traits));
	page->next = trait_pages;
	trait_pages = page;
	trait_maps[trait_map_count++] = page;
}
/*
 * Prints one trait page to stdout as a C array definition named
 * trait_page_XXXX, where XXXX is the page number in hexadecimal.
 *
 * Each element is written on its own line, either as 0 or as the
 * " | "-separated list of the TRAIT_XXX names whose bits are set in the
 * element. The names always appear in the order of the table below.
 * No ASCII/IDEOG/PHONO/ENGLI name is ever printed.
 */
void emit_trait_page (trait_page_t* page)
{
	static const struct
	{
		int bit;
		const char* name;
	} names[] =
	{
		{ TRAIT_UPPER,  "TRAIT_UPPER"  },
		{ TRAIT_LOWER,  "TRAIT_LOWER"  },
		{ TRAIT_ALPHA,  "TRAIT_ALPHA"  },
		{ TRAIT_DIGIT,  "TRAIT_DIGIT"  },
		{ TRAIT_XDIGIT, "TRAIT_XDIGIT" },
		{ TRAIT_ALNUM,  "TRAIT_ALNUM"  },
		{ TRAIT_SPACE,  "TRAIT_SPACE"  },
		{ TRAIT_PRINT,  "TRAIT_PRINT"  },
		{ TRAIT_GRAPH,  "TRAIT_GRAPH"  },
		{ TRAIT_CNTRL,  "TRAIT_CNTRL"  },
		{ TRAIT_PUNCT,  "TRAIT_PUNCT"  },
		{ TRAIT_BLANK,  "TRAIT_BLANK"  }
	};
	size_t slot, k;

	printf ("static qse_uint16_t trait_page_%04X[%u] =\n{\n",
		(unsigned int)page->no, (unsigned int)TRAIT_PAGE_SIZE);

	for (slot = 0; slot < TRAIT_PAGE_SIZE; slot++) {
		int mask = page->traits[slot];
		const char* sep = ""; /* becomes " | " once a name has been printed */

		if (slot != 0) printf (",\n");
		printf ("\t");

		if (mask == 0) {
			printf ("0");
			continue;
		}

		for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
			if (mask & names[k].bit) {
				printf ("%s%s", sep, names[k].name);
				sep = " | ";
			}
		}
	}

	printf ("\n};\n");
}
/*
 * Prints the page lookup table to stdout as a C array named trait_map.
 *
 * One entry is written per element of trait_maps that has been filled in.
 * Each entry is preceded by a comment showing the range of codes it covers
 * and names the trait_page_XXXX array describing that range.
 */
void emit_trait_map (void)
{
	size_t i;

	printf ("static qse_uint16_t* trait_map[%u] =\n{\n", (unsigned int)trait_map_count);

	for (i = 0; i < trait_map_count; i++) {
		if (i != 0) printf (",\n");

		/* size_t values are converted explicitly so that the arguments
		 * match the conversion specifiers on every platform */
		printf ("\t /* 0x%lX-0x%lX */ ",
			(unsigned long)(i * TRAIT_PAGE_SIZE),
			(unsigned long)((i + 1) * TRAIT_PAGE_SIZE - 1));
		printf ("trait_page_%04X", (unsigned int)trait_maps[i]->no);
	}

	printf ("\n};\n");
}
/*
 * Prints the heading of the generated output to stdout: a comment naming
 * this tool, followed by the definition of TRAIT_MAX, which is set to
 * MAX_CHAR written in hexadecimal.
 */
static void emit_trait_macros (void)
{
	const unsigned long max_code = (unsigned long)MAX_CHAR;

	fputs ("/* generated by tools/uni-trait.c */\n\n", stdout);
	printf ("#define TRAIT_MAX 0x%lX\n\n", max_code);
}
/*
 * Generates the trait tables and writes them to stdout.
 *
 * The locale is taken from the environment and its name must contain one
 * of the UTF-8 suffixes listed below; otherwise an error is printed to
 * stderr and -1 is returned. On success, a trait page is built for every
 * block of TRAIT_PAGE_SIZE codes below MAX_CHAR, then the heading, all
 * distinct pages and finally the page map are printed, and 0 is returned.
 */
int main ()
{
	static const char* const utf8_tags[] =
	{
		".utf8", ".UTF8", ".utf-8", ".UTF-8"
	};
	const char* locale_name;
	int utf8_found = 0;
	size_t k;
	qse_wcint_t first;
	trait_page_t* pg;

	locale_name = setlocale (LC_ALL, "");
	if (locale_name != NULL) {
		for (k = 0; k < sizeof(utf8_tags) / sizeof(utf8_tags[0]); k++) {
			if (strstr(locale_name, utf8_tags[k]) != NULL) {
				utf8_found = 1;
				break;
			}
		}
	}

	if (!utf8_found) {
		fprintf (stderr, "error: the locale should be utf-8 compatible\n");
		return -1;
	}

	/* classify the whole code range one page at a time */
	first = 0;
	while (first < MAX_CHAR) {
		make_trait_page (first, first + TRAIT_PAGE_SIZE - 1);
		first += TRAIT_PAGE_SIZE;
	}

	emit_trait_macros ();

	pg = trait_pages;
	while (pg != NULL) {
		emit_trait_page (pg);
		printf ("\n");
		pg = pg->next;
	}

	emit_trait_map ();
	return 0;
}