#include #include #include #include #include #include #include #include #if HAWK_SIZEOF_UCH_T == HAWK_SIZEOF_INT16_T #define MAX_CHAR 0xFFFF #else /*#define MAX_CHAR 0xE01EF*/ #define MAX_CHAR 0x10FFFF #endif #define UCH_PROP_PAGE_SIZE 256 #define MAX_UCH_PROP_PAGE_COUNT ((MAX_CHAR + UCH_PROP_PAGE_SIZE) / UCH_PROP_PAGE_SIZE) typedef struct prop_page_t prop_page_t; struct prop_page_t { size_t no; hawk_uint16_t props[UCH_PROP_PAGE_SIZE]; prop_page_t* next; }; size_t prop_page_count = 0; prop_page_t* prop_pages = NULL; size_t prop_map_count = 0; prop_page_t* prop_maps[MAX_UCH_PROP_PAGE_COUNT]; enum { UCH_PROP_UPPER = (1 << 0), UCH_PROP_LOWER = (1 << 1), UCH_PROP_ALPHA = (1 << 2), UCH_PROP_DIGIT = (1 << 3), UCH_PROP_XDIGIT = (1 << 4), UCH_PROP_ALNUM = (1 << 5), UCH_PROP_SPACE = (1 << 6), UCH_PROP_PRINT = (1 << 8), UCH_PROP_GRAPH = (1 << 9), UCH_PROP_CNTRL = (1 << 10), UCH_PROP_PUNCT = (1 << 11), UCH_PROP_BLANK = (1 << 12) }; int get_prop (hawk_uci_t code) { int prop = 0; if (iswupper(code)) prop |= UCH_PROP_UPPER; if (iswlower(code)) prop |= UCH_PROP_LOWER; if (iswalpha(code)) prop |= UCH_PROP_ALPHA; if (iswdigit(code)) prop |= UCH_PROP_DIGIT; if (iswxdigit(code)) prop |= UCH_PROP_XDIGIT; if (iswalnum(code)) prop |= UCH_PROP_ALNUM; if (iswspace(code)) prop |= UCH_PROP_SPACE; if (iswprint(code)) prop |= UCH_PROP_PRINT; if (iswgraph(code)) prop |= UCH_PROP_GRAPH; if (iswcntrl(code)) prop |= UCH_PROP_CNTRL; if (iswpunct(code)) prop |= UCH_PROP_PUNCT; if (iswblank(code)) prop |= UCH_PROP_BLANK; /* if (iswascii(code)) prop |= UCH_PROP_ASCII; if (isphonogram(code)) prop |= UCH_PROP_PHONO; if (isideogram(code)) prop |= UCH_PROP_IDEOG; if (isenglish(code)) prop |= UCH_PROP_ENGLI; */ return prop; } void make_prop_page (hawk_uci_t start, hawk_uci_t end) { hawk_uci_t code; hawk_uint16_t props[UCH_PROP_PAGE_SIZE]; prop_page_t* page; memset (props, 0, sizeof(props)); for (code = start; code <= end; code++) { props[code - start] = get_prop(code); } for (page = prop_pages; page != NULL; page = page->next) { if (memcmp (props, page->props, sizeof(props)) == 0) { prop_maps[prop_map_count++] = page; return; } } page = (prop_page_t*)malloc (sizeof(prop_page_t)); page->no = prop_page_count++; memcpy (page->props, props, sizeof(props)); page->next = prop_pages; prop_pages = page; prop_maps[prop_map_count++] = page; } void emit_prop_page (prop_page_t* page) { size_t i; int prop, need_or; printf ("static hawk_uint16_t uch_prop_page_%04X[%u] =\n{\n", (unsigned int)page->no, (unsigned int)UCH_PROP_PAGE_SIZE); for (i = 0; i < UCH_PROP_PAGE_SIZE; i++) { need_or = 0; prop = page->props[i]; if (i != 0) printf (",\n"); printf ("\t"); if (prop == 0) { printf ("0"); continue; } if (prop & UCH_PROP_UPPER) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_UPPER"); need_or = 1; } if (prop & UCH_PROP_LOWER) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_LOWER"); need_or = 1; } if (prop & UCH_PROP_ALPHA) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_ALPHA"); need_or = 1; } if (prop & UCH_PROP_DIGIT) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_DIGIT"); need_or = 1; } if (prop & UCH_PROP_XDIGIT) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_XDIGIT"); need_or = 1; } if (prop & UCH_PROP_ALNUM) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_ALNUM"); need_or = 1; } if (prop & UCH_PROP_SPACE) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_SPACE"); need_or = 1; } if (prop & UCH_PROP_PRINT) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_PRINT"); need_or = 1; } if (prop & UCH_PROP_GRAPH) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_GRAPH"); need_or = 1; } if (prop & UCH_PROP_CNTRL) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_CNTRL"); need_or = 1; } if (prop & UCH_PROP_PUNCT) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_PUNCT"); need_or = 1; } if (prop & UCH_PROP_BLANK) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_BLANK"); need_or = 1; } /* if (prop & UCH_PROP_ASCII) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_ASCII"); need_or = 1; } if (prop & UCH_PROP_IDEOG) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_IDEOG"); need_or = 1; } if (prop & UCH_PROP_PHONO) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_PHONO"); need_or = 1; } if (prop & UCH_PROP_ENGLI) { if (need_or) printf (" | "); printf ("HAWK_UCH_PROP_ENGLI"); need_or = 1; } */ } printf ("\n};\n"); } void emit_prop_map () { size_t i; printf ("static hawk_uint16_t* uch_prop_map[%u] =\n{\n", (unsigned int)prop_map_count); for (i = 0; i < prop_map_count; i++) { if (i != 0) printf (",\n"); printf ("\t /* 0x%lX-0x%lX */ ", (unsigned long int)(i * UCH_PROP_PAGE_SIZE), (unsigned long int)((i + 1) * UCH_PROP_PAGE_SIZE - 1)); printf ("uch_prop_page_%04X", (int)prop_maps[i]->no); } printf ("\n};\n"); } static void emit_prop_macros (void) { printf ("/* generated by tools/uni-prop.c */\n\n"); printf ("#define UCH_PROP_MAX 0x%lX\n", (unsigned long)MAX_CHAR); printf ("\n"); } int main () { hawk_uci_t code; prop_page_t* page; char* locale; locale = setlocale (LC_ALL, ""); if (locale == NULL || (strstr(locale, ".utf8") == NULL && strstr(locale, ".UTF8") == NULL && strstr(locale, ".utf-8") == NULL && strstr(locale, ".UTF-8") == NULL)) { fprintf (stderr, "error: the locale should be utf-8 compatible\n"); return -1; } for (code = 0; code < MAX_CHAR; code += UCH_PROP_PAGE_SIZE) { make_prop_page (code, code + UCH_PROP_PAGE_SIZE - 1); } emit_prop_macros (); for (page = prop_pages; page != NULL; page = page->next) { emit_prop_page (page); printf ("\n"); } emit_prop_map (); return 0; }