hawk/tools/uni-prop.c

283 lines
6.0 KiB
C

#include <hawk-cmn.h>
#include <locale.h>
#include <wchar.h>
#include <wctype.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* MAX_CHAR is the highest code point covered by the generated tables. It is
 * 0xFFFF when HAWK_SIZEOF_UCH_T equals HAWK_SIZEOF_INT16_T (presumably a
 * 16-bit hawk_uch_t; both macros come from hawk-cmn.h) and 0x10FFFF
 * otherwise. */
#if HAWK_SIZEOF_UCH_T == HAWK_SIZEOF_INT16_T
#define MAX_CHAR 0xFFFF
#else
/* alternative, smaller upper bound that is currently disabled: */
/*#define MAX_CHAR 0xE01EF*/
#define MAX_CHAR 0x10FFFF
#endif
/* Number of consecutive code points described by one property page. */
#define UCH_PROP_PAGE_SIZE 256
/* Number of pages needed to cover the MAX_CHAR + 1 code points 0..MAX_CHAR,
 * rounded up to whole pages. */
#define MAX_UCH_PROP_PAGE_COUNT ((MAX_CHAR + UCH_PROP_PAGE_SIZE) / UCH_PROP_PAGE_SIZE)
/* One distinct table of property bits for UCH_PROP_PAGE_SIZE consecutive
 * code points. Pages are kept in a singly linked list headed by prop_pages,
 * and several code point ranges may share the same page. */
typedef struct prop_page_t prop_page_t;
struct prop_page_t
{
/* sequence number assigned when the page is created; it becomes part of the
 * emitted array name (uch_prop_page_<no>) */
size_t no;
/* UCH_PROP_* bit mask for each code point slot of the page */
hawk_uint16_t props[UCH_PROP_PAGE_SIZE];
/* next page in the list, or NULL for the last one */
prop_page_t* next;
};
/* number of distinct pages created so far; also the `no` given to the next
 * page that gets created */
size_t prop_page_count = 0;
/* head of the list of distinct pages; new pages are pushed at the front */
prop_page_t* prop_pages = NULL;
/* number of prop_maps entries filled in so far */
size_t prop_map_count = 0;
/* prop_maps[i] points to the page describing code points
 * i * UCH_PROP_PAGE_SIZE .. (i + 1) * UCH_PROP_PAGE_SIZE - 1 */
prop_page_t* prop_maps[MAX_UCH_PROP_PAGE_COUNT];
/* Property bits recorded for each code point. emit_prop_page() writes them
 * out symbolically as the matching HAWK_UCH_PROP_* names rather than as
 * numbers. Note that bit 7 is not assigned to any property. */
enum
{
UCH_PROP_UPPER = (1 << 0),
UCH_PROP_LOWER = (1 << 1),
UCH_PROP_ALPHA = (1 << 2),
UCH_PROP_DIGIT = (1 << 3),
UCH_PROP_XDIGIT = (1 << 4),
UCH_PROP_ALNUM = (1 << 5),
UCH_PROP_SPACE = (1 << 6),
UCH_PROP_PRINT = (1 << 8),
UCH_PROP_GRAPH = (1 << 9),
UCH_PROP_CNTRL = (1 << 10),
UCH_PROP_PUNCT = (1 << 11),
UCH_PROP_BLANK = (1 << 12)
};
/*
 * Builds the UCH_PROP_* bit mask for a single code point.
 *
 * Each <wctype.h> classification function is asked about `code` and the
 * corresponding bit is set when it answers non-zero. The answers depend on
 * the locale in effect at the time of the call.
 *
 * The ASCII, phonogram, ideogram and English classes that were once planned
 * here are not generated.
 *
 * Returns the OR-ed UCH_PROP_* bits, or 0 if no class matches.
 */
int get_prop (hawk_uci_t code)
{
	/* one row per classification function, in the order of the enum */
	static const struct
	{
		int (*test) (wint_t);
		int bit;
	} classes[] =
	{
		{ iswupper,  UCH_PROP_UPPER  },
		{ iswlower,  UCH_PROP_LOWER  },
		{ iswalpha,  UCH_PROP_ALPHA  },
		{ iswdigit,  UCH_PROP_DIGIT  },
		{ iswxdigit, UCH_PROP_XDIGIT },
		{ iswalnum,  UCH_PROP_ALNUM  },
		{ iswspace,  UCH_PROP_SPACE  },
		{ iswprint,  UCH_PROP_PRINT  },
		{ iswgraph,  UCH_PROP_GRAPH  },
		{ iswcntrl,  UCH_PROP_CNTRL  },
		{ iswpunct,  UCH_PROP_PUNCT  },
		{ iswblank,  UCH_PROP_BLANK  }
	};
	int mask = 0;
	size_t k;

	for (k = 0; k < sizeof(classes) / sizeof(classes[0]); k++) {
		if (classes[k].test(code)) mask |= classes[k].bit;
	}

	return mask;
}
/*
 * Computes the property page for the code points start..end (inclusive) and
 * appends it to prop_maps.
 *
 * If a page with exactly the same contents already exists in the prop_pages
 * list, that page is reused. Otherwise a new page is allocated, numbered with
 * the next value of prop_page_count and pushed at the front of the list.
 *
 * The caller must pass a range of at most UCH_PROP_PAGE_SIZE code points and
 * must not call this more than MAX_UCH_PROP_PAGE_COUNT times; neither
 * condition is checked here.
 *
 * The process is terminated with EXIT_FAILURE if memory for a new page cannot
 * be allocated.
 */
void make_prop_page (hawk_uci_t start, hawk_uci_t end)
{
	hawk_uci_t code;
	hawk_uint16_t props[UCH_PROP_PAGE_SIZE];
	prop_page_t* page;

	/* slots beyond end - start, if any, keep the value 0 */
	memset (props, 0, sizeof(props));
	for (code = start; code <= end; code++) {
		props[code - start] = (hawk_uint16_t)get_prop(code);
	}

	/* share an existing page when the contents are identical */
	for (page = prop_pages; page != NULL; page = page->next) {
		if (memcmp(props, page->props, sizeof(props)) == 0) {
			prop_maps[prop_map_count++] = page;
			return;
		}
	}

	page = malloc(sizeof(*page));
	if (page == NULL) {
		/* a partial table would be useless, so give up right away */
		fprintf (stderr, "error: out of memory while creating property page %u\n",
			(unsigned int)prop_page_count);
		exit (EXIT_FAILURE);
	}

	page->no = prop_page_count++;
	memcpy (page->props, props, sizeof(props));
	page->next = prop_pages;
	prop_pages = page;

	prop_maps[prop_map_count++] = page;
}
/*
 * Writes one property page to stdout as the C definition of a static
 * hawk_uint16_t array named uch_prop_page_<no>, where <no> is the page
 * number printed as at least four upper-case hexadecimal digits.
 *
 * Every slot is printed on its own tab-indented line: either a literal 0 or
 * the HAWK_UCH_PROP_* names of the bits that are set, joined with " | ".
 */
void emit_prop_page (prop_page_t* page)
{
	/* bit -> emitted symbol, listed in the order the symbols are printed */
	static const struct
	{
		int bit;
		const char* name;
	} flags[] =
	{
		{ UCH_PROP_UPPER,  "HAWK_UCH_PROP_UPPER"  },
		{ UCH_PROP_LOWER,  "HAWK_UCH_PROP_LOWER"  },
		{ UCH_PROP_ALPHA,  "HAWK_UCH_PROP_ALPHA"  },
		{ UCH_PROP_DIGIT,  "HAWK_UCH_PROP_DIGIT"  },
		{ UCH_PROP_XDIGIT, "HAWK_UCH_PROP_XDIGIT" },
		{ UCH_PROP_ALNUM,  "HAWK_UCH_PROP_ALNUM"  },
		{ UCH_PROP_SPACE,  "HAWK_UCH_PROP_SPACE"  },
		{ UCH_PROP_PRINT,  "HAWK_UCH_PROP_PRINT"  },
		{ UCH_PROP_GRAPH,  "HAWK_UCH_PROP_GRAPH"  },
		{ UCH_PROP_CNTRL,  "HAWK_UCH_PROP_CNTRL"  },
		{ UCH_PROP_PUNCT,  "HAWK_UCH_PROP_PUNCT"  },
		{ UCH_PROP_BLANK,  "HAWK_UCH_PROP_BLANK"  }
	};
	const size_t flag_count = sizeof(flags) / sizeof(flags[0]);
	size_t slot, f;

	printf ("static hawk_uint16_t uch_prop_page_%04X[%u] =\n{\n",
		(unsigned int)page->no, (unsigned int)UCH_PROP_PAGE_SIZE);

	for (slot = 0; slot < UCH_PROP_PAGE_SIZE; slot++) {
		const int bits = page->props[slot];
		const char* sep = ""; /* becomes " | " once a name has been printed */

		/* entries are separated by ",\n"; there is no trailing comma */
		printf ("%s\t", (slot != 0)? ",\n": "");

		if (bits == 0) {
			printf ("0");
			continue;
		}

		for (f = 0; f < flag_count; f++) {
			if (!(bits & flags[f].bit)) continue;
			printf ("%s%s", sep, flags[f].name);
			sep = " | ";
		}
	}

	printf ("\n};\n");
}
/*
 * Writes the page map to stdout as the C definition of a static array of
 * hawk_uint16_t pointers named uch_prop_map.
 *
 * Entry i is the name of the page array recorded in prop_maps[i], preceded
 * by a comment giving the range i * UCH_PROP_PAGE_SIZE ..
 * (i + 1) * UCH_PROP_PAGE_SIZE - 1 that the entry stands for.
 */
void emit_prop_map (void)
{
	size_t i;

	printf ("static hawk_uint16_t* uch_prop_map[%u] =\n{\n", (unsigned int)prop_map_count);

	for (i = 0; i < prop_map_count; i++) {
		if (i != 0) printf (",\n");
		printf ("\t /* 0x%lX-0x%lX */ ",
			(unsigned long int)(i * UCH_PROP_PAGE_SIZE),
			(unsigned long int)((i + 1) * UCH_PROP_PAGE_SIZE - 1));
		/* %X takes an unsigned int; this is the same cast emit_prop_page()
		 * uses when it prints the array name being referenced here */
		printf ("uch_prop_page_%04X", (unsigned int)prop_maps[i]->no);
	}

	printf ("\n};\n");
}
/*
 * Writes the header of the generated output to stdout: a "generated by"
 * comment, a blank line, the definition of UCH_PROP_MAX as MAX_CHAR in
 * upper-case hexadecimal, and another blank line.
 */
static void emit_prop_macros (void)
{
	const unsigned long max_code = (unsigned long)MAX_CHAR;

	fputs ("/* generated by tools/uni-prop.c */\n\n", stdout);
	printf ("#define UCH_PROP_MAX 0x%lX\n", max_code);
	putchar ('\n');
}
/*
 * Generates the character property tables on stdout.
 *
 * The locale is taken from the environment and must be reported under a name
 * containing ".utf8", ".UTF8", ".utf-8" or ".UTF-8", because get_prop()
 * relies on the locale-dependent <wctype.h> functions. One property page is
 * then built for every UCH_PROP_PAGE_SIZE code points below MAX_CHAR, and
 * the macros, the distinct pages and the page map are printed in that order.
 *
 * Returns EXIT_SUCCESS when everything has been written, or EXIT_FAILURE if
 * the locale is unsuitable or writing to stdout failed.
 */
int main (void)
{
	hawk_uci_t code;
	prop_page_t* page;
	char* locale;

	locale = setlocale (LC_ALL, "");
	if (locale == NULL ||
	    (strstr(locale, ".utf8") == NULL && strstr(locale, ".UTF8") == NULL &&
	     strstr(locale, ".utf-8") == NULL && strstr(locale, ".UTF-8") == NULL)) {
		fprintf (stderr, "error: the locale should be utf-8 compatible\n");
		return EXIT_FAILURE;
	}

	for (code = 0; code < MAX_CHAR; code += UCH_PROP_PAGE_SIZE) {
		make_prop_page (code, code + UCH_PROP_PAGE_SIZE - 1);
	}

	emit_prop_macros ();
	for (page = prop_pages; page != NULL; page = page->next) {
		emit_prop_page (page);
		printf ("\n");
	}
	emit_prop_map ();

	/* The output is normally redirected into a source file. A write error
	 * that went unnoticed would leave a truncated table behind while the
	 * tool still reported success. */
	if (ferror(stdout) || fflush(stdout) != 0) {
		fprintf (stderr, "error: failed to write the generated tables\n");
		return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}