283 lines
6.0 KiB
C
283 lines
6.0 KiB
C
#include <hawk-cmn.h>
|
|
#include <locale.h>
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
#include <stdio.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#if HAWK_SIZEOF_UCH_T == HAWK_SIZEOF_INT16_T
|
|
#define MAX_CHAR 0xFFFF
|
|
#else
|
|
/*#define MAX_CHAR 0xE01EF*/
|
|
#define MAX_CHAR 0x10FFFF
|
|
#endif
|
|
|
|
#define UCH_PROP_PAGE_SIZE 256
|
|
#define MAX_UCH_PROP_PAGE_COUNT ((MAX_CHAR + UCH_PROP_PAGE_SIZE) / UCH_PROP_PAGE_SIZE)
|
|
|
|
typedef struct prop_page_t prop_page_t;
|
|
struct prop_page_t
|
|
{
|
|
size_t no;
|
|
hawk_uint16_t props[UCH_PROP_PAGE_SIZE];
|
|
prop_page_t* next;
|
|
};
|
|
|
|
size_t prop_page_count = 0;
|
|
prop_page_t* prop_pages = NULL;
|
|
|
|
size_t prop_map_count = 0;
|
|
prop_page_t* prop_maps[MAX_UCH_PROP_PAGE_COUNT];
|
|
|
|
enum
|
|
{
|
|
UCH_PROP_UPPER = (1 << 0),
|
|
UCH_PROP_LOWER = (1 << 1),
|
|
UCH_PROP_ALPHA = (1 << 2),
|
|
UCH_PROP_DIGIT = (1 << 3),
|
|
UCH_PROP_XDIGIT = (1 << 4),
|
|
UCH_PROP_ALNUM = (1 << 5),
|
|
UCH_PROP_SPACE = (1 << 6),
|
|
UCH_PROP_PRINT = (1 << 8),
|
|
UCH_PROP_GRAPH = (1 << 9),
|
|
UCH_PROP_CNTRL = (1 << 10),
|
|
UCH_PROP_PUNCT = (1 << 11),
|
|
UCH_PROP_BLANK = (1 << 12)
|
|
};
|
|
|
|
int get_prop (hawk_uci_t code)
|
|
{
|
|
int prop = 0;
|
|
|
|
if (iswupper(code)) prop |= UCH_PROP_UPPER;
|
|
if (iswlower(code)) prop |= UCH_PROP_LOWER;
|
|
if (iswalpha(code)) prop |= UCH_PROP_ALPHA;
|
|
if (iswdigit(code)) prop |= UCH_PROP_DIGIT;
|
|
if (iswxdigit(code)) prop |= UCH_PROP_XDIGIT;
|
|
if (iswalnum(code)) prop |= UCH_PROP_ALNUM;
|
|
if (iswspace(code)) prop |= UCH_PROP_SPACE;
|
|
if (iswprint(code)) prop |= UCH_PROP_PRINT;
|
|
if (iswgraph(code)) prop |= UCH_PROP_GRAPH;
|
|
if (iswcntrl(code)) prop |= UCH_PROP_CNTRL;
|
|
if (iswpunct(code)) prop |= UCH_PROP_PUNCT;
|
|
if (iswblank(code)) prop |= UCH_PROP_BLANK;
|
|
/*
|
|
if (iswascii(code)) prop |= UCH_PROP_ASCII;
|
|
if (isphonogram(code)) prop |= UCH_PROP_PHONO;
|
|
if (isideogram(code)) prop |= UCH_PROP_IDEOG;
|
|
if (isenglish(code)) prop |= UCH_PROP_ENGLI;
|
|
*/
|
|
|
|
return prop;
|
|
}
|
|
|
|
void make_prop_page (hawk_uci_t start, hawk_uci_t end)
|
|
{
|
|
hawk_uci_t code;
|
|
hawk_uint16_t props[UCH_PROP_PAGE_SIZE];
|
|
prop_page_t* page;
|
|
|
|
memset (props, 0, sizeof(props));
|
|
for (code = start; code <= end; code++) {
|
|
props[code - start] = get_prop(code);
|
|
}
|
|
|
|
for (page = prop_pages; page != NULL; page = page->next) {
|
|
if (memcmp (props, page->props, sizeof(props)) == 0) {
|
|
prop_maps[prop_map_count++] = page;
|
|
return;
|
|
}
|
|
}
|
|
|
|
page = (prop_page_t*)malloc (sizeof(prop_page_t));
|
|
page->no = prop_page_count++;
|
|
memcpy (page->props, props, sizeof(props));
|
|
page->next = prop_pages;
|
|
|
|
prop_pages = page;
|
|
prop_maps[prop_map_count++] = page;
|
|
}
|
|
|
|
void emit_prop_page (prop_page_t* page)
|
|
{
|
|
size_t i;
|
|
int prop, need_or;
|
|
|
|
printf ("static hawk_uint16_t uch_prop_page_%04X[%u] =\n{\n",
|
|
(unsigned int)page->no, (unsigned int)UCH_PROP_PAGE_SIZE);
|
|
|
|
for (i = 0; i < UCH_PROP_PAGE_SIZE; i++) {
|
|
|
|
need_or = 0;
|
|
prop = page->props[i];
|
|
|
|
if (i != 0) printf (",\n");
|
|
printf ("\t");
|
|
|
|
if (prop == 0) {
|
|
printf ("0");
|
|
continue;
|
|
}
|
|
|
|
if (prop & UCH_PROP_UPPER) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_UPPER");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_LOWER) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_LOWER");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_ALPHA) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_ALPHA");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_DIGIT) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_DIGIT");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_XDIGIT) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_XDIGIT");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_ALNUM) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_ALNUM");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_SPACE) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_SPACE");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_PRINT) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_PRINT");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_GRAPH) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_GRAPH");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_CNTRL) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_CNTRL");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_PUNCT) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_PUNCT");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_BLANK) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_BLANK");
|
|
need_or = 1;
|
|
}
|
|
|
|
|
|
/*
|
|
if (prop & UCH_PROP_ASCII) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_ASCII");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_IDEOG) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_IDEOG");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_PHONO) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_PHONO");
|
|
need_or = 1;
|
|
}
|
|
|
|
if (prop & UCH_PROP_ENGLI) {
|
|
if (need_or) printf (" | ");
|
|
printf ("HAWK_UCH_PROP_ENGLI");
|
|
need_or = 1;
|
|
}
|
|
*/
|
|
|
|
}
|
|
|
|
printf ("\n};\n");
|
|
}
|
|
|
|
void emit_prop_map ()
|
|
{
|
|
size_t i;
|
|
|
|
printf ("static hawk_uint16_t* uch_prop_map[%u] =\n{\n", (unsigned int)prop_map_count);
|
|
|
|
for (i = 0; i < prop_map_count; i++) {
|
|
if (i != 0) printf (",\n");
|
|
printf ("\t /* 0x%lX-0x%lX */ ",
|
|
(unsigned long int)(i * UCH_PROP_PAGE_SIZE),
|
|
(unsigned long int)((i + 1) * UCH_PROP_PAGE_SIZE - 1));
|
|
printf ("uch_prop_page_%04X", (int)prop_maps[i]->no);
|
|
}
|
|
|
|
printf ("\n};\n");
|
|
}
|
|
|
|
static void emit_prop_macros (void)
|
|
{
|
|
printf ("/* generated by tools/uni-prop.c */\n\n");
|
|
printf ("#define UCH_PROP_MAX 0x%lX\n", (unsigned long)MAX_CHAR);
|
|
printf ("\n");
|
|
}
|
|
|
|
int main ()
|
|
{
|
|
hawk_uci_t code;
|
|
prop_page_t* page;
|
|
char* locale;
|
|
|
|
locale = setlocale (LC_ALL, "");
|
|
if (locale == NULL ||
|
|
(strstr(locale, ".utf8") == NULL && strstr(locale, ".UTF8") == NULL &&
|
|
strstr(locale, ".utf-8") == NULL && strstr(locale, ".UTF-8") == NULL)) {
|
|
fprintf (stderr, "error: the locale should be utf-8 compatible\n");
|
|
return -1;
|
|
}
|
|
|
|
|
|
for (code = 0; code < MAX_CHAR; code += UCH_PROP_PAGE_SIZE) {
|
|
make_prop_page (code, code + UCH_PROP_PAGE_SIZE - 1);
|
|
}
|
|
|
|
emit_prop_macros ();
|
|
|
|
for (page = prop_pages; page != NULL; page = page->next) {
|
|
emit_prop_page (page);
|
|
printf ("\n");
|
|
}
|
|
|
|
emit_prop_map ();
|
|
|
|
return 0;
|
|
}
|
|
|