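/*
 * Unicode character property table generator (its output identifies the
 * tool as tools/uni-prop.c).
 *
 * Classifies every code point up to MAX_CHAR with the C library's isw*()
 * predicates and prints C source for paged property tables on standard
 * output. It refuses to run unless the locale name indicates UTF-8.
 */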
|  | #include <hawk-cmn.h>
 | ||
|  | #include <locale.h>
 | ||
|  | #include <wchar.h>
 | ||
|  | #include <wctype.h>
 | ||
|  | #include <stdio.h>
 | ||
|  | #include <stddef.h>
 | ||
|  | #include <stdlib.h>
 | ||
|  | #include <string.h>
 | ||
|  | 
 | ||
/*
 * Upper bound of the code points to classify. 0xFFFF is used when
 * HAWK_SIZEOF_UCH_T equals HAWK_SIZEOF_INT16_T, 0x10FFFF otherwise.
 */
#if HAWK_SIZEOF_UCH_T == HAWK_SIZEOF_INT16_T
#	define MAX_CHAR 0xFFFF
#else
	/* alternative bound kept for reference: 0xE01EF */
#	define MAX_CHAR 0x10FFFF
#endif

/* Number of consecutive code points whose properties are stored in one page. */
#define UCH_PROP_PAGE_SIZE 256

/* Number of pages needed to cover code points 0 through MAX_CHAR. */
#define MAX_UCH_PROP_PAGE_COUNT ((MAX_CHAR + UCH_PROP_PAGE_SIZE) / UCH_PROP_PAGE_SIZE)
|  | 
 | ||
|  | typedef struct prop_page_t prop_page_t; | ||
|  | struct prop_page_t | ||
|  | { | ||
|  | 	size_t no; | ||
|  | 	hawk_uint16_t props[UCH_PROP_PAGE_SIZE]; | ||
|  | 	prop_page_t* next; | ||
|  | }; | ||
|  | 
 | ||
|  | size_t prop_page_count = 0; | ||
|  | prop_page_t* prop_pages = NULL; | ||
|  | 
 | ||
|  | size_t prop_map_count = 0; | ||
|  | prop_page_t* prop_maps[MAX_UCH_PROP_PAGE_COUNT]; | ||
|  | 
 | ||
/*
 * Property bits recorded for each code point. Every constant is a
 * single-bit mask; bit 7 (0x0080) is not assigned.
 */
enum
{
	UCH_PROP_UPPER  = 0x0001,
	UCH_PROP_LOWER  = 0x0002,
	UCH_PROP_ALPHA  = 0x0004,
	UCH_PROP_DIGIT  = 0x0008,
	UCH_PROP_XDIGIT = 0x0010,
	UCH_PROP_ALNUM  = 0x0020,
	UCH_PROP_SPACE  = 0x0040,
	UCH_PROP_PRINT  = 0x0100,
	UCH_PROP_GRAPH  = 0x0200,
	UCH_PROP_CNTRL  = 0x0400,
	UCH_PROP_PUNCT  = 0x0800,
	UCH_PROP_BLANK  = 0x1000
};
|  | 
 | ||
|  | int get_prop (hawk_uci_t code) | ||
|  | { | ||
|  | 	int prop = 0; | ||
|  | 
 | ||
|  | 	if (iswupper(code))    prop |= UCH_PROP_UPPER; | ||
|  | 	if (iswlower(code))    prop |= UCH_PROP_LOWER; | ||
|  | 	if (iswalpha(code))    prop |= UCH_PROP_ALPHA; | ||
|  | 	if (iswdigit(code))    prop |= UCH_PROP_DIGIT; | ||
|  | 	if (iswxdigit(code))   prop |= UCH_PROP_XDIGIT; | ||
|  | 	if (iswalnum(code))    prop |= UCH_PROP_ALNUM; | ||
|  | 	if (iswspace(code))    prop |= UCH_PROP_SPACE; | ||
|  | 	if (iswprint(code))    prop |= UCH_PROP_PRINT; | ||
|  | 	if (iswgraph(code))    prop |= UCH_PROP_GRAPH; | ||
|  | 	if (iswcntrl(code))    prop |= UCH_PROP_CNTRL; | ||
|  | 	if (iswpunct(code))    prop |= UCH_PROP_PUNCT; | ||
|  | 	if (iswblank(code))    prop |= UCH_PROP_BLANK; | ||
|  | 	/*
 | ||
|  | 	if (iswascii(code))    prop |= UCH_PROP_ASCII; | ||
|  | 	if (isphonogram(code)) prop |= UCH_PROP_PHONO; | ||
|  | 	if (isideogram(code))  prop |= UCH_PROP_IDEOG; | ||
|  | 	if (isenglish(code))   prop |= UCH_PROP_ENGLI; | ||
|  | 	*/ | ||
|  | 
 | ||
|  | 	return prop; | ||
|  | } | ||
|  | 
 | ||
|  | void make_prop_page (hawk_uci_t start, hawk_uci_t end) | ||
|  | { | ||
|  | 	hawk_uci_t code; | ||
|  | 	hawk_uint16_t props[UCH_PROP_PAGE_SIZE]; | ||
|  | 	prop_page_t* page; | ||
|  | 
 | ||
|  | 	memset (props, 0, sizeof(props)); | ||
|  | 	for (code = start; code <= end; code++) { | ||
|  | 		props[code - start] = get_prop(code); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	for (page = prop_pages; page != NULL; page = page->next) { | ||
|  | 		if (memcmp (props, page->props, sizeof(props)) == 0) { | ||
|  | 			prop_maps[prop_map_count++] = page; | ||
|  | 			return; | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	page = (prop_page_t*)malloc (sizeof(prop_page_t)); | ||
|  | 	page->no = prop_page_count++; | ||
|  | 	memcpy (page->props, props, sizeof(props)); | ||
|  | 	page->next = prop_pages; | ||
|  | 
 | ||
|  | 	prop_pages = page; | ||
|  | 	prop_maps[prop_map_count++] = page; | ||
|  | } | ||
|  | 
 | ||
|  | void emit_prop_page (prop_page_t* page) | ||
|  | { | ||
|  | 	size_t i; | ||
|  | 	int prop, need_or; | ||
|  | 
 | ||
|  | 	printf ("static hawk_uint16_t uch_prop_page_%04X[%u] =\n{\n",  | ||
|  | 		(unsigned int)page->no, (unsigned int)UCH_PROP_PAGE_SIZE); | ||
|  | 
 | ||
|  | 	for (i = 0; i < UCH_PROP_PAGE_SIZE; i++) { | ||
|  | 
 | ||
|  | 		need_or = 0; | ||
|  | 		prop = page->props[i]; | ||
|  | 
 | ||
|  | 		if (i != 0) printf (",\n"); | ||
|  | 		printf ("\t"); | ||
|  | 
 | ||
|  | 		if (prop == 0) { | ||
|  | 			printf ("0"); | ||
|  | 			continue; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_UPPER) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_UPPER"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_LOWER) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_LOWER"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_ALPHA) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_ALPHA"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_DIGIT) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_DIGIT"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_XDIGIT) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_XDIGIT"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_ALNUM) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_ALNUM"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_SPACE) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_SPACE"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_PRINT) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_PRINT"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_GRAPH) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_GRAPH"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_CNTRL) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_CNTRL"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_PUNCT) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_PUNCT"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_BLANK) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_BLANK"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 
 | ||
|  | 		/*
 | ||
|  | 		if (prop & UCH_PROP_ASCII) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_ASCII"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_IDEOG) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_IDEOG"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_PHONO) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_PHONO"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		if (prop & UCH_PROP_ENGLI) { | ||
|  | 			if (need_or) printf (" | "); | ||
|  | 			printf ("HAWK_UCH_PROP_ENGLI"); | ||
|  | 			need_or = 1; | ||
|  | 		} | ||
|  | 		*/ | ||
|  | 
 | ||
|  | 	} | ||
|  | 
 | ||
|  | 	printf ("\n};\n"); | ||
|  | } | ||
|  | 
 | ||
|  | void emit_prop_map () | ||
|  | { | ||
|  | 	size_t i; | ||
|  | 
 | ||
|  | 	printf ("static hawk_uint16_t* uch_prop_map[%u] =\n{\n", (unsigned int)prop_map_count); | ||
|  | 
 | ||
|  | 	for (i = 0; i < prop_map_count; i++) { | ||
|  | 		if (i != 0) printf (",\n"); | ||
|  | 		printf ("\t /* 0x%lX-0x%lX */ ",  | ||
|  | 			(unsigned long int)(i * UCH_PROP_PAGE_SIZE), | ||
|  | 			(unsigned long int)((i + 1) * UCH_PROP_PAGE_SIZE - 1)); | ||
|  | 		printf ("uch_prop_page_%04X", (int)prop_maps[i]->no); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	printf ("\n};\n"); | ||
|  | } | ||
|  | 
 | ||
|  | static void emit_prop_macros (void) | ||
|  | { | ||
|  | 	printf ("/* generated by tools/uni-prop.c */\n\n"); | ||
|  | 	printf ("#define UCH_PROP_MAX 0x%lX\n", (unsigned long)MAX_CHAR); | ||
|  | 	printf ("\n"); | ||
|  | } | ||
|  | 
 | ||
|  | int main () | ||
|  | { | ||
|  | 	hawk_uci_t code; | ||
|  | 	prop_page_t* page; | ||
|  | 	char* locale; | ||
|  | 
 | ||
|  | 	locale = setlocale (LC_ALL, ""); | ||
|  | 	if (locale == NULL || | ||
|  | 	    (strstr(locale, ".utf8") == NULL && strstr(locale, ".UTF8") == NULL && | ||
|  | 	     strstr(locale, ".utf-8") == NULL && strstr(locale, ".UTF-8") == NULL)) { | ||
|  | 		fprintf (stderr, "error: the locale should be utf-8 compatible\n"); | ||
|  | 		return -1; | ||
|  | 	} | ||
|  | 
 | ||
|  | 
 | ||
|  | 	for (code = 0; code < MAX_CHAR; code += UCH_PROP_PAGE_SIZE) { | ||
|  | 		make_prop_page (code, code + UCH_PROP_PAGE_SIZE - 1); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	emit_prop_macros (); | ||
|  | 
 | ||
|  | 	for (page = prop_pages; page != NULL; page = page->next) { | ||
|  | 		emit_prop_page (page);	 | ||
|  | 		printf ("\n"); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	emit_prop_map (); | ||
|  | 
 | ||
|  | 	return 0; | ||
|  | } | ||
|  | 
 |