added cp949 and cp950.
deleted win32 target files for watcom. too difficult to maintain. added cmgr for cp949 and cp950.
This commit is contained in:
		
							
								
								
									
										23
									
								
								qse/tools/gencp.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								qse/tools/gencp.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | ||||
|  | ||||
| # | ||||
| # get the following unicode mapping files  | ||||
| # from unicode.org before executing this script. | ||||
| #   CP932.TXT CP936.TXT CP949.TXT CP950.TXT | ||||
| # | ||||
|  | ||||
# gencp NAME MAX_GAP
#
# Generate and verify the conversion tables for one code page.
#   NAME     lower-case code page name (e.g. cp949). The mapping file that
#            is read is the upper-cased name plus ".TXT" (e.g. CP949.TXT).
#   MAX_GAP  value handed to gencp1.awk as its MAX_GAP variable.
#
# Steps: gencp1.awk writes NAME.h, which is linked to x.h (the header that
# testcp.c includes); testcp is rebuilt; gencp0.awk writes the reference
# dump NAME.0; testcp writes NAME.1; the two dumps are compared.
# Prints "[NAME] OK" or "[NAME] NOT OK" and returns non-zero on failure.
gencp() {
	name="$1"
	max_gap="$2"
	# compute the mapping file name once, quoting the name
	src="$(printf '%s\n' "${name}" | tr '[a-z]' '[A-Z]').TXT"

	qseawk -vMAX_GAP="${max_gap}" --extraops=on -f gencp1.awk "${src}" > "${name}.h" 2>/dev/null
	ln -sf "${name}.h" x.h

	# do not fall through to a stale ./testcp left over from a previous
	# code page when the freshly generated header fails to compile
	if ! cc -o testcp testcp.c
	then
		echo "[$name] NOT OK"
		return 1
	fi

	qseawk --extraops=on -f gencp0.awk "${src}" > "${name}.0" 2>/dev/null
	./testcp > "${name}.1"

	if diff -q "${name}.0" "${name}.1"
	then
		echo "[$name] OK"
	else
		echo "[$name] NOT OK"
		return 1
	fi
}
|  | ||||
# Code pages to generate, written as name:max_gap
#   cp932 - ms shift-jis
#   cp936 - ms gbk
#   cp949 - ms euc-kr
#   cp950 - ms big5
for spec in cp932:64 cp936:96 cp949:128 cp950:64
do
	gencp "${spec%:*}" "${spec#*:}"
done
							
								
								
									
										54
									
								
								qse/tools/gencp0.awk
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								qse/tools/gencp0.awk
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,54 @@ | ||||
| #global mb_min, mb_max, wc_min, wc_max, mb, wc; | ||||
|  | ||||
# Seed the range trackers: minimums start above any value the main rule
# will see, maximums start at zero.
BEGIN {
	wc_min = mb_min = 0xFFFFFFFF;
	wc_max = mb_max = 0;
}
|  | ||||
# Runs for every input line that is not a '#' comment.
# Takes the first field as the multibyte code (mb) and the second as the
# wide-character code (wc), tracks the observed ranges, and records the
# first mapping seen in each direction. Diagnostics go to /dev/stderr.
!/^[[:space:]]*#/ {
	# ignore lines whose first two fields are not both 0x-prefixed
	if (!($1 ~ /^0x/ && $2 ~ /^0x/)) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128) 
	{
		# codes below 128 must map to themselves; they are not stored
		if (mb != wc)
		{
			# NOTE(review): in POSIX awk the END action still runs after
			# this exit, so the dump is still printed - confirm whether
			# that is intended under qseawk.
			print "ERROR: mb != wc where mb < 128. i can't handle this encoding map" > "/dev/stderr";
			exit 1;
		}
		next;
	}

	if (mb < mb_min) mb_min = mb;
	if (mb > mb_max) mb_max = mb;
	if (wc < wc_min) wc_min = wc;
	# compare against wc_max (not mb_max) so wc_max really is the maximum
	if (wc > wc_max) wc_max = wc;

	# keep the first mapping for a duplicated multibyte code
	if (mb in mb_arr)
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";
	else
		mb_arr[mb] = wc;

	# keep the first mapping for a duplicated wide-character code
	if (wc in wc_arr)
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
	else
		wc_arr[wc] = mb;
}
|  | ||||
# Print one "mb wc" line for every multibyte code from 0x0000 up to, but
# not including, 0xffff. Codes up to 127 map to themselves; other codes
# print their recorded mapping, or 0xffff when none was recorded.
END {
	mb = 0;
	while (mb < 0xffff)
	{
		if (mb <= 127)
			wc = mb;
		else if (mb in mb_arr)
			wc = mb_arr[mb];
		else
			wc = 0xffff;

		printf ("0x%04x 0x%04x\n", mb, wc);
		mb++;
	}
}
							
								
								
									
										201
									
								
								qse/tools/gencp1.awk
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										201
									
								
								qse/tools/gencp1.awk
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,201 @@ | ||||
| #global mb_min, mb_max, wc_min, wc_max, mb, wc; | ||||
|  | ||||
# Validate the command line and initialise the range trackers.
# Exactly one input file is expected (ARGC == 2); otherwise ERROR_CODE 1
# is left for the END action to report.
BEGIN {
	if (ARGC != 2)
	{
		ERROR_CODE = 1;
		exit 1;
	}

	# minimums start above any value the main rule will see, maximums at zero
	wc_min = mb_min = 0xFFFFFFFF;
	wc_max = mb_max = 0;

	# fall back to 64 when MAX_GAP was not given or is not positive
	if (MAX_GAP <= 0) MAX_GAP = 64;
}
|  | ||||
# Runs for every input line that is not a '#' comment.
# Field 1 is the multibyte code (mb), field 2 the wide-character code (wc).
# Updates the observed ranges and stores the first mapping seen in each
# direction; later duplicates only produce a warning on /dev/stderr.
!/^[[:space:]]*#/ {
	# both leading fields must be 0x-prefixed
	if ($1 !~ /^0x/ || $2 !~ /^0x/) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128)
	{
		# codes below 128 must be identity mappings and are never stored
		if (mb == wc) next;

		# ERROR_CODE 2 is reported by the END action
		ERROR_CODE = 2;
		exit 1;
	}

	# widen the observed ranges
	mb_min = (mb < mb_min)? mb: mb_min;
	mb_max = (mb > mb_max)? mb: mb_max;
	wc_min = (wc < wc_min)? wc: wc_min;
	wc_max = (wc > wc_max)? wc: wc_max;

	if (!(mb in mb_arr))
		mb_arr[mb] = wc;
	else
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";

	if (!(wc in wc_arr))
		wc_arr[wc] = mb;
	else
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
}
|  | ||||
# emit_simple (name, min, max, arr)
#   Print C source for a flat lookup table covering every code from min to
#   max inclusive, plus a lookup function called <name>.
#     name  base identifier; the table is <name>_tab, the function <name>
#     min   lowest code covered by the table
#     max   highest code covered by the table
#     arr   mapping from code to converted value
#   Codes missing from arr are stored as 0xffff, and the generated function
#   returns 0xffff for any code outside min..max.
function emit_simple (name, min, max, arr) {
	printf ("static qse_uint16_t %s_tab[] =\n", name);
	printf ("{\n");
	for (i = min; i <= max; i++)
	{
		wc = (i in arr)? arr[i]: 0xffff;

		printf ("\t0x%04xu", wc);
		# no comma after the last initializer
		if (i < max) printf (",\n");
		else printf ("\n");
	}
	printf ("};\n");

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= 0x%04xu && c <= 0x%04xu) return %s_tab[c - 0x%04xu];\n", min, max, name, min);
	printf ("\treturn 0xffffu;\n");
	# a function body is closed with a plain brace, as emit_bsearch does
	printf ("}\n");
}
|  | ||||
# emit_bsearch (name, min, max, arr)
#   Print C source for a segmented lookup: one array <name>_seg_<n> per run
#   of mapped codes, a <name>_range table giving the first/last code of each
#   segment, and a function <name> that binary-searches the range table.
#     name  base identifier used for the generated arrays and function
#     min   lowest code to scan
#     max   highest code to scan
#     arr   mapping from code to converted value
#   A run of at most MAX_GAP unmapped codes between two mapped codes is
#   padded with 0xffff and kept inside the current segment; a longer run
#   closes the segment and starts a new one. The generated function returns
#   0xffff for codes that fall outside every segment.
function emit_bsearch (name, min, max, arr) {
	# awk variables are global, so all counters must be reset on every call
	prev_in_arr = 0;
	prev_not_in_arr = 0;
	seg_no = 0;

	for (i = min; i <= max; i++)
	{
		if (i in arr)
		{
			if (prev_in_arr <= 0)
			{
				if (prev_not_in_arr > 0 && prev_not_in_arr <= MAX_GAP)
				{
					# the hole is small enough: fill it with 0xffff
					# and keep extending the current segment
					for (j = 0; j < prev_not_in_arr; j++) 
						printf (",\n\t0xffffu");
					seg_last[seg_no] = i;
					printf (",\n");
				}
				else 
				{
					if (prev_not_in_arr > 0)
					{
						# the hole is too large: close the current segment
						printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n", 
							seg_first[seg_no], seg_last[seg_no], 
							seg_last[seg_no] - seg_first[seg_no] + 1);
						seg_no++;
					}

					# open a new segment that starts at this code
					printf ("static qse_uint16_t %s_seg_%d[] =\n{\n", name, seg_no);
					seg_first[seg_no] = i;
					seg_last[seg_no] = i;
				}
			}
			else
			{
				# contiguous with the previous mapped code
				seg_last[seg_no] = i;
				printf (",\n");
			}

			printf ("\t0x%04xu /* 0x%04x */", arr[i], i);
			prev_in_arr++;
			prev_not_in_arr = 0;
		}
		else
		{
			# only count the hole; whether it is padded or ends the
			# segment is decided when the next mapped code is reached
			prev_in_arr = 0;
			prev_not_in_arr++;
		}
	}

	# close the segment that is still open when the scan ends
	if (prev_in_arr > 0) 
	{
		printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n", 
			seg_first[seg_no], seg_last[seg_no], 
			seg_last[seg_no] - seg_first[seg_no] + 1);
	}

	printf ("static struct %s_range_t\n{\n\tqse_uint16_t first, last;\n\tqse_uint16_t* seg;\n} %s_range[] =\n{\n", name, name); 
	printf ("\t{ 0x%04xu, 0x%04xu, %s_seg_0 }", seg_first[0], seg_last[0], name); 
	for (i = 1; i <= seg_no; i++) printf (",\n\t{ 0x%04xu, 0x%04xu, %s_seg_%d }", seg_first[i], seg_last[i], name, i); 
	printf ("\n};\n"); 

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);

	printf ("\tif (c >= %s_range[0].first &&\n\t    c <= %s_range[QSE_COUNTOF(%s_range)-1].last)\n\t{\n", name, name, name);

	# the format string below deliberately spans several source lines
	printf ("\t\tint left = 0, right = QSE_COUNTOF(%s_range) - 1, mid;
		while (left <= right)
		{
			mid = (left + right) / 2;
			if (c >= %s_range[mid].first && c <= %s_range[mid].last) 
				return %s_range[mid].seg[c - %s_range[mid].first];
			else if (c > %s_range[mid].last) 
				left = mid + 1; 
			else
				right = mid - 1;
		}\n", name, name, name, name, name, name);

	printf ("\t}\n\treturn 0xffffu;\n");
	printf ("}\n");
}
|  | ||||
# Report any error recorded in ERROR_CODE; otherwise print the generated
# header: a banner comment followed by the mbtowc and wctomb converters,
# in flat-table form when SIMPLE_MODE is set and in segmented
# binary-search form otherwise.
END {

	if (ERROR_CODE == 1)
	{
		print "USAGE: gencp1.awk codepage-file" > "/dev/stderr";
		exit 1
	}
	else if (ERROR_CODE == 2)
	{
		print "ERROR: mb != wc where mb < 128. i can't handle this encoding map" > "/dev/stderr";
		exit 1;
	}
	else
	{
		# capture the first output line of the date command for the banner
		"date" | getline date;
		close ("date");

		printf ("/* This is a private file automatically generated\n");
		printf (" * from %s on %s.\n", ARGV[1], date);
		printf (" * Never include this file directly into your source code.\n");
		printf (" *   mode=%s \n", (SIMPLE_MODE? "simple": "bsearch"));
		printf (" *   mb_min=0x%04x \n", mb_min);
		printf (" *   mb_max=0x%04x \n", mb_max);
		printf (" *   wc_min=0x%04x \n", wc_min);
		printf (" *   wc_max=0x%04x \n", wc_max);
		printf (" */\n\n");

		if (SIMPLE_MODE)
		{
			emit_simple ("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_simple ("wctomb", wc_min, wc_max, wc_arr);
		}
		else
		{
			emit_bsearch ("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_bsearch ("wctomb", wc_min, wc_max, wc_arr);
		}
	}
}
							
								
								
									
										29
									
								
								qse/tools/testcp.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								qse/tools/testcp.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,29 @@ | ||||
| #include <stdio.h> | ||||
|  | ||||
/* 16-bit code unit type used by the generated tables in x.h */
typedef unsigned short qse_uint16_t;

/* Number of elements in the array x. x must be an array, not a pointer.
 * The argument is parenthesized so that any array expression is accepted. */
#define QSE_COUNTOF(x) (sizeof(x) / sizeof((x)[0]))
|  | ||||
| #include "x.h" | ||||
|  | ||||
| int main () | ||||
| { | ||||
| 	qse_uint16_t mb; | ||||
| 	for (mb = 0; mb <= 127; mb++) | ||||
| 	{ | ||||
| 		printf ("0x%04x 0x%04x\n", mb, mb); | ||||
| 	} | ||||
| 	for (mb = 128; mb < 0xFFFF; mb++) | ||||
| 	{ | ||||
| 		qse_uint16_t wc = mbtowc(mb); | ||||
| 		printf ("0x%04x 0x%04x", mb, wc); | ||||
| 		if (wc != 0xFFFF) | ||||
| 		{ | ||||
| 			qse_uint16_t xmb = wctomb(wc); | ||||
| 			if (xmb != mb) printf (" (ERROR xmb=0x%04x)", xmb); | ||||
| 			 | ||||
| 		} | ||||
| 		printf ("\n"); | ||||
| 		 | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
		Reference in New Issue
	
	Block a user