added cp949 and cp950.

deleted win32 target files for watcom. too difficult to maintain.
added cmgr for cp949 and cp950.
This commit is contained in:
2012-02-24 09:09:45 +00:00
parent 64fbfed781
commit d7c5e50364
37 changed files with 96662 additions and 2985 deletions

23
qse/tools/gencp.sh Normal file
View File

@ -0,0 +1,23 @@
#
# get the following unicode mapping files
# from unicode.org before executing this script.
# CP932.TXT CP936.TXT CP949.TXT CP950.TXT
#
# gencp NAME MAX_GAP
#
# Generates NAME.h from the upper-cased NAME.TXT mapping file with gencp1.awk,
# builds testcp against it through the x.h symlink, and compares the table
# printed by testcp (NAME.1) with the reference table written by gencp0.awk
# (NAME.0). Prints "[NAME] OK" or "[NAME] NOT OK".
#   NAME     lower-case code page name, e.g. cp949
#   MAX_GAP  handed to gencp1.awk as its MAX_GAP variable
# Returns non-zero when testcp can't be compiled.
gencp() {
	name="$1"
	max_gap="$2"
	# upper-case the name once; the mapping files are named like CP949.TXT
	txt=`echo "$name" | tr '[a-z]' '[A-Z]'`.TXT

	# stderr is discarded because the awk scripts report duplicate
	# mappings there as warnings
	qseawk -vMAX_GAP="${max_gap}" --extraops=on -f gencp1.awk "${txt}" > "${name}.h" 2>/dev/null
	ln -sf "${name}.h" x.h

	# drop the binary built for the previous code page so that a failed
	# compilation can't end up running a stale testcp
	rm -f testcp
	cc -o testcp testcp.c || {
		echo "[$name] NOT OK"
		return 1
	}

	qseawk --extraops=on -f gencp0.awk "${txt}" > "${name}.0" 2>/dev/null
	./testcp > "${name}.1"
	diff -q "${name}.0" "${name}.1" && echo "[$name] OK" || echo "[$name] NOT OK"
}
# Generate and verify each code page. Every entry is "<name>:<max gap>".
#   cp932 - ms shift-jis
#   cp936 - ms gbk
#   cp949 - ms euc-kr
#   cp950 - ms big5
for entry in cp932:64 cp936:96 cp949:128 cp950:64
do
	gencp "${entry%%:*}" "${entry##*:}"
done

54
qse/tools/gencp0.awk Normal file
View File

@ -0,0 +1,54 @@
#global mb_min, mb_max, wc_min, wc_max, mb, wc;
BEGIN {
	# Bounds of the multibyte (mb) and wide-character (wc) codes.
	# The maximums start at zero and the minimums at a large sentinel;
	# the input rule adjusts them as mapping pairs are read.
	mb_max = 0;
	wc_max = 0;
	mb_min = 0xFFFFFFFF;
	wc_min = 0xFFFFFFFF;
}
# Runs for every line that is not a '#' comment. A line whose first two fields
# both start with "0x" is taken as a "multibyte-code wide-char-code" pair and
# recorded in mb_arr (mb -> wc) and wc_arr (wc -> mb). The first mapping seen
# for a code is kept; a later duplicate only produces a warning on stderr.
!/^[[:space:]]*#/ {
	if (!($1 ~ /^0x/ && $2 ~ /^0x/)) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128)
	{
		# codes below 128 are never stored; the END rule prints them as
		# an identity mapping, so a map that disagrees can't be handled
		if (mb != wc)
		{
			print "ERROR: mb != wc where mb < 128. i can't handle this encoding map";
			exit 1;
		}
		next;
	}

	if (mb < mb_min) mb_min = mb;
	if (mb > mb_max) mb_max = mb;
	if (wc < wc_min) wc_min = wc;
	# wc_max must be compared with itself, not with mb_max
	if (wc > wc_max) wc_max = wc;

	if (mb in mb_arr)
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";
	else
		mb_arr[mb] = wc;

	if (wc in wc_arr)
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
	else
		wc_arr[wc] = mb;
}
END {
	# Print one "mb wc" line for every multibyte code from 0 up to, but
	# not including, 0xffff. Codes up to 127 map to themselves; any other
	# code without an entry in mb_arr is printed with 0xffff.
	mb = 0;
	while (mb < 0xffff)
	{
		wc = 0xffff;
		if (mb <= 127) wc = mb;
		else if (mb in mb_arr) wc = mb_arr[mb];

		printf ("0x%04x 0x%04x\n", mb, wc);
		mb++;
	}
}

201
qse/tools/gencp1.awk Normal file
View File

@ -0,0 +1,201 @@
#global mb_min, mb_max, wc_min, wc_max, mb, wc;
BEGIN {
	# Exactly one operand (the mapping file) is expected. The failure is
	# only flagged here; the END rule prints the message for ERROR_CODE.
	if (ARGC != 2)
	{
		ERROR_CODE = 1;
		exit 1;
	}

	# Bounds of the multibyte (mb) and wide-character (wc) codes.
	# The maximums start at zero and the minimums at a large sentinel;
	# the input rule adjusts them as mapping pairs are read.
	mb_max = 0;
	wc_max = 0;
	mb_min = 0xFFFFFFFF;
	wc_min = 0xFFFFFFFF;

	# fall back to 64 when MAX_GAP is not given or is not positive
	if (MAX_GAP <= 0) MAX_GAP = 64;
}
# Runs for every line that is not a '#' comment. A line whose first two fields
# both start with "0x" is taken as a "multibyte-code wide-char-code" pair and
# recorded in mb_arr (mb -> wc) and wc_arr (wc -> mb). The first mapping seen
# for a code is kept; a later duplicate only produces a warning on stderr.
!/^[[:space:]]*#/ {
	if ($1 !~ /^0x/ || $2 !~ /^0x/) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128)
	{
		# codes below 128 are not stored; they must map to themselves
		if (mb == wc) next;

		# the END rule prints the message for this error code
		ERROR_CODE = 2;
		exit 1;
	}

	if (mb_min > mb) mb_min = mb;
	if (mb_max < mb) mb_max = mb;
	if (wc_min > wc) wc_min = wc;
	if (wc_max < wc) wc_max = wc;

	if (!(mb in mb_arr))
		mb_arr[mb] = wc;
	else
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";

	if (!(wc in wc_arr))
		wc_arr[wc] = mb;
	else
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
}
# emit_simple (name, min, max, arr)
#
# Prints C source for a direct lookup table and its accessor:
#   static qse_uint16_t <name>_tab[]  one entry for every code in min..max,
#                                     0xffff where arr has no entry
#   static qse_uint16_t <name> (c)    returns <name>_tab[c - min] when c is
#                                     within min..max, 0xffff otherwise
#
# NOTE(review): i and wc are not parameters, so they are presumably global
# variables that this function overwrites.
function emit_simple (name, min, max, arr) {
	printf ("static qse_uint16_t %s_tab[] =\n", name);
	printf ("{\n");
	for (i = min; i <= max; i++)
	{
		wc = (i in arr)? arr[i]: 0xffff;
		printf ("\t0x%04xu", wc);
		# no comma after the last initializer
		if (i < max) printf (",\n");
		else printf ("\n");
	}
	printf ("};\n");

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= 0x%04xu && c <= 0x%04xu) return %s_tab[c - 0x%04xu];\n", min, max, name, min);
	printf ("\treturn 0xffffu;\n");
	# a function definition ends with a bare brace; a ';' after it would
	# be a stray empty declaration at file scope in the generated C
	printf ("}\n");
}
# emit_bsearch (name, min, max, arr)
#
# Prints C source for a segmented lookup table and its accessor:
#   <name>_seg_N[]  one array per segment of codes found in arr. Two runs
#                   separated by at most MAX_GAP missing codes are merged
#                   into one segment, the hole being filled with 0xffff.
#   <name>_range[]  { first, last, seg } of every segment, in ascending order
#   <name> (c)      binary-searches <name>_range and returns the entry for c,
#                   or 0xffff when c belongs to no segment
#
# NOTE(review): the counters, i, j, seg_no, seg_first and seg_last are not
# parameters, so they are presumably global and shared between calls.
function emit_bsearch (name, min, max, arr) {
	prev_in_arr = 0;      # length of the current run of codes found in arr
	prev_not_in_arr = 0;  # length of the current run of codes missing from arr
	seg_no = 0;

	for (i = min; i <= max; i++)
	{
		if (i in arr)
		{
			if (prev_in_arr <= 0)
			{
				if (prev_not_in_arr > 0 && prev_not_in_arr <= MAX_GAP)
				{
					# the hole is small enough. pad it with 0xffff
					# and keep extending the current segment
					for (j = 0; j < prev_not_in_arr; j++)
						printf (",\n\t0xffffu");
					seg_last[seg_no] = i;
					printf (",\n");
				}
				else
				{
					if (prev_not_in_arr > 0)
					{
						# the hole is too large. close the segment
						# that was open before it
						printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n",
							seg_first[seg_no], seg_last[seg_no],
							seg_last[seg_no] - seg_first[seg_no] + 1);
						seg_no++;
					}

					printf ("static qse_uint16_t %s_seg_%d[] =\n{\n", name, seg_no);
					seg_first[seg_no] = i;
					seg_last[seg_no] = i;
				}
			}
			else
			{
				seg_last[seg_no] = i;
				printf (",\n");
			}

			printf ("\t0x%04xu /* 0x%04x */", arr[i], i);
			prev_in_arr++;
			prev_not_in_arr = 0;
		}
		else
		{
			# a segment is not closed here. whether it ends is decided
			# when the next mapped code shows how long the hole is
			prev_in_arr = 0;
			prev_not_in_arr++;
		}
	}

	if (prev_in_arr > 0)
	{
		# close the segment that is still open
		printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n",
			seg_first[seg_no], seg_last[seg_no],
			seg_last[seg_no] - seg_first[seg_no] + 1);
	}

	printf ("static struct %s_range_t\n{\n\tqse_uint16_t first, last;\n\tqse_uint16_t* seg;\n} %s_range[] =\n{\n", name, name);
	printf ("\t{ 0x%04xu, 0x%04xu, %s_seg_0 }", seg_first[0], seg_last[0], name);
	for (i = 1; i <= seg_no; i++) printf (",\n\t{ 0x%04xu, 0x%04xu, %s_seg_%d }", seg_first[i], seg_last[i], name, i);
	printf ("\n};\n");

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= %s_range[0].first &&\n\t c <= %s_range[QSE_COUNTOF(%s_range)-1].last)\n\t{\n", name, name, name);
	printf ("\t\tint left = 0, right = QSE_COUNTOF(%s_range) - 1, mid;
while (left <= right)
{
mid = (left + right) / 2;
if (c >= %s_range[mid].first && c <= %s_range[mid].last)
return %s_range[mid].seg[c - %s_range[mid].first];
else if (c > %s_range[mid].last)
left = mid + 1;
else
right = mid - 1;
}\n", name, name, name, name, name, name);
	printf ("\t}\n\treturn 0xffffu;\n");
	printf ("}\n");
}
END {
	# ERROR_CODE is set by the BEGIN rule (1: wrong number of operands)
	# or by the input rule (2: a code below 128 not mapped to itself).
	if (ERROR_CODE == 1)
	{
		print "USAGE: gencp1.awk codepage-file" > "/dev/stderr";
		exit 1
	}
	else if (ERROR_CODE == 2)
	{
		print "ERROR: mb != wc where mb < 128. i can't handle this encoding map" > "/dev/stderr";
		exit 1;
	}
	else
	{
		# the output of the date command goes into the header comment
		"date" | getline date;

		printf ("/* This is a private file automatically generated\n");
		printf (" * from %s on %s.\n", ARGV[1], date);
		printf (" * Never include this file directly into your source code.\n");
		printf (" * mode=%s \n", (SIMPLE_MODE? "simple": "bsearch"));
		printf (" * mb_min=0x%04x \n", mb_min);
		printf (" * mb_max=0x%04x \n", mb_max);
		printf (" * wc_min=0x%04x \n", wc_min);
		printf (" * wc_max=0x%04x \n", wc_max);
		printf (" */\n\n");

		# emit mbtowc() from mb_arr and wctomb() from wc_arr, either as
		# direct tables (SIMPLE_MODE set) or as binary-searched segments
		if (SIMPLE_MODE)
		{
			emit_simple ("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_simple ("wctomb", wc_min, wc_max, wc_arr);
		}
		else
		{
			emit_bsearch ("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_bsearch ("wctomb", wc_min, wc_max, wc_arr);
		}
	}
}

29
qse/tools/testcp.c Normal file
View File

@ -0,0 +1,29 @@
#include <stdio.h>
typedef unsigned short qse_uint16_t;
#define QSE_COUNTOF(x) (sizeof(x) / sizeof(x[0]))
#include "x.h"
/*
 * Prints one "multibyte-code wide-char-code" line for every code from 0 up
 * to, but not including, 0xFFFF, using mbtowc() from the included x.h.
 * Each code that has a mapping is converted back with wctomb(); a mismatch
 * is reported on the same line.
 */
int main ()
{
	qse_uint16_t code = 0;

	/* codes up to 127 are printed as themselves without a table lookup */
	while (code <= 127)
	{
		printf ("0x%04x 0x%04x\n", code, code);
		code++;
	}

	/* code is 128 at this point */
	while (code < 0xFFFF)
	{
		qse_uint16_t wide = mbtowc(code);

		printf ("0x%04x 0x%04x", code, wide);
		if (wide != 0xFFFF)
		{
			/* a mapped code must survive the round trip */
			qse_uint16_t back = wctomb(wide);
			if (back != code) printf (" (ERROR xmb=0x%04x)", back);
		}
		printf ("\n");

		code++;
	}

	return 0;
}