added cp949 and cp950.
Deleted the win32 target files for Watcom because they were too difficult to maintain. Added cmgr for cp949 and cp950.
This commit is contained in:
23
qse/tools/gencp.sh
Normal file
23
qse/tools/gencp.sh
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
#
|
||||
# get the following unicode mapping files
|
||||
# from unicode.org before executing this script.
|
||||
# CP932.TXT CP936.TXT CP949.TXT CP950.TXT
|
||||
#
|
||||
|
||||
# gencp NAME MAX_GAP
#
# Generates NAME.h from the upper-cased mapping file (e.g. cp949 -> CP949.TXT)
# with gencp1.awk, then verifies it: testcp.c is compiled against the new
# header (through the x.h symlink) and its output (NAME.1) is compared with
# the reference table produced by gencp0.awk (NAME.0).
#
#   NAME     lower-case code page name, e.g. cp949
#   MAX_GAP  passed to gencp1.awk as MAX_GAP
#
# Prints "[NAME] OK" or "[NAME] NOT OK ..." and returns 0 or 1 accordingly.
gencp() {
	name="$1"
	max_gap="$2"
	src="$(echo "$name" | tr '[:lower:]' '[:upper:]').TXT"

	if [ ! -r "$src" ]
	then
		echo "[$name] NOT OK (cannot read $src)"
		return 1
	fi

	# stderr is discarded because the awk scripts write their
	# duplicate-entry warnings there.
	qseawk -vMAX_GAP="${max_gap}" --extraops=on -f gencp1.awk "$src" > "${name}.h" 2>/dev/null
	ln -sf "${name}.h" x.h

	# Remove the binary built for the previous code page so that a failed
	# compile can never be followed by a run of a stale testcp.
	rm -f testcp
	if ! cc -o testcp testcp.c
	then
		echo "[$name] NOT OK (cannot compile testcp.c)"
		return 1
	fi

	qseawk --extraops=on -f gencp0.awk "$src" > "${name}.0" 2>/dev/null
	./testcp > "${name}.1"

	if diff -q "${name}.0" "${name}.1"
	then
		echo "[$name] OK"
	else
		echo "[$name] NOT OK"
		return 1
	fi
}
|
||||
|
||||
# Code pages to generate, written as name:max_gap.
#   cp932 - ms shift-jis
#   cp936 - ms gbk
#   cp949 - ms euc-kr
#   cp950 - ms big5
for spec in cp932:64 cp936:96 cp949:128 cp950:64
do
	gencp "${spec%%:*}" "${spec##*:}"
done
|
54
qse/tools/gencp0.awk
Normal file
54
qse/tools/gencp0.awk
Normal file
@ -0,0 +1,54 @@
|
||||
#global mb_min, mb_max, wc_min, wc_max, mb, wc;
|
||||
|
||||
# Seed the running minimum/maximum trackers so that the first mapping
# read from the input replaces them.
BEGIN {
	mb_min = wc_min = 0xFFFFFFFF;
	mb_max = wc_max = 0;
}
|
||||
|
||||
# Runs for every input line that is not a comment line.
# $1 is the multibyte code and $2 the wide-character code, both written
# as 0x... literals. Lines that do not carry two such fields are skipped.
# Codes below 128 must map to themselves and are not stored; everything
# else is recorded in mb_arr (mb -> wc) and wc_arr (wc -> mb). The first
# mapping seen for a key wins; later duplicates only produce a warning.
!/^[[:space:]]*#/ {
	if (!($1 ~ /^0x/ && $2 ~ /^0x/)) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128)
	{
		if (mb != wc)
		{
			# report on stderr so that the message does not end up
			# in the table written to stdout
			print "ERROR: mb != wc where mb < 128. i can't handle this encoding map" > "/dev/stderr";
			exit 1;
		}
		next;
	}

	if (mb < mb_min) mb_min = mb;
	if (mb > mb_max) mb_max = mb;
	if (wc < wc_min) wc_min = wc;
	if (wc > wc_max) wc_max = wc;

	if (mb in mb_arr)
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";
	else
		mb_arr[mb] = wc;

	if (wc in wc_arr)
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
	else
		wc_arr[wc] = mb;
}
|
||||
|
||||
# Print the reference table: one "mb wc" line for every code from 0 up to
# (but not including) 0xffff. Codes up to 127 map to themselves; any other
# code without an entry in mb_arr is printed as 0xffff.
END {
	code = 0;
	while (code < 0xffff)
	{
		if (code <= 127) mapped = code;
		else if (code in mb_arr) mapped = mb_arr[code];
		else mapped = 0xffff;

		printf ("0x%04x 0x%04x\n", code, mapped);
		code++;
	}
}
|
201
qse/tools/gencp1.awk
Normal file
201
qse/tools/gencp1.awk
Normal file
@ -0,0 +1,201 @@
|
||||
#global mb_min, mb_max, wc_min, wc_max, mb, wc;
|
||||
|
||||
# Validate the command line and initialise the global state.
# ARGC must be exactly 2; otherwise ERROR_CODE is set to 1 and the script
# exits, leaving the END rule to report it.
BEGIN {
	if (ARGC != 2) { ERROR_CODE = 1; exit 1; }

	# running minimum/maximum of the codes seen in the input
	mb_min = wc_min = 0xFFFFFFFF;
	mb_max = wc_max = 0;

	# fall back to 64 when MAX_GAP was not given or is not positive
	if (MAX_GAP <= 0) MAX_GAP = 64;
}
|
||||
|
||||
# Runs for every input line that is not a comment line.
# Reads the multibyte code from $1 and the wide-character code from $2,
# tracks the minimum/maximum of both, and records the pair in mb_arr
# (mb -> wc) and wc_arr (wc -> mb). Codes below 128 must map to themselves
# and are not stored; a mismatch sets ERROR_CODE to 2 and exits.
!/^[[:space:]]*#/ {
	# both fields must be 0x... literals
	if ($1 !~ /^0x/ || $2 !~ /^0x/) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128)
	{
		if (mb == wc) next;
		ERROR_CODE = 2;
		exit 1;
	}

	if (mb < mb_min) mb_min = mb;
	if (mb > mb_max) mb_max = mb;
	if (wc < wc_min) wc_min = wc;
	if (wc > wc_max) wc_max = wc;

	# the first mapping seen for a key is kept; duplicates are reported
	if (!(mb in mb_arr))
		mb_arr[mb] = wc;
	else
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";

	if (!(wc in wc_arr))
		wc_arr[wc] = mb;
	else
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
}
|
||||
|
||||
# Emits C source for a direct-index conversion table.
#
#   name  name of the generated C function; the table is called <name>_tab
#   min   lowest code covered by the table
#   max   highest code covered by the table
#   arr   mapping from code to converted code
#
# The table has one entry per code in [min, max]; codes missing from arr
# are written as 0xffff. The generated function returns the table entry
# for codes inside [min, max] and 0xffff for any other code.
# Note that i and wc are global variables.
function emit_simple (name, min, max, arr) {
	printf ("static qse_uint16_t %s_tab[] =\n", name);
	printf ("{\n");
	for (i = min; i <= max; i++)
	{
		wc = (i in arr)? arr[i]: 0xffff;

		printf ("\t0x%04xu", wc);
		if (i < max) printf (",\n");
		else printf ("\n");
	}
	printf ("};\n");

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= 0x%04xu && c <= 0x%04xu) return %s_tab[c - 0x%04xu];\n", min, max, name, min);
	printf ("\treturn 0xffffu;\n");
	# a function definition is closed with a plain brace; a trailing
	# semicolon would be a stray empty declaration at file scope
	printf ("}\n");
}
|
||||
|
||||
# Emits C source for a segmented conversion table searched by binary search.
#
#   name  name of the generated C function; the segment arrays are called
#         <name>_seg_<n> and the range table <name>_range
#   min   lowest code to examine
#   max   highest code to examine
#   arr   mapping from code to converted code
#
# Codes in [min, max] that are present in arr are grouped into segments.
# A run of missing codes no longer than MAX_GAP is filled with 0xffff
# entries so that the segment continues; a longer run closes the current
# segment and starts a new one. A range table holding the first and last
# code of every segment is emitted next, followed by a function that
# binary-searches that table and returns the segment entry, or 0xffff
# when the code is not covered by any segment.
# Note that i, j, seg_no, seg_first, seg_last and the two counters are
# global variables.
function emit_bsearch (name, min, max, arr) {
	prev_in_arr = 0;      # length of the current run of codes found in arr
	prev_not_in_arr = 0;  # length of the current run of codes missing from arr
	seg_no = 0;

	for (i = min; i <= max; i++)
	{
		if (i in arr)
		{
			if (prev_in_arr <= 0)
			{
				if (prev_not_in_arr > 0 && prev_not_in_arr <= MAX_GAP)
				{
					# the hole between the two runs is small enough.
					# fill it and keep both runs in one segment
					for (j = 0; j < prev_not_in_arr; j++)
						printf (",\n\t0xffffu");
					seg_last[seg_no] = i;
					printf (",\n");
				}
				else
				{
					if (prev_not_in_arr > 0)
					{
						# close the previous segment
						printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n",
							seg_first[seg_no], seg_last[seg_no],
							seg_last[seg_no] - seg_first[seg_no] + 1);
						seg_no++;
					}

					printf ("static qse_uint16_t %s_seg_%d[] =\n{\n", name, seg_no);
					seg_first[seg_no] = i;
					seg_last[seg_no] = i;
				}
			}
			else
			{
				seg_last[seg_no] = i;
				printf (",\n");
			}

			printf ("\t0x%04xu /* 0x%04x */", arr[i], i);
			prev_in_arr++;
			prev_not_in_arr = 0;
		}
		else
		{
			prev_in_arr = 0;
			prev_not_in_arr++;
		}
	}

	if (prev_in_arr > 0)
	{
		# close the last segment
		printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n",
			seg_first[seg_no], seg_last[seg_no],
			seg_last[seg_no] - seg_first[seg_no] + 1);
	}

	printf ("static struct %s_range_t\n{\n\tqse_uint16_t first, last;\n\tqse_uint16_t* seg;\n} %s_range[] =\n{\n", name, name);
	printf ("\t{ 0x%04xu, 0x%04xu, %s_seg_0 }", seg_first[0], seg_last[0], name);
	for (i = 1; i <= seg_no; i++) printf (",\n\t{ 0x%04xu, 0x%04xu, %s_seg_%d }", seg_first[i], seg_last[i], name, i);
	printf ("\n};\n");

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= %s_range[0].first &&\n\t c <= %s_range[QSE_COUNTOF(%s_range)-1].last)\n\t{\n", name, name, name);

	# one printf per generated line; a string literal may not span
	# several source lines in portable awk
	printf ("\t\tint left = 0, right = QSE_COUNTOF(%s_range) - 1, mid;\n", name);
	printf ("\t\twhile (left <= right)\n");
	printf ("\t\t{\n");
	printf ("\t\t\tmid = (left + right) / 2;\n");
	printf ("\t\t\tif (c >= %s_range[mid].first && c <= %s_range[mid].last)\n", name, name);
	printf ("\t\t\t\treturn %s_range[mid].seg[c - %s_range[mid].first];\n", name, name);
	printf ("\t\t\telse if (c > %s_range[mid].last)\n", name);
	printf ("\t\t\t\tleft = mid + 1;\n");
	printf ("\t\t\telse\n");
	printf ("\t\t\t\tright = mid - 1;\n");
	printf ("\t\t}\n");

	printf ("\t}\n\treturn 0xffffu;\n");
	printf ("}\n");
}
|
||||
|
||||
# Report an error recorded in ERROR_CODE, or write the generated header.
#   ERROR_CODE 1 - wrong number of command-line operands
#   ERROR_CODE 2 - a code below 128 did not map to itself
# Otherwise a comment banner is printed, followed by the mbtowc and wctomb
# converters. They are emitted as direct-index tables when SIMPLE_MODE is
# set and as binary-searched segment tables otherwise.
END {
	if (ERROR_CODE == 1)
	{
		print "USAGE: gencp1.awk codepage-file" > "/dev/stderr";
		exit 1;
	}
	else if (ERROR_CODE == 2)
	{
		print "ERROR: mb != wc where mb < 128. i can't handle this encoding map" > "/dev/stderr";
		exit 1;
	}
	else
	{
		"date" | getline date;
		close("date");

		printf ("/* This is a private file automatically generated\n");
		printf (" * from %s on %s.\n", ARGV[1], date);
		printf (" * Never include this file directly into your source code.\n");
		printf (" * mode=%s \n", (SIMPLE_MODE? "simple": "bsearch"));
		printf (" * mb_min=0x%04x \n", mb_min);
		printf (" * mb_max=0x%04x \n", mb_max);
		printf (" * wc_min=0x%04x \n", wc_min);
		printf (" * wc_max=0x%04x \n", wc_max);
		printf (" */\n\n");

		if (SIMPLE_MODE)
		{
			emit_simple("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_simple("wctomb", wc_min, wc_max, wc_arr);
		}
		else
		{
			emit_bsearch("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_bsearch("wctomb", wc_min, wc_max, wc_arr);
		}
	}
}
|
29
qse/tools/testcp.c
Normal file
29
qse/tools/testcp.c
Normal file
@ -0,0 +1,29 @@
|
||||
#include <stdio.h>
|
||||
|
||||
/* 16-bit code unit type used by the generated tables in x.h. */
typedef unsigned short qse_uint16_t;

/* Number of elements in the array x. x must be an actual array, not a
 * pointer. The argument is parenthesized so that any array-valued
 * expression can be passed safely. */
#define QSE_COUNTOF(x) (sizeof(x) / sizeof((x)[0]))
|
||||
|
||||
#include "x.h"
|
||||
|
||||
int main ()
|
||||
{
|
||||
qse_uint16_t mb;
|
||||
for (mb = 0; mb <= 127; mb++)
|
||||
{
|
||||
printf ("0x%04x 0x%04x\n", mb, mb);
|
||||
}
|
||||
for (mb = 128; mb < 0xFFFF; mb++)
|
||||
{
|
||||
qse_uint16_t wc = mbtowc(mb);
|
||||
printf ("0x%04x 0x%04x", mb, wc);
|
||||
if (wc != 0xFFFF)
|
||||
{
|
||||
qse_uint16_t xmb = wctomb(wc);
|
||||
if (xmb != mb) printf (" (ERROR xmb=0x%04x)", xmb);
|
||||
|
||||
}
|
||||
printf ("\n");
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user