qse/qse/tools/gencp1.awk

203 lines
4.9 KiB
Awk

# Global variables shared by the rules below:
#   mb_min, mb_max, wc_min, wc_max, mb, wc, mb_arr, wc_arr
# Startup rule: checks the command line and seeds the range trackers.
BEGIN {
	# Anything other than ARGC == 2 is a usage error. The message itself
	# is printed by the END rule, which still runs after this exit.
	if (ARGC != 2) { ERROR_CODE = 1; exit 1; }

	# Minimums start at a large sentinel and maximums at zero, so the
	# first mapping that is recorded replaces all four of them.
	mb_min = wc_min = 0xFFFFFFFF;
	mb_max = wc_max = 0;

	# Keep a positive MAX_GAP if one was already assigned before this
	# rule ran; otherwise fall back to 64.
	if (MAX_GAP <= 0)
		MAX_GAP = 64;
}
# Data rule: runs for every record that does not start (after optional
# white space) with '#'. A record is used only when both of its first two
# fields begin with "0x"; field 1 becomes mb and field 2 becomes wc.
# NOTE(review): int() is applied directly to the "0x..." fields, which
# presumes the interpreter converts hexadecimal strings - confirm for the
# awk implementation this script is run with.
!/^[[:space:]]*#/ {
	if ($1 !~ /^0x/ || $2 !~ /^0x/) next;

	mb = int($1);
	wc = int($2);

	if (mb < 128)
	{
		# Values below 128 must map to themselves. They are checked
		# here but never stored in the tables.
		if (mb == wc) next;

		ERROR_CODE = 2;
		exit 1;
	}

	# Widen the observed ranges on both sides of the mapping.
	mb_min = (mb < mb_min)? mb: mb_min;
	mb_max = (mb > mb_max)? mb: mb_max;
	wc_min = (wc < wc_min)? wc: wc_min;
	wc_max = (wc > wc_max)? wc: wc_max;

	# The first mapping seen for a key is kept; later duplicates only
	# produce a warning on /dev/stderr.
	if (!(mb in mb_arr))
		mb_arr[mb] = wc;
	else
		printf ("WARNING: 0x%04X already in mb_arr. old value = 0x%04X, this value = 0x%04x\n", mb, mb_arr[mb], wc) > "/dev/stderr";

	if (!(wc in wc_arr))
		wc_arr[wc] = mb;
	else
		printf ("WARNING: 0x%04X already in wc_arr. old value = 0x%04X, this value = 0x%04x\n", wc, wc_arr[wc], mb) > "/dev/stderr";
}
# emit_simple(name, min, max, arr)
#   Prints C source for one flat lookup table and its accessor function.
#
#   name  identifier used for the generated table (<name>_tab) and the
#         generated function (<name>)
#   min   first index covered by the table
#   max   last index covered by the table
#   arr   mapping from index to value; indices missing from arr are
#         written as 0xffff
#
#   The generated function returns <name>_tab[c - min] when c lies in
#   [min, max] and 0xffff otherwise.
#
#   i and v are local scratch variables (extra parameters), so the call
#   does not disturb globals of the same purpose used by the rules.
function emit_simple (name, min, max, arr,    i, v) {
	printf ("static qse_uint16_t %s_tab[] =\n", name);
	printf ("{\n");
	for (i = min; i <= max; i++)
	{
		v = (i in arr)? arr[i]: 0xffff;
		printf ("\t0x%04xu", v);
		# every entry except the last one is followed by a comma
		if (i < max) printf (",\n");
		else printf ("\n");
	}
	printf ("};\n");

	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= 0x%04xu && c <= 0x%04xu) return %s_tab[c - 0x%04xu];\n", min, max, name, min);
	printf ("\treturn 0xffffu;\n");
	# a C function body is closed with a plain brace; a trailing ';'
	# would leave a stray empty declaration at file scope
	printf ("}\n");
}
# emit_bsearch(name, min, max, arr)
#   Prints C source for a segmented lookup: one array per run of mapped
#   indices (<name>_seg_N), a table describing the runs (<name>_range),
#   and a function (<name>) that binary-searches that table.
#
#   name  identifier prefix for everything that is generated
#   min   first index to scan
#   max   last index to scan
#   arr   mapping from index to value
#
#   Indices in [min, max] are scanned in ascending order. A hole of at
#   most MAX_GAP unmapped indices is filled with 0xffff entries and the
#   current segment continues; a larger hole closes the segment and the
#   next mapped index opens a new one.
#
#   The generated function returns 0xffff for a value that falls outside
#   every segment.
#
#   Everything after arr in the parameter list is local scratch state, so
#   each call starts clean and does not touch globals.
function emit_bsearch (name, min, max, arr,    i, j, prev_in_arr, prev_not_in_arr, seg_no, seg_first, seg_last) {
	prev_in_arr = 0;      # length of the current run of mapped indices
	prev_not_in_arr = 0;  # length of the current run of unmapped indices
	seg_no = 0;           # index of the segment being written

	for (i = min; i <= max; i++)
	{
		if (i in arr)
		{
			if (prev_in_arr <= 0)
			{
				if (prev_not_in_arr > 0 && prev_not_in_arr <= MAX_GAP)
				{
					# the hole is small enough: pad it with
					# 0xffff and keep extending this segment
					for (j = 0; j < prev_not_in_arr; j++)
						printf (",\n\t0xffffu");
					seg_last[seg_no] = i;
					printf (",\n");
				}
				else
				{
					if (prev_not_in_arr > 0)
					{
						# the hole is too large: close the
						# segment that was open before it
						printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n",
							seg_first[seg_no], seg_last[seg_no],
							seg_last[seg_no] - seg_first[seg_no] + 1);
						seg_no++;
					}
					printf ("static qse_uint16_t %s_seg_%d[] =\n{\n", name, seg_no);
					seg_first[seg_no] = i;
					seg_last[seg_no] = i;
				}
			}
			else
			{
				# directly follows another mapped index
				seg_last[seg_no] = i;
				printf (",\n");
			}

			printf ("\t0x%04xu /* 0x%04x */", arr[i], i);
			prev_in_arr++;
			prev_not_in_arr = 0;
		}
		else
		{
			prev_in_arr = 0;
			prev_not_in_arr++;
		}
	}

	# close the segment that is still open after the scan
	if (prev_in_arr > 0)
	{
		printf ("\n}; /* range 0x%x - 0x%x, total %d chars */\n",
			seg_first[seg_no], seg_last[seg_no],
			seg_last[seg_no] - seg_first[seg_no] + 1);
	}

	# table of segments, in ascending order of their first index
	printf ("static struct %s_range_t\n{\n\tqse_uint16_t first, last;\n\tqse_uint16_t* seg;\n} %s_range[] =\n{\n", name, name);
	printf ("\t{ 0x%04xu, 0x%04xu, %s_seg_0 }", seg_first[0], seg_last[0], name);
	for (i = 1; i <= seg_no; i++)
		printf (",\n\t{ 0x%04xu, 0x%04xu, %s_seg_%d }", seg_first[i], seg_last[i], name, i);
	printf ("\n};\n");

	# lookup function: binary search over the segment table.
	# Each generated line is printed with its own single-line format
	# string because a string constant may not contain a raw newline.
	printf ("static qse_uint16_t %s (qse_uint16_t c)\n{\n", name);
	printf ("\tif (c >= %s_range[0].first &&\n\t c <= %s_range[QSE_COUNTOF(%s_range)-1].last)\n\t{\n", name, name, name);
	printf ("\t\tint left = 0, right = QSE_COUNTOF(%s_range) - 1, mid;\n", name);
	printf ("\t\twhile (left <= right)\n");
	printf ("\t\t{\n");
	printf ("\t\t\tmid = left + (right - left) / 2;\n");
	printf ("\t\t\tif (c >= %s_range[mid].first && c <= %s_range[mid].last)\n", name, name);
	printf ("\t\t\t\treturn %s_range[mid].seg[c - %s_range[mid].first];\n", name, name);
	printf ("\t\t\telse if (c > %s_range[mid].last)\n", name);
	printf ("\t\t\t\tleft = mid + 1;\n");
	printf ("\t\t\telse\n");
	printf ("\t\t\t\tright = mid - 1;\n");
	printf ("\t\t}\n");
	printf ("\t}\n\treturn 0xffffu;\n");
	printf ("}\n");
}
# Final rule: reports any error recorded earlier, otherwise writes the
# generated C source to standard output.
#   ERROR_CODE 1  wrong number of command line arguments (set in BEGIN)
#   ERROR_CODE 2  a value below 128 that does not map to itself
# SIMPLE_MODE selects flat tables (emit_simple); when it is unset or
# false the segmented binary-search tables (emit_bsearch) are written.
END {
	if (ERROR_CODE == 1)
	{
		print "USAGE: gencp.awk codepage-file" > "/dev/stderr";
		exit 1
	}
	else if (ERROR_CODE == 2)
	{
		print "ERROR: mb != wc where mb < 128. i can't handle this encoding map" > "/dev/stderr";
		exit 1;
	}
	else if (mb_min > mb_max)
	{
		# the trackers still hold their initial values: nothing was
		# recorded, and the emitters would produce C that cannot compile
		print "ERROR: no mapping with mb >= 128 found. nothing to generate" > "/dev/stderr";
		exit 1;
	}
	else
	{
		# capture the first output line of the date command for the
		# header, then release the pipe
		"date" | getline date;
		close ("date");

		printf ("/* This is a private file automatically generated\n");
		printf (" * from %s on %s.\n", ARGV[1], date);
		printf (" * Never include this file directly into your source code.\n");
		printf (" * mode=%s \n", (SIMPLE_MODE? "simple": "bsearch"));
		printf (" * mb_min=0x%04x \n", mb_min);
		printf (" * mb_max=0x%04x \n", mb_max);
		printf (" * wc_min=0x%04x \n", wc_min);
		printf (" * wc_max=0x%04x \n", wc_max);
		printf (" */\n\n");

		if (SIMPLE_MODE)
		{
			emit_simple ("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_simple ("wctomb", wc_min, wc_max, wc_arr);
		}
		else
		{
			emit_bsearch ("mbtowc", mb_min, mb_max, mb_arr);
			printf ("\n/* ----------------------------------------- */\n\n");
			emit_bsearch ("wctomb", wc_min, wc_max, wc_arr);
		}
	}
}