Files
hawk/tools/gen-gbk-tabs.hawk

232 lines
4.1 KiB
Plaintext

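# Generate the dense GBK lookup arrays used by lib/gbk.c
#
# Usage:
# hawk -f tools/gen-gbk-tabs.hawk -- tools/CP936.TXT
# hawk -f tools/gen-gbk-tabs.hawk -- --all tools/CP936.TXT
# hawk -f tools/gen-gbk-tabs.hawk -- --forward tools/CP936.TXT
# hawk -f tools/gen-gbk-tabs.hawk -- --reverse tools/CP936.TXT
#
# With no option (or with --all) both the forward and the reverse tables are
# printed; --forward and --reverse restrict the output to one direction.
#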
BEGIN {
	# Defaults that parse_args() may override: emit both directions and
	# start with no error recorded.
	bad = 0;
	MODE = "all";

	# Describe the output pages before any input record is processed.
	init_reverse_pages();
	init_forward_pages();

	# Consume the "--" options from ARGV so that only input files remain.
	parse_args();
}
/^[ \t]*0x[0-9A-Fa-f]+[ \t]+0x[0-9A-Fa-f]+/ {
	# A mapping line: first field is the GBK code, second the Unicode
	# code point, both written as 0x-prefixed hexadecimal numbers.
	@local code, ucs;

	code = parse_hex($1);
	ucs = parse_hex($2);

	# parse_hex() reports -1 for a field it cannot parse; such lines
	# are skipped without recording anything.
	if (code < 0 || ucs < 0) next;

	# Only codes from 0x8140 upwards are offered to the forward table.
	if (code >= 0x8140)
	{
		add_forward_mapping(code, ucs);
	}

	# Every code from 0x80 upwards is offered to the reverse table.
	if (code >= 0x80)
	{
		add_reverse_mapping(code, ucs);
	}
}
END {
	@local page, emitted;

	# An error flagged while parsing options or collecting mappings
	# suppresses the output and makes the run exit with status 1.
	if (bad) exit 1;

	printf("/* generated by tools/gen-gbk-tabs.hawk from %s */\n\n", input_name());

	# 'emitted' counts the pages written so far; every page except the
	# very first one is preceded by a single blank line.
	emitted = 0;

	if (MODE == "all" || MODE == "forward")
	{
		for (page = 1; page <= fwd_page_count; page++)
		{
			if (emitted > 0) printf("\n");
			dump_forward_page(page);
			emitted++;
		}
	}

	if (MODE == "all" || MODE == "reverse")
	{
		for (page = 1; page <= rev_page_count; page++)
		{
			if (emitted > 0) printf("\n");
			dump_reverse_page(page);
			emitted++;
		}
	}
}
# Scan ARGV for "--" options, set MODE accordingly and blank out every
# option so that it is not treated as an input file name.
# An unrecognised option is reported on /dev/stderr and sets 'bad'.
function parse_args()
{
	@local k, opt;

	for (k = 1; k < ARGC; k++)
	{
		opt = ARGV[k];

		# Leave already-cleared slots and plain file names alone.
		if (opt == "") continue;
		if (substr(opt, 1, 2) != "--") continue;

		if (opt == "--all" || opt == "--forward" || opt == "--reverse")
		{
			# The mode name is the option text without the leading "--".
			MODE = substr(opt, 3);
		}
		else
		{
			printf("unknown option %s\n", opt) > "/dev/stderr";
			bad = 1;
		}

		# Recognised or not, the option must not remain in ARGV.
		ARGV[k] = "";
	}
}
# Return the non-empty ARGV entries joined with ", ", or "<stdin>" when
# there are none.  Used for the "generated by" banner.
function input_name()
{
	@local k, names;

	names = "";
	for (k = 1; k < ARGC; k++)
	{
		# Empty slots are skipped (parse_args() blanks out options).
		if (ARGV[k] == "") continue;

		# Every name after the first is preceded by a separator.
		if (names != "") names = names ", ";
		names = names ARGV[k];
	}

	if (names == "") return "<stdin>";
	return names;
}
# Convert one hexadecimal digit character to its numeric value.
# Returns 0..15 for 0-9, A-F and a-f, and -1 for anything else.
function hex_digit(ch)
{
	@local pos;

	if (ch >= "0" && ch <= "9")
	{
		# Numeric coercion of the digit character itself.
		return ch + 0;
	}

	if (ch >= "A" && ch <= "F")
	{
		# index() is 1-based, so "A" (position 1) becomes 10.
		pos = index("ABCDEF", ch);
		return pos + 9;
	}

	if (ch >= "a" && ch <= "f")
	{
		pos = index("abcdef", ch);
		return pos + 9;
	}

	return -1;
}
# Parse a hexadecimal number with an optional "0x"/"0X" prefix.
#
#   s - the text to parse
#
# Returns the numeric value, or -1 when the text contains a character
# that is not a hexadecimal digit or when no digit is present at all
# (an empty string or a bare "0x" prefix).
function parse_hex(s)
{
	@local i, n, d, len;

	if (substr(s, 1, 2) == "0x" || substr(s, 1, 2) == "0X")
		s = substr(s, 3);

	# Without this check an empty digit string would fall through the
	# loop and be reported as the valid value 0.
	len = length(s);
	if (len <= 0) return -1;

	n = 0;
	for (i = 1; i <= len; i++)
	{
		d = hex_digit(substr(s, i, 1));
		if (d < 0) return -1;
		n = n * 16 + d;
	}
	return n;
}
# Set up the description of the forward (GBK -> Unicode) output pages.
# There is a single page, named gbk_to_uc_0, covering the GBK codes
# 0x8140 through 0xfe4f inclusive.
function init_forward_pages()
{
	# Mapping store; filled in by add_forward_mapping().
	fwd_data = @[];

	# Page identity.
	fwd_page_count = 1;
	fwd_name = @["gbk_to_uc_0"];
	fwd_page_no = @[0];

	# Inclusive code range of the page and the resulting element count.
	fwd_from = @[0x8140];
	fwd_to = @[0xfe4f];
	fwd_len = @[fwd_to[1] - fwd_from[1] + 1];
}
# Set up the description of the reverse (Unicode -> GBK) output pages.
# There is a single page, named uc_to_gbk_0, covering the code points
# 0x00a4 through 0xffe5 inclusive.
function init_reverse_pages()
{
	rev_page_count = 1;
	rev_page_no = @[0];
	rev_name = @["uc_to_gbk_0"];

	# Inclusive code point range of the page and its element count.
	rev_from = @[0x00a4];
	rev_to = @[0xffe5];
	rev_len = @[rev_to[1] - rev_from[1] + 1];

	# Start from an explicitly empty mapping store, the same way
	# init_forward_pages() does for fwd_data, instead of relying on
	# rev_data being created implicitly on first use.
	rev_data = @[];
}
# Record the mapping gbk -> uc in the first forward page whose range
# contains gbk.  A code outside every page is ignored.
#
#   gbk - GBK code
#   uc  - Unicode code point it maps to
#
# If the slot already holds a different non-zero value, the conflict is
# reported on /dev/stderr, 'bad' is set and the stored value is kept.
function add_forward_mapping(gbk, uc)
{
	@local p, slot, prev;

	for (p = 1; p <= fwd_page_count; p++)
	{
		if (gbk >= fwd_from[p] && gbk <= fwd_to[p])
		{
			# Slots are numbered from 0 relative to the page start.
			slot = gbk - fwd_from[p];
			prev = fwd_data[p][slot] + 0;

			if (prev == 0 || prev == uc)
			{
				fwd_data[p][slot] = uc;
			}
			else
			{
				printf("duplicate forward mapping for 0x%04x: 0x%04x vs 0x%04x\n", gbk, prev, uc) > "/dev/stderr";
				bad = 1;
			}
			return;
		}
	}
}
# Record the mapping uc -> gbk in the first reverse page whose range
# contains uc.  A code point outside every page is ignored.
#
#   gbk - GBK code
#   uc  - Unicode code point that maps back to it
#
# If the slot already holds a different non-zero value, the conflict is
# reported on /dev/stderr, 'bad' is set and the stored value is kept.
function add_reverse_mapping(gbk, uc)
{
	@local p, slot, prev;

	for (p = 1; p <= rev_page_count; p++)
	{
		if (uc >= rev_from[p] && uc <= rev_to[p])
		{
			# Slots are numbered from 0 relative to the page start.
			slot = uc - rev_from[p];
			prev = rev_data[p][slot] + 0;

			if (prev == 0 || prev == gbk)
			{
				rev_data[p][slot] = gbk;
			}
			else
			{
				printf("duplicate reverse mapping for U+%04x: 0x%04x vs 0x%04x\n", uc, prev, gbk) > "/dev/stderr";
				bad = 1;
			}
			return;
		}
	}
}
# Print the body of one C array initialiser: 'total' values formatted as
# 0x%04x, nine per tab-indented line, separated by ", ".
#
#   kind  - "forward" reads fwd_data, anything else reads rev_data
#   page  - page index into the selected data store
#   total - number of slots to print, starting at slot 0
#
# Slots that were never assigned are printed as 0x0000.
function dump_dense_array(kind, page, total)
{
	@local slot, col, last, cell, val;

	last = total - 1;
	for (slot = 0; slot < total; slot++)
	{
		# Column of this value within the current output line (0..8).
		col = slot % 9;

		if (kind == "forward")
			cell = fwd_data[page][slot];
		else
			cell = rev_data[page][slot];
		val = cell + 0;

		if (col == 0) printf("\t");
		printf("0x%04x", val);

		# Every value but the last is followed by a separator; a line
		# ends after the ninth column or after the final value.
		if (slot < last) printf(", ");
		if (col == 8 || slot >= last)
			printf("\n");
	}
}
# Print forward page 'i' as a C definition: a comment giving the page
# number and its code range, then a static const hawk_uint16_t array
# holding the page's values.
function dump_forward_page(i)
{
	@local lo, hi;

	lo = fwd_from[i];
	hi = fwd_to[i];

	# Header comment and opening of the array definition.
	printf("/* page %d 0x%04x-0x%04x */\n", fwd_page_no[i], lo, hi);
	printf("static const hawk_uint16_t %s[] = {\n", fwd_name[i]);

	# One element per slot of the page.
	dump_dense_array("forward", i, fwd_len[i]);

	printf("};\n");
}
# Print reverse page 'i' as a C definition: a comment giving the page
# number and its code point range, then a static const hawk_uint16_t
# array holding the page's values.
function dump_reverse_page(i)
{
	@local lo, hi;

	lo = rev_from[i];
	hi = rev_to[i];

	# Header comment and opening of the array definition.
	printf("/* page %d 0x%04x-0x%04x */\n", rev_page_no[i], lo, hi);
	printf("static const hawk_uint16_t %s[] = {\n", rev_name[i]);

	# One element per slot of the page.
	dump_dense_array("reverse", i, rev_len[i]);

	printf("};\n");
}