# Generate the dense EUC-KR/UHC lookup arrays used by lib/ksc5601.c
#
# Usage:
#   hawk -f tools/gen-ksc5601-tabs.hawk -- tools/KSC5601.TXT
#   hawk -f tools/gen-ksc5601-tabs.hawk -- --all tools/KSC5601.TXT
#   hawk -f tools/gen-ksc5601-tabs.hawk -- --forward tools/KSC5601.TXT
#   hawk -f tools/gen-ksc5601-tabs.hawk -- --reverse tools/KSC5601.TXT
#
# --all (the default) emits both the forward and the reverse tables,
# --forward emits only the ksc_to_uc_* tables and --reverse only the
# uc_to_ksc_* tables. The generated C source is written to the standard
# output; diagnostics go to /dev/stderr and make the script exit with 1.
#

# Set the defaults, define the page layouts and consume the options.
BEGIN {
	MODE = "all";
	bad = 0;

	init_forward_pages();
	init_reverse_pages();
	parse_args();
}

# A mapping line: the first field is the KS code and the second field is
# the Unicode code point, both in hexadecimal. Lines whose first two fields
# are not valid hexadecimal numbers are skipped.
/^[ \t]*0x[0-9A-Fa-f]+/ {
	@local ks, uc;

	ks = parse_hex($1);
	uc = parse_hex($2);
	if (ks < 0 || uc < 0) next;

	add_forward_mapping(ks, uc);
	add_reverse_mapping(ks, uc);
}

# Add the hard-coded overrides, then print the requested tables unless an
# error has been recorded in 'bad'.
END {
	@local i, first;

	install_ctype_euc_kr_overrides();

	if (bad) exit 1;

	printf("/* generated by tools/gen-ksc5601-tabs.hawk from %s */\n\n", input_name());

	# 'first' suppresses the blank separator line before the first table
	first = 1;

	if (MODE == "all" || MODE == "forward") {
		for (i = 1; i <= fwd_page_count; i++) {
			if (!first) printf("\n");
			dump_forward_page(i);
			first = 0;
		}
	}

	if (MODE == "all" || MODE == "reverse") {
		for (i = 1; i <= rev_page_count; i++) {
			if (!first) printf("\n");
			dump_reverse_page(i);
			first = 0;
		}
	}
}

# Consume every ARGV entry beginning with "--". A recognized option sets
# MODE; an unknown one is reported and sets 'bad'. Either way the entry is
# blanked so that only the remaining entries are left as operands.
function parse_args() {
	@local i, a;

	for (i = 1; i < ARGC; i++) {
		a = ARGV[i];
		if (a == "" || substr(a, 1, 2) != "--") continue;

		if (a == "--all") MODE = "all";
		else if (a == "--forward") MODE = "forward";
		else if (a == "--reverse") MODE = "reverse";
		else {
			printf("unknown option %s\n", a) > "/dev/stderr";
			bad = 1;
		}

		ARGV[i] = "";
	}
}

# Return the non-empty ARGV entries joined with ", ", or "" if there are
# none. Used for the "generated by" banner.
function input_name() {
	@local i, sep, out;

	sep = "";
	out = "";
	for (i = 1; i < ARGC; i++) {
		if (ARGV[i] == "") continue;
		out = out sep ARGV[i];
		sep = ", ";
	}

	return out;
}

# Return the value (0-15) of the single hexadecimal digit 'ch',
# or -1 if 'ch' is not a hexadecimal digit.
function hex_digit(ch) {
	if (ch >= "0" && ch <= "9") return ch + 0;
	if (ch >= "A" && ch <= "F") return 10 + index("ABCDEF", ch) - 1;
	if (ch >= "a" && ch <= "f") return 10 + index("abcdef", ch) - 1;
	return -1;
}

# Convert the hexadecimal string 's', with an optional "0x"/"0X" prefix,
# to a number. Returns -1 if 's' contains a non-hexadecimal character or
# holds no digit at all.
function parse_hex(s) {
	@local i, n, d, len;

	if (substr(s, 1, 2) == "0x" || substr(s, 1, 2) == "0X") s = substr(s, 3);

	# an empty digit string (a missing field or a bare "0x") is not a
	# number; without this check it would silently be taken as 0.
	len = length(s);
	if (len <= 0) return -1;

	n = 0;
	for (i = 1; i <= len; i++) {
		d = hex_digit(substr(s, i, 1));
		if (d < 0) return -1;
		n = n * 16 + d;
	}

	return n;
}

# Define the forward (KS code -> Unicode) pages. Page i covers the KS codes
# fwd_from[i] to fwd_to[i] inclusive and is emitted as the C array named
# fwd_name[i].
function init_forward_pages() {
	fwd_page_count = 2;
	fwd_page_no = @[0, 1];
	fwd_name = @["ksc_to_uc_0", "ksc_to_uc_1"];
	fwd_from = @[0x8141, 0xcaa1];
	fwd_to = @[0xc8fe, 0xfdfe];
	fwd_len = @[fwd_to[1] - fwd_from[1] + 1, fwd_to[2] - fwd_from[2] + 1];
	fwd_data = @[];
}

# Define the reverse (Unicode -> KS code) pages. Page i covers the code
# points rev_from[i] to rev_to[i] inclusive and is emitted as the C array
# named rev_name[i].
function init_reverse_pages() {
	# keep the loop counter local rather than leaking it as a global
	@local i;

	rev_page_count = 11;

	rev_page_no[1] = 0;   rev_name[1] = "uc_to_ksc_0";   rev_from[1] = 0x00A1;  rev_to[1] = 0x0167;
	rev_page_no[2] = 1;   rev_name[2] = "uc_to_ksc_1";   rev_from[2] = 0x02C7;  rev_to[2] = 0x0451;
	rev_page_no[3] = 2;   rev_name[3] = "uc_to_ksc_2";   rev_from[3] = 0x2015;  rev_to[3] = 0x2312;
	rev_page_no[4] = 3;   rev_name[4] = "uc_to_ksc_3";   rev_from[4] = 0x2460;  rev_to[4] = 0x266D;
	rev_page_no[5] = 4;   rev_name[5] = "uc_to_ksc_4";   rev_from[5] = 0x3000;  rev_to[5] = 0x327F;
	rev_page_no[6] = 5;   rev_name[6] = "uc_to_ksc_5";   rev_from[6] = 0x3380;  rev_to[6] = 0x33DD;
	rev_page_no[7] = 6;   rev_name[7] = "uc_to_ksc_6";   rev_from[7] = 0x4E00;  rev_to[7] = 0x947F;
	rev_page_no[8] = 7;   rev_name[8] = "uc_to_ksc_7";   rev_from[8] = 0x9577;  rev_to[8] = 0x9F9C;
	rev_page_no[9] = 8;   rev_name[9] = "uc_to_ksc_8";   rev_from[9] = 0xAC00;  rev_to[9] = 0xD7A3;
	rev_page_no[10] = 9;  rev_name[10] = "uc_to_ksc_9";  rev_from[10] = 0xF900; rev_to[10] = 0xFA0B;
	rev_page_no[11] = 10; rev_name[11] = "uc_to_ksc_10"; rev_from[11] = 0xFF01; rev_to[11] = 0xFFE6;

	for (i = 1; i <= rev_page_count; i++) rev_len[i] = rev_to[i] - rev_from[i] + 1;
}

# Record ks -> uc in the forward page whose range contains 'ks'. A code
# outside every page is ignored. A slot holding 0 counts as empty, so a
# conflict is reported (and 'bad' is set) only when the slot already holds
# a different non-zero value.
function add_forward_mapping(ks, uc) {
	@local i, idx, old;

	for (i = 1; i <= fwd_page_count; i++) {
		if (ks < fwd_from[i] || ks > fwd_to[i]) continue;

		idx = ks - fwd_from[i];
		old = fwd_data[i][idx] + 0;
		if (old != 0 && old != uc) {
			printf("duplicate forward mapping for 0x%04x: 0x%04x vs 0x%04x\n", ks, old, uc) > "/dev/stderr";
			bad = 1;
			return;
		}

		fwd_data[i][idx] = uc;
		return;
	}
}

# Record uc -> ks in the reverse page whose range contains 'uc'. A code
# point outside every page is ignored. A slot holding 0 counts as empty, so
# a conflict is reported (and 'bad' is set) only when the slot already
# holds a different non-zero value.
function add_reverse_mapping(ks, uc) {
	@local i, idx, old;

	for (i = 1; i <= rev_page_count; i++) {
		if (uc < rev_from[i] || uc > rev_to[i]) continue;

		idx = uc - rev_from[i];
		old = rev_data[i][idx] + 0;
		if (old != 0 && old != ks) {
			printf("duplicate reverse mapping for U+%04x: 0x%04x vs 0x%04x\n", uc, old, ks) > "/dev/stderr";
			bad = 1;
			return;
		}

		rev_data[i][idx] = ks;
		return;
	}
}

# Add the mappings that are hard-coded here rather than read from the
# input. They go through the same duplicate checks as the input lines.
function install_ctype_euc_kr_overrides() {
	## some characters not found in the obsolete KSC5601.TXT file

	## euro sign
	add_forward_mapping(0xA2E6, 0x20AC);
	add_reverse_mapping(0xA2E6, 0x20AC);

	## registered trademark sign
	add_forward_mapping(0xA2E7, 0x00AE);
	add_reverse_mapping(0xA2E7, 0x00AE);

	## circled korean postal code sign
	add_forward_mapping(0xA2E8, 0x327E);
	add_reverse_mapping(0xA2E8, 0x327E);
}

# Print 'total' slots of page 'page' as C initializer values, nine per
# tab-indented row. 'kind' selects the table: "forward" reads fwd_data,
# anything else reads rev_data. Slots never assigned print as 0x0000.
function dump_dense_array(kind, page, total) {
	@local i, v;

	for (i = 0; i < total; i++) {
		if ((i % 9) == 0) printf("\t");

		if (kind == "forward") v = fwd_data[page][i] + 0;
		else v = rev_data[page][i] + 0;

		printf("0x%04x", v);
		# every value except the very last one is followed by ", "
		if (i + 1 < total) printf(", ");
		# end the row after the ninth value or after the last value
		if ((i % 9) == 8 || i + 1 >= total) printf("\n");
	}
}

# Print forward page 'i' as a complete C array definition.
function dump_forward_page(i) {
	printf("/* page %d 0x%04x-0x%04x */\n", fwd_page_no[i], fwd_from[i], fwd_to[i]);
	printf("static const hawk_uint16_t %s[] = {\n", fwd_name[i]);
	dump_dense_array("forward", i, fwd_len[i]);
	printf("};\n");
}

# Print reverse page 'i' as a complete C array definition.
function dump_reverse_page(i) {
	printf("/* page %d 0x%04x-0x%04x */\n", rev_page_no[i], rev_from[i], rev_to[i]);
	printf("static const hawk_uint16_t %s[] = {\n", rev_name[i]);
	dump_dense_array("reverse", i, rev_len[i]);
	printf("};\n");
}