diff --git a/hawk/lib/fnc.c b/hawk/lib/fnc.c index 3dd8caec..af8da7f1 100644 --- a/hawk/lib/fnc.c +++ b/hawk/lib/fnc.c @@ -1093,8 +1093,23 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t for (i = 0; i < s1->len; i++) { - if ((i+1) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '&') + if ((i + 3) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '\\' && s1->ptr[i+3] == '&') { + /* \\\& to produce a literal \& */ + m = hawk_ooecs_cat(new, HAWK_T("\\&")); + i += 3; + } + else if ((i + 2) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '&') + { + /* \\& to produce a literal \ followed by the matched text */ + m = hawk_ooecs_ccat(new, '\\'); + if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops; + m = hawk_ooecs_ncat(new, mat.ptr, mat.len); + i += 2; + } + else if ((i + 1) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '&') + { + /* \& to produce literal '&' */ m = hawk_ooecs_ccat(new, '&'); i++; } @@ -1183,8 +1198,23 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t* for (i = 0; i < s1->len; i++) { - if ((i+1) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '&') + if ((i + 3) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '\\' && s1->ptr[i+3] == '&') { + /* \\\& to produce a literal \& */ + m = hawk_becs_cat(new, "\\&"); + i += 3; + } + else if ((i + 2) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '&') + { + /* \\& to produce a literal \ followed by the matched text */ + m = hawk_becs_ccat(new, '\\'); + if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops; + m = hawk_becs_ncat(new, mat.ptr, mat.len); + i += 2; + } + else if ((i + 1) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '&') + { + /* \& to produce literal '&' */ m = hawk_becs_ccat(new, '&'); i++; } @@ -1455,7 +1485,7 @@ int hawk_fnc_match (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) for (i = 1; i <= q; i++) print x[i,"start"], x[i,"length"]; print RSTART, RLENGTH; * ------------------------------------ - match(B"ab\xB0\x98cdefgdefx", /(def)g(.+)/, 1, x); + match(@b"ab\xB0\x98cdefgdefx", /(def)g(.+)/, 1, x); q = length(x) / 2; for (i = 1; i <= q; i++) print x[i,"start"], x[i,"length"]; print RSTART, RLENGTH; diff --git a/hawk/lib/mod-str.c b/hawk/lib/mod-str.c index f4866f9e..5cb130fd 100644 --- a/hawk/lib/mod-str.c +++ b/hawk/lib/mod-str.c @@ -352,10 +352,10 @@ static int fnc_tocharcode (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) static int fnc_frommbs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { - /* str::frommbs(B"byte-string" [, "encoding-name"]) + /* str::frommbs(@b"byte-string" [, "encoding-name"]) * * if you use a supported encoding name, it may look like this: - * a = str::frommbs(B"\xC7\xD1\xB1\xDB", "cp949"); + * a = str::frommbs(@b"\xC7\xD1\xB1\xDB", "cp949"); * printf ("%K\n", a); */ hawk_val_t* a0, * r; @@ -413,7 +413,7 @@ static int fnc_tombs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) * * if you use a supported encoding name, it may look like this: * a = str::tombs("\uD55C\uAE00", "cp949"); - * printf (B"%K\n", a); + * printf (@b"%K\n", a); */ hawk_val_t* a0, * r; diff --git a/hawk/lib/mod-sys.c b/hawk/lib/mod-sys.c index ebb626a0..6fd85d6b 100644 --- a/hawk/lib/mod-sys.c +++ b/hawk/lib/mod-sys.c @@ -580,7 +580,7 @@ static int fnc_close (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) /* BEGIN { f = sys::open("/tmp/test.txt", sys::O_RDONLY); - while (sys::read(f, x, 10) > 0) printf (B"%s", x); + while (sys::read(f, x, 10) > 0) printf (@b"%s", x); sys::close (f); } */ @@ -642,7 +642,7 @@ static int fnc_open (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) /* a = sys::openfd(1); - sys::write (a, B"let me write something here\n"); + sys::write (a, @b"let me write something here\n"); sys::close (a, sys::C_KEEPFD); ## set C_KEEPFD to release 1 without closing it. ##sys::close (a); print "done\n"; @@ -874,7 +874,7 @@ static int fnc_write (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) b = sys::dup(a); sys::close(a); - while (sys::read(b, abc, 100) > 0) printf (B"%s", abc); + while (sys::read(b, abc, 100) > 0) printf (@b"%s", abc); print "-------------------------------"; @@ -882,7 +882,7 @@ static int fnc_write (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) ## assertion: b == c sys::close (x); - while (sys::read(c, abc, 100) > 0) printf (B"%s", abc); + while (sys::read(c, abc, 100) > 0) printf (@b"%s", abc); sys::close (c); */ @@ -1265,7 +1265,7 @@ BEGIN { a["lflag"] &= ~sys::TC_LFLAG_ECHO; sys::tcsetattr(IN, 0, a); printf ("Password:"); - ##sys::write (OUT, B"Password:"); + ##sys::write (OUT, @b"Password:"); getline x; a["lflag"] |= sys::TC_LFLAG_ECHO; sys::tcsetattr(IN, 0, a); @@ -1492,8 +1492,8 @@ done: ## parent printf ("parent.... %d %d %d\n", sys::getpid(), p0, p1); sys::close (p0); - sys::write (p1, B"hello"); - sys::write (p1, B"world"); + sys::write (p1, @b"hello"); + sys::write (p1, @b"world"); sys::close (p1); sys::wait(a); }##if (sys::pipe(p0, p1) <= -1) @@ -1532,8 +1532,8 @@ done: ## parent printf ("parent.... %d %d %d\n", sys::getpid(), p0, p1); sys::close (p0); - sys::write (p1, B"hello"); - sys::write (p1, B"world"); + sys::write (p1, @b"hello"); + sys::write (p1, @b"world"); sys::close (p1); sys::wait(a); } diff --git a/hawk/lib/parse.c b/hawk/lib/parse.c index e6bb4d99..635cd90c 100644 --- a/hawk/lib/parse.c +++ b/hawk/lib/parse.c @@ -6104,7 +6104,7 @@ static int get_string ( #if defined(HAWK_OOCH_IS_BCH) /* nothing extra to handle byte_only */ #else - if (byte_only && c != HAWK_T('\\') && !HAWK_BYTE_PRINTABLE(c)) + if (byte_only && c != '\\' && !HAWK_BYTE_PRINTABLE(c)) { hawk_seterrfmt (hawk, &hawk->tok.loc, HAWK_EMBSCHR, HAWK_T("invalid mbs character '%jc'"), (hawk_ooch_t)c); return -1; @@ -6330,7 +6330,7 @@ static int get_rexstr (hawk_t* hawk, hawk_tok_t* tok) } -static int get_single_quoted_string (hawk_t* hawk, int byte_only, hawk_tok_t* tok) +static int get_raw_string (hawk_t* hawk, hawk_ooch_t end_char, int byte_only, hawk_tok_t* tok) { hawk_ooci_t c; @@ -6347,14 +6347,14 @@ static int get_single_quoted_string (hawk_t* hawk, int byte_only, hawk_tok_t* to #if defined(HAWK_OOCH_IS_BCH) /* nothing extra to handle byte_only */ #else - if (byte_only && c != HAWK_T('\\') && !HAWK_BYTE_PRINTABLE(c)) + if (byte_only && c != '\\' && !HAWK_BYTE_PRINTABLE(c)) { hawk_seterrfmt (hawk, &hawk->tok.loc, HAWK_EMBSCHR, HAWK_T("invalid mbs character '%jc'"), (hawk_ooch_t)c); return -1; } #endif - if (c == HAWK_T('\'')) + if (c == end_char) { /* terminating quote */ GET_CHAR (hawk); @@ -6668,7 +6668,6 @@ retry: { int type; - ADD_TOKEN_CHAR (hawk, tok, c); GET_CHAR_TO (hawk, c); if (c != HAWK_T('_') && !hawk_is_ooch_alpha(c)) @@ -6678,8 +6677,100 @@ retry: hawk_seterrnum (hawk, &(hawk)->tok.loc, HAWK_EXKWEM); return -1; } + + if (c == 'B' || c == 'b') + { + hawk_sio_lxc_t pc1 = hawk->sio.last; + GET_CHAR_TO (hawk, c); + if (c == 'R' || c == 'r') + { + hawk_sio_lxc_t pc2 = hawk->sio.last; + GET_CHAR_TO (hawk, c); + if (c == '\"') + { + /* raw byte string */ + SET_TOKEN_TYPE (hawk, tok, TOK_MBS); + if (get_raw_string(hawk, c, 1, tok) <= -1) return -1; + } + else + { + unget_char (hawk, &hawk->sio.last); + unget_char (hawk, &pc2); + hawk->sio.last = pc1; + c = pc1.c; + goto process_at_identifier; + } + } + else if (c == '\"') + { + /* B, b - byte string */ + SET_TOKEN_TYPE (hawk, tok, TOK_MBS); + if (get_string(hawk, c, HAWK_T('\\'), 0, 1, 0, tok) <= -1) return -1; + } + #if 0 + + else if (c == '\'') + { + /* TODO: character literal when I add a character type?? */ + } + #endif + else + { + unget_char (hawk, &hawk->sio.last); + hawk->sio.last = pc1; + c = pc1.c; + goto process_at_identifier; + } + } + else if (c == 'R' || c == 'r') + { + hawk_sio_lxc_t pc1 = hawk->sio.last; + GET_CHAR_TO (hawk, c); + if (c == 'B' || c == 'b') + { + hawk_sio_lxc_t pc2 = hawk->sio.last; + GET_CHAR_TO (hawk, c); + if (c == '\"') + { + /* raw byte string */ + SET_TOKEN_TYPE (hawk, tok, TOK_MBS); + if (get_raw_string(hawk, c, 1, tok) <= -1) return -1; + } + else + { + unget_char (hawk, &hawk->sio.last); + unget_char (hawk, &pc2); + hawk->sio.last = pc1; + c = pc1.c; + goto process_at_identifier; + } + } + else if (c == '\"') + { + /* R, r - raw string */ + SET_TOKEN_TYPE (hawk, tok, TOK_STR); + if (get_raw_string(hawk, c, 0, tok) <= -1) return -1; + } + #if 0 + + else if (c == '\'') + { + /* TODO: character literal when I add a character type?? */ + } + #endif + else + { + unget_char (hawk, &hawk->sio.last); + hawk->sio.last = pc1; + c = pc1.c; + goto process_at_identifier; + } + } else { + process_at_identifier: + ADD_TOKEN_CHAR (hawk, tok, HAWK_T('@')); + /* expect normal identifier starting with an alphabet */ do { @@ -6712,54 +6803,33 @@ retry: } } } - else if (c == HAWK_T('B')) - { - GET_CHAR_TO (hawk, c); - if (c == HAWK_T('\"')) - { - /* multi-byte string/byte array */ - SET_TOKEN_TYPE (hawk, tok, TOK_MBS); - if (get_string(hawk, c, HAWK_T('\\'), 0, 1, 0, tok) <= -1) return -1; - } - else if (c == HAWK_T('\'')) - { - SET_TOKEN_TYPE (hawk, tok, TOK_MBS); - if (get_single_quoted_string(hawk, 1, tok) <= -1) return -1; - } - else - { - ADD_TOKEN_CHAR (hawk, tok, HAWK_T('B')); - goto process_identifier; - } - } - else if (c == HAWK_T('_') || hawk_is_ooch_alpha(c)) + else if (c == '_' || hawk_is_ooch_alpha(c)) { int type; - process_identifier: /* identifier */ do { ADD_TOKEN_CHAR (hawk, tok, c); GET_CHAR_TO (hawk, c); } - while (c == HAWK_T('_') || hawk_is_ooch_alpha(c) || hawk_is_ooch_digit(c)); + while (c == '_' || hawk_is_ooch_alpha(c) || hawk_is_ooch_digit(c)); type = classify_ident(hawk, HAWK_OOECS_OOCS(tok->name)); SET_TOKEN_TYPE (hawk, tok, type); } - else if (c == HAWK_T('\"')) + else if (c == '\"') { /* double-quoted string */ SET_TOKEN_TYPE (hawk, tok, TOK_STR); - if (get_string(hawk, c, HAWK_T('\\'), 0, 0, 0, tok) <= -1) return -1; + if (get_string(hawk, c, '\\', 0, 0, 0, tok) <= -1) return -1; } - else if (c == HAWK_T('\'')) +#if 0 + else if (c == '\'')) { - /* single-quoted string - no escaping */ - SET_TOKEN_TYPE (hawk, tok, TOK_STR); - if (get_single_quoted_string(hawk, 0, tok) <= -1) return -1; + /* TODO: character literal */ } +#endif else { try_get_symbols: @@ -7025,9 +7095,9 @@ static int deparse (hawk_t* hawk) hawk_getkwname (hawk, HAWK_KWID_BEGIN, &kw); - if (hawk_putsrcoochars (hawk, kw.ptr, kw.len) <= -1) EXIT_DEPARSE (); - if (hawk_putsrcoocstr (hawk, HAWK_T(" ")) <= -1) EXIT_DEPARSE (); - if (hawk_prnnde (hawk, nde) <= -1) EXIT_DEPARSE (); + if (hawk_putsrcoochars(hawk, kw.ptr, kw.len) <= -1) EXIT_DEPARSE (); + if (hawk_putsrcoocstr(hawk, HAWK_T(" ")) <= -1) EXIT_DEPARSE (); + if (hawk_prnnde(hawk, nde) <= -1) EXIT_DEPARSE (); if (hawk->opt.trait & HAWK_CRLF) { @@ -7194,7 +7264,7 @@ static int flush_out (hawk_t* hawk) int hawk_putsrcoocstr (hawk_t* hawk, const hawk_ooch_t* str) { - while (*str != HAWK_T('\0')) + while (*str != '\0') { if (put_char(hawk, *str) <= -1) return -1; str++; diff --git a/hawk/lib/tree.c b/hawk/lib/tree.c index 359614de..234e0f6b 100644 --- a/hawk/lib/tree.c +++ b/hawk/lib/tree.c @@ -423,7 +423,7 @@ static int print_expr (hawk_t* hawk, hawk_nde_t* nde) hawk_bch_t* ptr; hawk_oow_t len, i; - PUT_SRCSTR (hawk, HAWK_T("B\"")); + PUT_SRCSTR (hawk, HAWK_T("@b\"")); ptr = ((hawk_nde_mbs_t*)nde)->ptr; len = ((hawk_nde_mbs_t*)nde)->len; for (i = 0; i < len; i++) @@ -1143,9 +1143,9 @@ static int print_stmts (hawk_t* hawk, hawk_nde_t* tree, int depth) { hawk_nde_t* p = tree; - while (p != HAWK_NULL) + while (p) { - if (print_stmt (hawk, p, depth) == -1) return -1; + if (print_stmt(hawk, p, depth) == -1) return -1; p = p->next; } @@ -1154,19 +1154,19 @@ static int print_stmts (hawk_t* hawk, hawk_nde_t* tree, int depth) int hawk_prnpt (hawk_t* hawk, hawk_nde_t* tree) { - return print_stmts (hawk, tree, 0); + return print_stmts(hawk, tree, 0); } int hawk_prnnde (hawk_t* hawk, hawk_nde_t* tree) { - return print_stmt (hawk, tree, 0); + return print_stmt(hawk, tree, 0); } int hawk_prnptnpt (hawk_t* hawk, hawk_nde_t* tree) { hawk_nde_t* nde = tree; - while (nde != HAWK_NULL) + while (nde) { if (print_expr (hawk, nde) == -1) return -1; if (nde->next == HAWK_NULL) break; @@ -1183,7 +1183,7 @@ void hawk_clrpt (hawk_t* hawk, hawk_nde_t* tree) hawk_nde_t* p = tree; hawk_nde_t* next; - while (p != HAWK_NULL) + while (p) { next = p->next; diff --git a/hawk/t/Makefile.am b/hawk/t/Makefile.am index 945b9008..755546da 100644 --- a/hawk/t/Makefile.am +++ b/hawk/t/Makefile.am @@ -12,7 +12,7 @@ AM_CPPFLAGS = \ AM_LDFLAGS = -L$(abs_builddir)/../lib -L$(libdir) LDADD = $(PTHREAD_LIBS) -check_SCRIPTS = h-001.hawk +check_SCRIPTS = h-001.hawk h-002.hawk ##noinst_SCRIPTS = $(check_SCRIPTS) EXTRA_DIST = $(check_SCRIPTS) diff --git a/hawk/t/Makefile.in b/hawk/t/Makefile.in index 4005f0c6..0bbb5627 100644 --- a/hawk/t/Makefile.in +++ b/hawk/t/Makefile.in @@ -558,7 +558,7 @@ AM_CPPFLAGS = \ AM_LDFLAGS = -L$(abs_builddir)/../lib -L$(libdir) LDADD = $(PTHREAD_LIBS) -check_SCRIPTS = h-001.hawk +check_SCRIPTS = h-001.hawk h-002.hawk EXTRA_DIST = $(check_SCRIPTS) t_001_SOURCES = t-001.c t.h t_002_SOURCES = t-002.c t.h diff --git a/hawk/t/ensure.inc b/hawk/t/ensure.inc new file mode 100644 index 00000000..9d07a0df --- /dev/null +++ b/hawk/t/ensure.inc @@ -0,0 +1,8 @@ +function ensure (a, b, desc, line) +{ + if (a != b) + { + printf ("FAILURE[%d] %s\n", line, desc); + exit (-1); + } +} diff --git a/hawk/t/h-001.hawk b/hawk/t/h-001.hawk index 0a657d7f..784fa41c 100644 --- a/hawk/t/h-001.hawk +++ b/hawk/t/h-001.hawk @@ -1,14 +1,6 @@ @pragma entry main - -function ensure (a, b, desc, line) -{ - if (a != b) - { - printf ("FAILURE[%d] %s\n", line, desc); - exit (-1); - } -} +@include "ensure.inc"; function call_by_ref_1(&a, b, &c) { diff --git a/hawk/t/h-002.hawk b/hawk/t/h-002.hawk new file mode 100644 index 00000000..313429c1 --- /dev/null +++ b/hawk/t/h-002.hawk @@ -0,0 +1,87 @@ +@pragma entry main +@pragma implicit off + +@include "ensure.inc"; + +function main() +{ + + ## call by reference. change an argument to array in the callee + { + @local ini, foo1, foo2, foo3, foo4; + + test1(foo1); + test2(foo2); + test3(foo3); + test4(foo4); + ensure (hawk::typename(foo1), "map", @SCRIPTNAME, @SCRIPTLINE); + ensure (hawk::typename(foo2), "map", @SCRIPTNAME, @SCRIPTLINE); + ensure (hawk::typename(foo3), "nil", @SCRIPTNAME, @SCRIPTLINE); + ensure (hawk::typename(foo4), "nil", @SCRIPTNAME, @SCRIPTLINE); + ensure (foo1[1], 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (foo2[1], 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (foo3[1], ini, @SCRIPTNAME, @SCRIPTLINE); + ensure (foo4[1], ini, @SCRIPTNAME, @SCRIPTLINE); + } + + ## gsub + { + @local w, x, y, z, z1, z2; + x = y = "x\\y"; + gsub(/\\\\/, "A", x); + gsub("\\\\", "A", y); + ensure (x, "x\\y", @SCRIPTNAME, @SCRIPTLINE); + ensure (y, "xAy", @SCRIPTNAME, @SCRIPTLINE); + + x = y = "x\\\\y"; + gsub(/\\\\/, "A", x); + gsub("\\\\", "A", y); + ensure (x, "xAy", @SCRIPTNAME, @SCRIPTLINE); + ensure (y, "xAAy", @SCRIPTNAME, @SCRIPTLINE); + } + + ## gsub - POSIX rule for &, \&, \\&, \\\& + { + @local w, x, y, z, z1, z2, z3; + w = x = y = z = z1 = z2 = "xax"; + + gsub(/a/, "\&", w); # gsub sees & + gsub(/a/, "\\&", x); # gsub sees \& + gsub(/a/, "\\\&", y); # gsub sees \& + + gsub(/a/, "\\\\&", z); # gsub sees \\& + gsub(/a/, "\\\\\&", z1); # gsub sees \\& + gsub(/a/, "\\\\\\&", z2); # gsub sees \\\& + + ensure (w, "xax", @SCRIPTNAME, @SCRIPTLINE); + ensure (x, "x&x", @SCRIPTNAME, @SCRIPTLINE); + ensure (y, "x&x", @SCRIPTNAME, @SCRIPTLINE); + ensure (z, "x\\ax", @SCRIPTNAME, @SCRIPTLINE); + ensure (z1, "x\\ax", @SCRIPTNAME, @SCRIPTLINE); + ensure (z2, "x\\&x", @SCRIPTNAME, @SCRIPTLINE); + } + + ## gsub - POSIX rule for &, \&, \\&, \\\& - express the same test with a raw string literal + { + @local w, x, y, z; + w = x = y = z = "xax"; + + gsub(/a/, @r"&", w); # gsub sees & + gsub(/a/, @r"\&", x); # gsub sees \& + + gsub(/a/, @r"\\&", y); # gsub sees \\& + gsub(/a/, @r"\\\&", z); # gsub sees \\\& + + ensure (w, "xax", @SCRIPTNAME, @SCRIPTLINE); + ensure (x, "x&x", @SCRIPTNAME, @SCRIPTLINE); + ensure (y, @r"x\ax", @SCRIPTNAME, @SCRIPTLINE); + ensure (z, @r"x\&x", @SCRIPTNAME, @SCRIPTLINE); + } + + print "SUCCESS"; +} + +function test1(&foo) { test2(foo) } +function test2(&bar) { bar[1] = 1 } +function test3(foo) { test2(foo) } +function test4(bar) { bar[1] = 1 }