From aaa6e35787dd0df350cfc0ef53c34ccf1d66227c Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Thu, 25 Jan 2024 23:48:06 +0900 Subject: [PATCH] enhanced the reader and compiler to treat characters and strings prefixed with b and u as a limited range character and a byte array with internal terminating null at the back --- lib/cnode.c | 15 ++++++++++++++- lib/comp.c | 9 +++++++++ lib/hcl-prv.h | 16 +++++++++++++++- lib/hcl.h | 14 ++++++++++++++ lib/obj.c | 28 ++++++++++++++++++++++++++++ lib/print.c | 2 ++ lib/read.c | 48 +++++++++++++++++++++++++++++++++++++++--------- t/Makefile.am | 1 + t/Makefile.in | 1 + t/feed-5005.err | 4 ++++ 10 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 t/feed-5005.err diff --git a/lib/cnode.c b/lib/cnode.c index 6c696db..3c30e1b 100644 --- a/lib/cnode.c +++ b/lib/cnode.c @@ -91,7 +91,7 @@ hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, c return make_cnode(hcl, HCL_CNODE_DCSTAR, flags, loc, tok); } -hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v) +hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v) { hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_CHARLIT, flags, loc, tok); if (HCL_UNLIKELY(!c)) return HCL_NULL; @@ -99,6 +99,14 @@ hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, return c; } +hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v) +{ + hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_BCHRLIT, flags, loc, tok); + if (HCL_UNLIKELY(!c)) return HCL_NULL; + c->u.bchrlit.v = v; + return c; +} + hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) { hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_SYMBOL, flags, loc, tok); @@ -120,6 +128,11 @@ hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* 
loc, c return make_cnode(hcl, HCL_CNODE_STRLIT, flags, loc, tok); } +hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) +{ + return make_cnode(hcl, HCL_CNODE_BSTRLIT, flags, loc, tok); +} + hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) { return make_cnode(hcl, HCL_CNODE_NUMLIT, flags, loc, tok); diff --git a/lib/comp.c b/lib/comp.c index 6e43b2f..54a8c6d 100644 --- a/lib/comp.c +++ b/lib/comp.c @@ -4436,11 +4436,20 @@ redo: lit = HCL_CHAR_TO_OOP(oprnd->u.charlit.v); goto literal; + case HCL_CNODE_BCHRLIT: /* byte character still converts to character */ + lit = HCL_CHAR_TO_OOP((hcl_ooch_t)oprnd->u.bchrlit.v); + goto literal; + case HCL_CNODE_STRLIT: lit = hcl_makestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0); if (HCL_UNLIKELY(!lit)) return -1; goto literal; + case HCL_CNODE_BSTRLIT: + lit = hcl_makebytestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0); + if (HCL_UNLIKELY(!lit)) return -1; + goto literal; + case HCL_CNODE_NUMLIT: lit = string_to_num(hcl, HCL_CNODE_GET_TOK(oprnd), HCL_CNODE_GET_LOC(oprnd), 0); if (HCL_UNLIKELY(!lit)) return -1; diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index a9f9d5a..3db9d17 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -159,8 +159,14 @@ enum hcl_tok_type_t { HCL_TOK_EOF, + + /* the following 4 items must be in this order for code + * in flx_quoted_token() in read.c */ HCL_TOK_CHARLIT, + HCL_TOK_BCHRLIT, HCL_TOK_STRLIT, + HCL_TOK_BSTRLIT, + HCL_TOK_NUMLIT, HCL_TOK_RADNUMLIT, HCL_TOK_FPDECLIT, @@ -227,9 +233,11 @@ struct hcl_link_t enum hcl_cnode_type_t { HCL_CNODE_CHARLIT, + HCL_CNODE_BCHRLIT, HCL_CNODE_SYMBOL, HCL_CNODE_DSYMBOL, /* dotted symbol */ HCL_CNODE_STRLIT, + HCL_CNODE_BSTRLIT, HCL_CNODE_NUMLIT, HCL_CNODE_RADNUMLIT, HCL_CNODE_FPDECLIT, @@ -300,6 +308,10 @@ struct hcl_cnode_t hcl_ooch_t v; } charlit; struct + { + hcl_oob_t v; + } bchrlit; + struct { hcl_syncode_t 
syncode; /* special if non-zero */ } symbol; @@ -1750,10 +1762,12 @@ hcl_cnode_t* hcl_makecnodesuper (hcl_t* hcl, int flags, const hcl_loc_t* loc, co hcl_cnode_t* hcl_makecnodeellipsis (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodetrpcolons (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); -hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v); +hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v); +hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v); hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodedsymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, int is_cla); hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); +hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnoderadnumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodefpdeclit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); diff --git a/lib/hcl.h b/lib/hcl.h index 2286ae4..19b1e5d 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -2676,6 +2676,20 @@ HCL_EXPORT hcl_oop_t hcl_makebytearray ( hcl_oow_t len ); +HCL_EXPORT hcl_oop_t hcl_makebytestringwithbytes ( + hcl_t* hcl, + const hcl_oob_t* ptr, + hcl_oow_t len, + int ngc +); + +HCL_EXPORT hcl_oop_t hcl_makebytestring ( + hcl_t* hcl, + const hcl_ooch_t* ptr, + hcl_oow_t len, + int ngc +); + HCL_EXPORT 
hcl_oop_t hcl_makestring ( hcl_t* hcl, const hcl_ooch_t* ptr, diff --git a/lib/obj.c b/lib/obj.c index dfd02a7..5933b70 100644 --- a/lib/obj.c +++ b/lib/obj.c @@ -323,6 +323,34 @@ hcl_oop_t hcl_makebytearray (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t size) return hcl_allocbyteobj(hcl, HCL_BRAND_BYTE_ARRAY, ptr, size); } +hcl_oop_t hcl_makebytestringwithbytes (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t len, int ngc) +{ + return alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, ptr, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc); +} + +hcl_oop_t hcl_makebytestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc) +{ + /* a byte string is a byte array with an extra null at the back. + * the input to this function, however, is the pointer to hcl_ooch_t data + * because this function is mainly used to convert a token to a byte string. + * the token in the compiler is stored as a hcl_ooch_t string. */ + + hcl_oop_byte_t b; + hcl_oow_t i; + hcl_oob_t v; + + b = alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, HCL_NULL, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc); + if (HCL_UNLIKELY(!b)) return HCL_NULL; + + for (i = 0; i < len; i++) + { + v = ptr[i] & 0xFF; + HCL_OBJ_SET_BYTE_VAL(b, i, v); + } + + return (hcl_oop_t)b; +} + hcl_oop_t hcl_makestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc) { /*return hcl_alloccharobj(hcl, HCL_BRAND_STRING, ptr, len);*/ diff --git a/lib/print.c b/lib/print.c index efa9299..adc8174 100644 --- a/lib/print.c +++ b/lib/print.c @@ -798,9 +798,11 @@ void hcl_dumpcnode (hcl_t* hcl, hcl_cnode_t* cnode, int newline) switch (t) { case HCL_CNODE_CHARLIT: + case HCL_CNODE_BCHRLIT: case HCL_CNODE_SYMBOL: case HCL_CNODE_DSYMBOL: case HCL_CNODE_STRLIT: + case HCL_CNODE_BSTRLIT: case HCL_CNODE_NUMLIT: case HCL_CNODE_RADNUMLIT: case HCL_CNODE_FPDECLIT: diff --git a/lib/read.c b/lib/read.c index 97c0992..8e084e7 100644 --- a/lib/read.c +++ b/lib/read.c @@ -614,7 +614,7 @@ static HCL_INLINE hcl_cnode_t* leave_list 
(hcl_t* hcl, hcl_loc_t* list_loc, int* fake_tok_ptr = &fake_tok; } - /* TODO: check the number of argumetns in advance??? */ + /* TODO: check the number of arguments in advance??? */ sym = hcl_makecnodesymbol(hcl, 0, &loc, fake_tok_ptr); if (HCL_UNLIKELY(!sym)) { @@ -1476,6 +1476,10 @@ static int feed_process_token (hcl_t* hcl) frd->obj = hcl_makecnodecharlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0)); goto auto_xlist; + case HCL_TOK_BCHRLIT: + frd->obj = hcl_makecnodebchrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), (hcl_oob_t)TOKEN_NAME_CHAR(hcl, 0)); + goto auto_xlist; + case HCL_TOK_NUMLIT: frd->obj = hcl_makecnodenumlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); goto auto_xlist; @@ -1498,6 +1502,10 @@ static int feed_process_token (hcl_t* hcl) frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); goto auto_xlist; + case HCL_TOK_BSTRLIT: + frd->obj = hcl_makecnodebstrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + goto auto_xlist; + case HCL_TOK_IDENT: frd->obj = hcl_makecnodesymbol(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); goto auto_xlist; @@ -2348,9 +2356,16 @@ not_consumed: static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ { hcl_flx_qt_t* qt = FLX_QT(hcl); + hcl_loc_t synerr_loc = *TOKEN_LOC(hcl); if (c == HCL_OOCI_EOF) goto invalid_token; + if (qt->is_byte && c > 0xFF) + { + synerr_loc = *FLX_LOC(hcl); + goto invalid_token; + } + if (qt->escaped == 3) { if (c >= '0' && c <= '7') @@ -2424,8 +2439,12 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ if (qt->escaped == 0 && c == qt->end_char) { /* terminating quote */ -/* TODO: byte string literal or byte literal by checking qt->is_byte... */ - FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */ + + /* qt->tok_type + qt->is_byte assumes that the token types for + * byte-string and byte-character literals are 1 greater than + * string and character literals. 
* see the definition of + * hcl_tok_type_t in hcl-prv.h */ + FEED_WRAP_UP (hcl, qt->tok_type + qt->is_byte); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */ if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token; goto consumed; } @@ -2462,9 +2481,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ goto consumed; } #if (HCL_SIZEOF_OOCH_T >= 2) - else if (c == 'u') + else if (c == 'u' && !qt->is_byte) { - if (qt->is_byte) goto invalid_token; + #if 0 + if (qt->is_byte) + { + synerr_loc = *FLX_LOC(hcl); + goto invalid_token; + } + #endif qt->escaped = 4; qt->digit_count = 0; qt->c_acc = 0; @@ -2472,9 +2497,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ } #endif #if (HCL_SIZEOF_OOCH_T >= 4) - else if (c == 'U') + else if (c == 'U' && !qt->is_byte) { - if (qt->is_byte) goto invalid_token; + #if 0 + if (qt->is_byte) + { + synerr_loc = *FLX_LOC(hcl); + goto invalid_token; + } + #endif qt->escaped = 8; qt->digit_count = 0; qt->c_acc = 0; @@ -2501,8 +2532,7 @@ consumed: return 1; invalid_token: -/* TODO: more accurate syntax error code instead of just synerr_code.... 
*/ - hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL); + hcl_setsynerr (hcl, qt->synerr_code, &synerr_loc, HCL_NULL); return -1; } diff --git a/t/Makefile.am b/t/Makefile.am index a6bae25..5869246 100644 --- a/t/Makefile.am +++ b/t/Makefile.am @@ -19,6 +19,7 @@ check_ERRORS = \ feed-5002.err \ feed-5003.err \ feed-5004.err \ + feed-5005.err \ mlist-5001.err \ var-5001.err \ var-5002.err \ diff --git a/t/Makefile.in b/t/Makefile.in index 948874b..2d6a627 100644 --- a/t/Makefile.in +++ b/t/Makefile.in @@ -490,6 +490,7 @@ check_ERRORS = \ feed-5002.err \ feed-5003.err \ feed-5004.err \ + feed-5005.err \ mlist-5001.err \ var-5001.err \ var-5002.err \ diff --git a/t/feed-5005.err b/t/feed-5005.err new file mode 100644 index 0000000..85ec90f --- /dev/null +++ b/t/feed-5005.err @@ -0,0 +1,4 @@ +## a code point greater than 255 is illegal in the character literal prefixed with b. + +printf "[%c] [#x%x] [%d]\n" '★' '★' #x2605; +printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal