From f8c84dc2f0fc88924a28096ee21a3f33de8b202b Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 21 Sep 2024 00:51:19 +0900 Subject: [PATCH] changing the feed handler to recognize a symbol literal without double-quotes. --- lib/hcl-prv.h | 12 ++-- lib/read.c | 179 ++++++++++++++++++++++++++++++------------------ t/feed-5001.err | 14 +++- 3 files changed, 130 insertions(+), 75 deletions(-) diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index 4975cb0..b9fd710 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -769,8 +769,8 @@ struct hcl_flx_hi_t hcl_oow_t char_count; }; -typedef struct hcl_flx_hb_t hcl_flx_hb_t; /* intermediate state for #b */ -struct hcl_flx_hb_t +typedef struct hcl_flx_hbc_t hcl_flx_hbc_t; /* intermediate state for #b */ +struct hcl_flx_hbc_t { /* state data */ hcl_ooch_t start_c; @@ -860,11 +860,11 @@ enum hcl_flx_state_t HCL_FLX_DELIM_TOKEN, HCL_FLX_DOLLARED_IDENT, HCL_FLX_HMARKED_TOKEN, /* hash-marked token */ - HCL_FLX_HMARKED_B, /* #b - intermediate state before #b[ or #b-radixed binary number */ + HCL_FLX_HMARKED_BC, /* #b - intermediate state before #b[, #c[, or #b-radixed binary number */ HCL_FLX_HMARKED_BINOP, /* #++ - binary operator symbol */ HCL_FLX_HMARKED_CHAR, /* hash-marked character that begins with #\ */ - HCL_FLX_HMARKED_IDENT, /* hash-marked identifier like #include, etc */ HCL_FLX_HMARKED_NUMBER, /* hash-marked number - radixed number like #xABCD */ + HCL_FLX_HMARKED_IDENT, /* literal symbol */ HCL_FLX_PLAIN_IDENT, /* plain identifier */ HCL_FLX_BINOP, /* binary operator */ HCL_FLX_PLAIN_NUMBER, /* plain number */ @@ -955,8 +955,8 @@ struct hcl_compiler_t hcl_flx_dt_t dt; /* delimiter token */ hcl_flx_di_t di; /* dollar-signed identifier */ hcl_flx_hc_t hc; /* hash-marked character */ - hcl_flx_hi_t hi; /* hash-marked identifier */ - hcl_flx_hb_t hb; /* #b ... */ + hcl_flx_hi_t hi; /* hash-marked identifier - literal symbol */ + hcl_flx_hbc_t hbc; /* #b #c ... */ hcl_flx_hn_t hn; /* hash-marked number - radixed number */ hcl_flx_pi_t pi; /* plain identifier */ hcl_flx_binop_t binop; /* binary operator */ diff --git a/lib/read.c b/lib/read.c index bcdd4b1..487e16d 100644 --- a/lib/read.c +++ b/lib/read.c @@ -1408,9 +1408,19 @@ static int feed_process_token (hcl_t* hcl) /* the #include directive is an exception to the general expression rule. * use this exceptional code block to divert the major token processing */ - if (TOKEN_TYPE(hcl) != HCL_TOK_STRLIT) + if (TOKEN_TYPE(hcl) == HCL_TOK_EOL) { - hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + hcl_setsynerrbfmt (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), HCL_NULL, + "%.*js target not specified", + vocas[VOCA_INCLUDE].len, vocas[VOCA_INCLUDE].str); + goto oops; + } + else if (TOKEN_TYPE(hcl) != HCL_TOK_STRLIT) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), HCL_NULL, + "%.*js target expected in place of '%.*js'", + vocas[VOCA_INCLUDE].len, vocas[VOCA_INCLUDE].str, + TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); goto oops; } @@ -2142,9 +2152,9 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st #define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt)) #define FLX_DI(hcl) (&((hcl)->c->feed.lx.u.di)) #define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc)) -#define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi)) -#define FLX_HB(hcl) (&((hcl)->c->feed.lx.u.hb)) +#define FLX_HBC(hcl) (&((hcl)->c->feed.lx.u.hbc)) #define FLX_HN(hcl) (&((hcl)->c->feed.lx.u.hn)) +#define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi)) #define FLX_PI(hcl) (&((hcl)->c->feed.lx.u.pi)) #define FLX_BINOP(hcl) (&((hcl)->c->feed.lx.u.binop)) #define FLX_PN(hcl) (&((hcl)->c->feed.lx.u.pn)) @@ -2167,10 +2177,10 @@ static HCL_INLINE void init_flx_hi (hcl_flx_hi_t* hi) HCL_MEMSET (hi, 0, HCL_SIZEOF(*hi)); } -static HCL_INLINE void init_flx_hb (hcl_flx_hb_t* hb, hcl_ooch_t start_c) +static HCL_INLINE void init_flx_hbc (hcl_flx_hbc_t* hbc, hcl_ooch_t start_c) { - HCL_MEMSET (hb, 0, HCL_SIZEOF(*hb)); - hb->start_c = start_c; + HCL_MEMSET (hbc, 0, HCL_SIZEOF(*hbc)); + hbc->start_c = start_c; } static HCL_INLINE void init_flx_hn (hcl_flx_hn_t* hn, hcl_tok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix) @@ -2434,7 +2444,7 @@ static int flx_dollared_ident (hcl_t* hcl, hcl_ooci_t c) if (get_directive_token_type(hcl, &tok_type) <= -1) { hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid dollar-signed literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + "invalid dollar-prefixed identifier '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); return -1; } else @@ -2443,13 +2453,31 @@ static int flx_dollared_ident (hcl_t* hcl, hcl_ooci_t c) goto not_consumed; } } - else + else if (is_ident_char(c)) { - ident_char: + if (di->char_count == 0) + { + if (!is_lead_ident_char(c)) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL, + "'%c' prohibited as first character after '%.*js'", + c, TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } + } + ADD_TOKEN_CHAR (hcl, c); di->char_count++; goto consumed; } + else + { + hcl_setsynerrbfmt ( + hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL, + "invalid dollar-prefixed identifier character '%jc' after '%.*js'", c, + TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } consumed: return 1; @@ -2519,15 +2547,10 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) case 'B': case 'c': /* character array */ case 'C': - #if 0 - init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2); - goto radixed_number; - #else /* if #b is followed by [, it is a starter for a byte array */ - init_flx_hb (FLX_HB(hcl), c); - FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_B); + init_flx_hbc (FLX_HBC(hcl), c); + FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_BC); break; - #endif case 'e': /* #eXXX - error literal */ init_flx_hn (FLX_HN(hcl), HCL_TOK_ERRLIT, HCL_SYNERR_ERRLIT, 10); @@ -2566,7 +2589,6 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) /* --------------------------- */ default: - /* the character used as case values above can never be the first character of a hash-marked identifier */ init_flx_hi (FLX_HI(hcl)); FEED_CONTINUE (hcl, HCL_FLX_HMARKED_IDENT); goto not_consumed; @@ -2677,50 +2699,9 @@ not_consumed: return 0; } -static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c) +static int flx_hmarked_bc (hcl_t* hcl, hcl_ooci_t c) { - hcl_flx_hi_t* hi = FLX_HI(hcl); - - if (is_delim_char(c)) - { - hcl_tok_type_t tok_type; - - if (hi->char_count == 0) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL, - "no valid character after hash sign"); - return -1; - } - - if (get_directive_token_type(hcl, &tok_type) <= -1) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); - return -1; - } - else - { - FEED_WRAP_UP (hcl, tok_type); - goto not_consumed; - } - } - else - { - ADD_TOKEN_CHAR (hcl, c); - hi->char_count++; - goto consumed; - } - -consumed: - return 1; - -not_consumed: - return 0; -} - -static int flx_hmarked_b (hcl_t* hcl, hcl_ooci_t c) -{ - hcl_flx_hb_t* hb = FLX_HB(hcl); + hcl_flx_hbc_t* hb = FLX_HBC(hcl); if (c == '[') { @@ -2731,12 +2712,20 @@ static int flx_hmarked_b (hcl_t* hcl, hcl_ooci_t c) FEED_WRAP_UP_WITH_CHAR (hcl, c, tt); goto consumed; } - else + else if (hb->start_c == 'b' || hb->start_c == 'B') { + /* TODO: this part needs to be removed once 0x, 0b, 0o and etc are implemented */ init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2); FEED_CONTINUE (hcl, HCL_FLX_HMARKED_NUMBER); goto not_consumed; } + else + { + hcl_ooch_t start_c = hb->start_c; + reset_flx_token (hcl); + FEED_CONTINUE_WITH_CHAR (hcl, start_c, HCL_FLX_HMARKED_IDENT); + goto not_consumed; + } consumed: return 1; @@ -2783,8 +2772,8 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c) { if (rn->digit_count == 0) { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL, + "no valid digit after radix specifier '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); return -1; } else if (rn->invalid_digit_count > 0) @@ -2794,8 +2783,8 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c) if (get_directive_token_type(hcl, &tok_type) <= -1) { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "neither valid radixed number nor valid directive %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL, + "neither valid radixed number nor valid directive '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); return -1; } else @@ -2831,6 +2820,56 @@ not_consumed: return 0; } +static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c) +{ + hcl_flx_hi_t* hi = FLX_HI(hcl); + + /* hi->char_count doesn't include the first '#' */ + + if (is_delim_char(c)) + { + if (hi->char_count == 0) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_SYMLIT, FLX_LOC(hcl), HCL_NULL, + "no valid character after hash sign"); + return -1; + } + + FEED_WRAP_UP (hcl, HCL_TOK_SYMLIT); + goto not_consumed; + } + else if (is_ident_char(c)) + { + if (hi->char_count == 0) + { + if (!is_lead_ident_char(c)) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL, + "'%c' prohibited as first character of symbol", c); + return -1; + } + } + + ADD_TOKEN_CHAR (hcl, c); + hi->char_count++; + goto consumed; + } + else + { + hcl_setsynerrbfmt ( + hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL, + "invalid symbol character '%jc' after '%.*js'", c, + TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } + +consumed: + return 1; + +not_consumed: + return 0; +} + static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */ { hcl_flx_pi_t* pi = FLX_PI(hcl); @@ -3272,6 +3311,7 @@ static int flx_bc_prefix (hcl_t* hcl, hcl_ooci_t c) { hcl_flx_bcp_t* bcp = FLX_BCP(hcl); + if (c == '\"') /* b" B" c" C" */ { int is_byte = (bcp->start_c == 'b' || bcp->start_c == 'B'); @@ -3309,8 +3349,10 @@ not_consumed: return 0; } + /* ------------------------------------------------------------------------ */ + static int feed_char (hcl_t* hcl, hcl_ooci_t c) { /*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/ @@ -3323,11 +3365,12 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c) case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c); case HCL_FLX_DOLLARED_IDENT: return flx_dollared_ident(hcl, c); case HCL_FLX_HMARKED_TOKEN: return flx_hmarked_token(hcl, c); - case HCL_FLX_HMARKED_B: return flx_hmarked_b(hcl, c); + case HCL_FLX_HMARKED_BC: return flx_hmarked_bc(hcl, c); case HCL_FLX_HMARKED_BINOP: return flx_hmarked_binop(hcl, c); case HCL_FLX_HMARKED_CHAR: return flx_hmarked_char(hcl, c); - case HCL_FLX_HMARKED_IDENT: return flx_hmarked_ident(hcl, c); case HCL_FLX_HMARKED_NUMBER: return flx_hmarked_number(hcl, c); + case HCL_FLX_HMARKED_IDENT: return flx_hmarked_ident(hcl, c); + case HCL_FLX_PLAIN_IDENT: return flx_plain_ident(hcl, c); case HCL_FLX_BINOP: return flx_binop(hcl, c); case HCL_FLX_PLAIN_NUMBER: return flx_plain_number(hcl, c); diff --git a/t/feed-5001.err b/t/feed-5001.err index 69eefae..4b4832d 100644 --- a/t/feed-5001.err +++ b/t/feed-5001.err @@ -1,3 +1,15 @@ +$?a ##ERROR: syntax error - '?' prohibited as first character after '$' + +--- + +$include ##ERROR: syntax error - $include target not specified + +--- + +$include 10 ##ERROR: syntax error - $include target expected in place of '10' + +--- + ## x := (+ 10 20) "aaaa"; ##ERROR: syntax error - too many rvalues @@ -67,7 +79,7 @@ printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal printf "%O\n" #b[ 10 20 30 ]; printf "%010b\n" #b0101; -printf "%O\n" #bxy; ##ERROR: syntax error - neither valid radixed number nor valid directive #bxy +printf "%O\n" #bxy; ##ERROR: syntax error - neither valid radixed number nor valid directive '#bxy' ---