changing the feed handler to recognize a symbol literal without double-quotes.
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
hyung-hwan 2024-09-21 00:51:19 +09:00
parent 3e6cfccb3b
commit f8c84dc2f0
3 changed files with 130 additions and 75 deletions

View File

@ -769,8 +769,8 @@ struct hcl_flx_hi_t
hcl_oow_t char_count; hcl_oow_t char_count;
}; };
typedef struct hcl_flx_hb_t hcl_flx_hb_t; /* intermediate state for #b */ typedef struct hcl_flx_hbc_t hcl_flx_hbc_t; /* intermediate state for #b */
struct hcl_flx_hb_t struct hcl_flx_hbc_t
{ {
/* state data */ /* state data */
hcl_ooch_t start_c; hcl_ooch_t start_c;
@ -860,11 +860,11 @@ enum hcl_flx_state_t
HCL_FLX_DELIM_TOKEN, HCL_FLX_DELIM_TOKEN,
HCL_FLX_DOLLARED_IDENT, HCL_FLX_DOLLARED_IDENT,
HCL_FLX_HMARKED_TOKEN, /* hash-marked token */ HCL_FLX_HMARKED_TOKEN, /* hash-marked token */
HCL_FLX_HMARKED_B, /* #b - intermediate state before #b[ or #b-radixed binary number */ HCL_FLX_HMARKED_BC, /* #b - intermediate state before #b[, #c[, or #b-radixed binary number */
HCL_FLX_HMARKED_BINOP, /* #++ - binary operator symbol */ HCL_FLX_HMARKED_BINOP, /* #++ - binary operator symbol */
HCL_FLX_HMARKED_CHAR, /* hash-marked character that begins with #\ */ HCL_FLX_HMARKED_CHAR, /* hash-marked character that begins with #\ */
HCL_FLX_HMARKED_IDENT, /* hash-marked identifier like #include, etc */
HCL_FLX_HMARKED_NUMBER, /* hash-marked number - radixed number like #xABCD */ HCL_FLX_HMARKED_NUMBER, /* hash-marked number - radixed number like #xABCD */
HCL_FLX_HMARKED_IDENT, /* literal symbol */
HCL_FLX_PLAIN_IDENT, /* plain identifier */ HCL_FLX_PLAIN_IDENT, /* plain identifier */
HCL_FLX_BINOP, /* binary operator */ HCL_FLX_BINOP, /* binary operator */
HCL_FLX_PLAIN_NUMBER, /* plain number */ HCL_FLX_PLAIN_NUMBER, /* plain number */
@ -955,8 +955,8 @@ struct hcl_compiler_t
hcl_flx_dt_t dt; /* delimiter token */ hcl_flx_dt_t dt; /* delimiter token */
hcl_flx_di_t di; /* dollar-signed identifier */ hcl_flx_di_t di; /* dollar-signed identifier */
hcl_flx_hc_t hc; /* hash-marked character */ hcl_flx_hc_t hc; /* hash-marked character */
hcl_flx_hi_t hi; /* hash-marked identifier */ hcl_flx_hi_t hi; /* hash-marked identifier - literal symbol */
hcl_flx_hb_t hb; /* #b ... */ hcl_flx_hbc_t hbc; /* #b #c ... */
hcl_flx_hn_t hn; /* hash-marked number - radixed number */ hcl_flx_hn_t hn; /* hash-marked number - radixed number */
hcl_flx_pi_t pi; /* plain identifier */ hcl_flx_pi_t pi; /* plain identifier */
hcl_flx_binop_t binop; /* binary operator */ hcl_flx_binop_t binop; /* binary operator */

View File

@ -1408,9 +1408,19 @@ static int feed_process_token (hcl_t* hcl)
/* the #include directive is an exception to the general expression rule. /* the #include directive is an exception to the general expression rule.
* use this exceptional code block to divert the major token processing */ * use this exceptional code block to divert the major token processing */
if (TOKEN_TYPE(hcl) != HCL_TOK_STRLIT) if (TOKEN_TYPE(hcl) == HCL_TOK_EOL)
{ {
hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); hcl_setsynerrbfmt (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), HCL_NULL,
"%.*js target not specified",
vocas[VOCA_INCLUDE].len, vocas[VOCA_INCLUDE].str);
goto oops;
}
else if (TOKEN_TYPE(hcl) != HCL_TOK_STRLIT)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), HCL_NULL,
"%.*js target expected in place of '%.*js'",
vocas[VOCA_INCLUDE].len, vocas[VOCA_INCLUDE].str,
TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
goto oops; goto oops;
} }
@ -2142,9 +2152,9 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st
#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt)) #define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt))
#define FLX_DI(hcl) (&((hcl)->c->feed.lx.u.di)) #define FLX_DI(hcl) (&((hcl)->c->feed.lx.u.di))
#define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc)) #define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc))
#define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi)) #define FLX_HBC(hcl) (&((hcl)->c->feed.lx.u.hbc))
#define FLX_HB(hcl) (&((hcl)->c->feed.lx.u.hb))
#define FLX_HN(hcl) (&((hcl)->c->feed.lx.u.hn)) #define FLX_HN(hcl) (&((hcl)->c->feed.lx.u.hn))
#define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi))
#define FLX_PI(hcl) (&((hcl)->c->feed.lx.u.pi)) #define FLX_PI(hcl) (&((hcl)->c->feed.lx.u.pi))
#define FLX_BINOP(hcl) (&((hcl)->c->feed.lx.u.binop)) #define FLX_BINOP(hcl) (&((hcl)->c->feed.lx.u.binop))
#define FLX_PN(hcl) (&((hcl)->c->feed.lx.u.pn)) #define FLX_PN(hcl) (&((hcl)->c->feed.lx.u.pn))
@ -2167,10 +2177,10 @@ static HCL_INLINE void init_flx_hi (hcl_flx_hi_t* hi)
HCL_MEMSET (hi, 0, HCL_SIZEOF(*hi)); HCL_MEMSET (hi, 0, HCL_SIZEOF(*hi));
} }
static HCL_INLINE void init_flx_hb (hcl_flx_hb_t* hb, hcl_ooch_t start_c) static HCL_INLINE void init_flx_hbc (hcl_flx_hbc_t* hbc, hcl_ooch_t start_c)
{ {
HCL_MEMSET (hb, 0, HCL_SIZEOF(*hb)); HCL_MEMSET (hbc, 0, HCL_SIZEOF(*hbc));
hb->start_c = start_c; hbc->start_c = start_c;
} }
static HCL_INLINE void init_flx_hn (hcl_flx_hn_t* hn, hcl_tok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix) static HCL_INLINE void init_flx_hn (hcl_flx_hn_t* hn, hcl_tok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix)
@ -2434,7 +2444,7 @@ static int flx_dollared_ident (hcl_t* hcl, hcl_ooci_t c)
if (get_directive_token_type(hcl, &tok_type) <= -1) if (get_directive_token_type(hcl, &tok_type) <= -1)
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid dollar-signed literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); "invalid dollar-prefixed identifier '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
else else
@ -2443,13 +2453,31 @@ static int flx_dollared_ident (hcl_t* hcl, hcl_ooci_t c)
goto not_consumed; goto not_consumed;
} }
} }
else else if (is_ident_char(c))
{ {
ident_char: if (di->char_count == 0)
{
if (!is_lead_ident_char(c))
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
"'%c' prohibited as first character after '%.*js'",
c, TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
}
ADD_TOKEN_CHAR (hcl, c); ADD_TOKEN_CHAR (hcl, c);
di->char_count++; di->char_count++;
goto consumed; goto consumed;
} }
else
{
hcl_setsynerrbfmt (
hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
"invalid dollar-prefixed identifier character '%jc' after '%.*js'", c,
TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
consumed: consumed:
return 1; return 1;
@ -2519,15 +2547,10 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
case 'B': case 'B':
case 'c': /* character array */ case 'c': /* character array */
case 'C': case 'C':
#if 0
init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2);
goto radixed_number;
#else
/* if #b is followed by [, it is a starter for a byte array */ /* if #b is followed by [, it is a starter for a byte array */
init_flx_hb (FLX_HB(hcl), c); init_flx_hbc (FLX_HBC(hcl), c);
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_B); FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_BC);
break; break;
#endif
case 'e': /* #eXXX - error literal */ case 'e': /* #eXXX - error literal */
init_flx_hn (FLX_HN(hcl), HCL_TOK_ERRLIT, HCL_SYNERR_ERRLIT, 10); init_flx_hn (FLX_HN(hcl), HCL_TOK_ERRLIT, HCL_SYNERR_ERRLIT, 10);
@ -2566,7 +2589,6 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
/* --------------------------- */ /* --------------------------- */
default: default:
/* the character used as case values above can never be the first character of a hash-marked identifier */
init_flx_hi (FLX_HI(hcl)); init_flx_hi (FLX_HI(hcl));
FEED_CONTINUE (hcl, HCL_FLX_HMARKED_IDENT); FEED_CONTINUE (hcl, HCL_FLX_HMARKED_IDENT);
goto not_consumed; goto not_consumed;
@ -2677,50 +2699,9 @@ not_consumed:
return 0; return 0;
} }
static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c) static int flx_hmarked_bc (hcl_t* hcl, hcl_ooci_t c)
{ {
hcl_flx_hi_t* hi = FLX_HI(hcl); hcl_flx_hbc_t* hb = FLX_HBC(hcl);
if (is_delim_char(c))
{
hcl_tok_type_t tok_type;
if (hi->char_count == 0)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
"no valid character after hash sign");
return -1;
}
if (get_directive_token_type(hcl, &tok_type) <= -1)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
else
{
FEED_WRAP_UP (hcl, tok_type);
goto not_consumed;
}
}
else
{
ADD_TOKEN_CHAR (hcl, c);
hi->char_count++;
goto consumed;
}
consumed:
return 1;
not_consumed:
return 0;
}
static int flx_hmarked_b (hcl_t* hcl, hcl_ooci_t c)
{
hcl_flx_hb_t* hb = FLX_HB(hcl);
if (c == '[') if (c == '[')
{ {
@ -2731,12 +2712,20 @@ static int flx_hmarked_b (hcl_t* hcl, hcl_ooci_t c)
FEED_WRAP_UP_WITH_CHAR (hcl, c, tt); FEED_WRAP_UP_WITH_CHAR (hcl, c, tt);
goto consumed; goto consumed;
} }
else else if (hb->start_c == 'b' || hb->start_c == 'B')
{ {
/* TODO: this part needs to be removed once 0x, 0b, 0o and etc are implemented */
init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2); init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2);
FEED_CONTINUE (hcl, HCL_FLX_HMARKED_NUMBER); FEED_CONTINUE (hcl, HCL_FLX_HMARKED_NUMBER);
goto not_consumed; goto not_consumed;
} }
else
{
hcl_ooch_t start_c = hb->start_c;
reset_flx_token (hcl);
FEED_CONTINUE_WITH_CHAR (hcl, start_c, HCL_FLX_HMARKED_IDENT);
goto not_consumed;
}
consumed: consumed:
return 1; return 1;
@ -2783,8 +2772,8 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
{ {
if (rn->digit_count == 0) if (rn->digit_count == 0)
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL,
"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); "no valid digit after radix specifier '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
else if (rn->invalid_digit_count > 0) else if (rn->invalid_digit_count > 0)
@ -2794,8 +2783,8 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
if (get_directive_token_type(hcl, &tok_type) <= -1) if (get_directive_token_type(hcl, &tok_type) <= -1)
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL,
"neither valid radixed number nor valid directive %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); "neither valid radixed number nor valid directive '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
else else
@ -2831,6 +2820,56 @@ not_consumed:
return 0; return 0;
} }
/*
 * Lexer state handler for a hash-marked identifier, i.e. a symbol literal
 * written as '#' followed by identifier characters.
 *
 * Feeds one character 'c' into the token being collected:
 *  - a delimiter character ends the token. The token is wrapped up as
 *    HCL_TOK_SYMLIT and the delimiter is left for the next state to handle.
 *    It is an error if no character has been collected after '#'.
 *  - an identifier character is appended to the token. The first character
 *    after '#' must additionally satisfy is_lead_ident_char().
 *  - any other character is rejected as a syntax error.
 *
 * Returns 1 if 'c' has been consumed, 0 if it has not been consumed,
 * and -1 if a syntax error has been set.
 */
static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hi_t* hi = FLX_HI(hcl);

	/* hi->char_count doesn't include the first '#' */

	if (is_delim_char(c))
	{
		if (hi->char_count == 0)
		{
			/* '#' is immediately followed by a delimiter */
			hcl_setsynerrbfmt (hcl, HCL_SYNERR_SYMLIT, FLX_LOC(hcl), HCL_NULL,
				"no valid character after hash sign");
			return -1;
		}

		FEED_WRAP_UP (hcl, HCL_TOK_SYMLIT);
		goto not_consumed;
	}
	else if (is_ident_char(c))
	{
		if (hi->char_count == 0)
		{
			if (!is_lead_ident_char(c))
			{
				/* 'c' is an hcl_ooci_t value. Format it with %jc like the
				 * other message in this function does, instead of %c.
				 * NOTE(review): plain %c presumably expects a byte character
				 * and would mangle a non-ASCII 'c' - confirm against the
				 * formatter implementation. */
				hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
					"'%jc' prohibited as first character of symbol", c);
				return -1;
			}
		}

		ADD_TOKEN_CHAR (hcl, c);
		hi->char_count++;
		goto consumed;
	}
	else
	{
		hcl_setsynerrbfmt (
			hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
			"invalid symbol character '%jc' after '%.*js'", c,
			TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

consumed:
	return 1;

not_consumed:
	return 0;
}
static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
{ {
hcl_flx_pi_t* pi = FLX_PI(hcl); hcl_flx_pi_t* pi = FLX_PI(hcl);
@ -3272,6 +3311,7 @@ static int flx_bc_prefix (hcl_t* hcl, hcl_ooci_t c)
{ {
hcl_flx_bcp_t* bcp = FLX_BCP(hcl); hcl_flx_bcp_t* bcp = FLX_BCP(hcl);
if (c == '\"') /* b" B" c" C" */ if (c == '\"') /* b" B" c" C" */
{ {
int is_byte = (bcp->start_c == 'b' || bcp->start_c == 'B'); int is_byte = (bcp->start_c == 'b' || bcp->start_c == 'B');
@ -3309,8 +3349,10 @@ not_consumed:
return 0; return 0;
} }
/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */
static int feed_char (hcl_t* hcl, hcl_ooci_t c) static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{ {
/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/ /*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/
@ -3323,11 +3365,12 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c)
case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c); case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c);
case HCL_FLX_DOLLARED_IDENT: return flx_dollared_ident(hcl, c); case HCL_FLX_DOLLARED_IDENT: return flx_dollared_ident(hcl, c);
case HCL_FLX_HMARKED_TOKEN: return flx_hmarked_token(hcl, c); case HCL_FLX_HMARKED_TOKEN: return flx_hmarked_token(hcl, c);
case HCL_FLX_HMARKED_B: return flx_hmarked_b(hcl, c); case HCL_FLX_HMARKED_BC: return flx_hmarked_bc(hcl, c);
case HCL_FLX_HMARKED_BINOP: return flx_hmarked_binop(hcl, c); case HCL_FLX_HMARKED_BINOP: return flx_hmarked_binop(hcl, c);
case HCL_FLX_HMARKED_CHAR: return flx_hmarked_char(hcl, c); case HCL_FLX_HMARKED_CHAR: return flx_hmarked_char(hcl, c);
case HCL_FLX_HMARKED_IDENT: return flx_hmarked_ident(hcl, c);
case HCL_FLX_HMARKED_NUMBER: return flx_hmarked_number(hcl, c); case HCL_FLX_HMARKED_NUMBER: return flx_hmarked_number(hcl, c);
case HCL_FLX_HMARKED_IDENT: return flx_hmarked_ident(hcl, c);
case HCL_FLX_PLAIN_IDENT: return flx_plain_ident(hcl, c); case HCL_FLX_PLAIN_IDENT: return flx_plain_ident(hcl, c);
case HCL_FLX_BINOP: return flx_binop(hcl, c); case HCL_FLX_BINOP: return flx_binop(hcl, c);
case HCL_FLX_PLAIN_NUMBER: return flx_plain_number(hcl, c); case HCL_FLX_PLAIN_NUMBER: return flx_plain_number(hcl, c);

View File

@ -1,3 +1,15 @@
$?a ##ERROR: syntax error - '?' prohibited as first character after '$'
---
$include ##ERROR: syntax error - $include target not specified
---
$include 10 ##ERROR: syntax error - $include target expected in place of '10'
---
## ##
x := (+ 10 20) "aaaa"; ##ERROR: syntax error - too many rvalues x := (+ 10 20) "aaaa"; ##ERROR: syntax error - too many rvalues
@ -67,7 +79,7 @@ printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal
printf "%O\n" #b[ 10 20 30 ]; printf "%O\n" #b[ 10 20 30 ];
printf "%010b\n" #b0101; printf "%010b\n" #b0101;
printf "%O\n" #bxy; ##ERROR: syntax error - neither valid radixed number nor valid directive #bxy printf "%O\n" #bxy; ##ERROR: syntax error - neither valid radixed number nor valid directive '#bxy'
--- ---