enhanced the tokenization code to better check for hash-marked directives

This commit is contained in:
hyung-hwan 2022-07-24 00:49:03 +00:00
parent 5abe10668b
commit f99841442a
2 changed files with 60 additions and 24 deletions

View File

@ -195,7 +195,8 @@ enum hcl_iotok_type_t
HCL_IOTOK_VBAR, /* | */ HCL_IOTOK_VBAR, /* | */
HCL_IOTOK_EOL, /* end of line */ HCL_IOTOK_EOL, /* end of line */
HCL_IOTOK_INCLUDE HCL_IOTOK_INCLUDE,
HCL_IOTOK_PRAGMA
}; };
typedef enum hcl_iotok_type_t hcl_iotok_type_t; typedef enum hcl_iotok_type_t hcl_iotok_type_t;
@ -556,8 +557,8 @@ struct hcl_flx_hn_t
int radix; int radix;
/* state data */ /* state data */
int invalid;
hcl_oow_t digit_count; hcl_oow_t digit_count;
hcl_oow_t invalid_digit_count;
}; };
typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */ typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */

View File

@ -46,6 +46,7 @@ static struct voca_t
} vocas[] = } vocas[] =
{ {
{ 8, { '#','i','n','c','l','u','d','e' } }, { 8, { '#','i','n','c','l','u','d','e' } },
{ 7, { '#','p','r','a','g','m','a' } },
{ 11, { '#','\\','b','a','c','k','s','p','a','c','e' } }, { 11, { '#','\\','b','a','c','k','s','p','a','c','e' } },
{ 10, { '#','\\','l','i','n','e','f','e','e','d' } }, { 10, { '#','\\','l','i','n','e','f','e','e','d' } },
{ 9, { '#','\\','n','e','w','l','i','n','e' } }, { 9, { '#','\\','n','e','w','l','i','n','e' } },
@ -63,6 +64,8 @@ static struct voca_t
enum voca_id_t enum voca_id_t
{ {
VOCA_INCLUDE, VOCA_INCLUDE,
VOCA_PRAGMA,
VOCA_BACKSPACE, VOCA_BACKSPACE,
VOCA_LINEFEED, VOCA_LINEFEED,
VOCA_NEWLINE, VOCA_NEWLINE,
@ -363,7 +366,6 @@ static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, h
return 0; return 0;
} }
#define GET_CHAR(hcl) \ #define GET_CHAR(hcl) \
do { if (get_char(hcl) <= -1) return -1; } while (0) do { if (get_char(hcl) <= -1) return -1; } while (0)
@ -432,6 +434,22 @@ static HCL_INLINE void unget_char (hcl_t* hcl, const hcl_iolxc_t* c)
hcl->c->ungot[hcl->c->nungots++] = *c; hcl->c->ungot[hcl->c->nungots++] = *c;
} }
/*
 * Maps the outcome of does_token_name_match() for the directive vocabulary
 * entries onto a token type.
 *
 * VOCA_INCLUDE is tested first, then VOCA_PRAGMA. On the first match the
 * corresponding HCL_IOTOK_INCLUDE or HCL_IOTOK_PRAGMA value is stored in
 * *tok_type and 0 is returned. When neither entry matches, -1 is returned
 * and *tok_type is left unmodified.
 */
static int get_directive_token_type (hcl_t* hcl, hcl_iotok_type_t* tok_type)
{
	hcl_iotok_type_t matched;

	if (does_token_name_match(hcl, VOCA_INCLUDE)) matched = HCL_IOTOK_INCLUDE;
	else if (does_token_name_match(hcl, VOCA_PRAGMA)) matched = HCL_IOTOK_PRAGMA;
	else return -1; /* not a known directive; caller reports the error */

	*tok_type = matched;
	return 0;
}
static int get_char (hcl_t* hcl) static int get_char (hcl_t* hcl)
{ {
hcl_ooci_t lc; hcl_ooci_t lc;
@ -959,6 +977,9 @@ static int get_hmarked_token (hcl_t* hcl)
break; break;
default: default:
{
hcl_iotok_type_t tok_type;
if (is_delimchar(c)) if (is_delimchar(c))
{ {
/* EOF, whitespace, etc */ /* EOF, whitespace, etc */
@ -975,20 +996,21 @@ static int get_hmarked_token (hcl_t* hcl)
} }
while (!is_delimchar(c)); while (!is_delimchar(c));
if (does_token_name_match(hcl, VOCA_INCLUDE)) if (get_directive_token_type(hcl, &tok_type) <= -1)
{
SET_TOKEN_TYPE (hcl, HCL_IOTOK_INCLUDE);
}
else
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hash-marked literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); "invalid hash-marked literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr);
return -1; return -1;
} }
else
{
SET_TOKEN_TYPE (hcl, tok_type);
}
unget_char (hcl, &hcl->c->lxc); unget_char (hcl, &hcl->c->lxc);
break; break;
} }
}
return 0; return 0;
} }
@ -2859,6 +2881,8 @@ static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
if (is_delimchar(c)) if (is_delimchar(c))
{ {
hcl_iotok_type_t tok_type;
if (hi->char_count == 0) if (hi->char_count == 0)
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL, hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
@ -2866,17 +2890,17 @@ static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
return -1; return -1;
} }
if (does_token_name_match(hcl, VOCA_INCLUDE)) if (get_directive_token_type(hcl, &tok_type) <= -1)
{
FEED_WRAP_UP (hcl, HCL_IOTOK_INCLUDE);
goto not_consumed;
}
else
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); "invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
else
{
FEED_WRAP_UP (hcl, tok_type);
goto not_consumed;
}
} }
else else
{ {
@ -2897,6 +2921,8 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
hcl_flx_hn_t* rn = FLX_HN(hcl); hcl_flx_hn_t* rn = FLX_HN(hcl);
if (CHAR_TO_NUM(c, rn->radix) >= rn->radix) if (CHAR_TO_NUM(c, rn->radix) >= rn->radix)
{
if (is_delimchar(c))
{ {
if (rn->digit_count == 0) if (rn->digit_count == 0)
{ {
@ -2904,14 +2930,23 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); "no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
else if (is_delimchar(c)) else if (rn->invalid_digit_count > 0)
{ {
if (rn->invalid) /* invalid as a number, but this could be a hash-marked directive */
hcl_iotok_type_t tok_type;
if (get_directive_token_type(hcl, &tok_type) <= -1)
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid digit in radixed number in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); "neither valid radixed number nor valid directive %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
else
{
FEED_WRAP_UP (hcl, tok_type);
goto not_consumed;
}
}
FEED_WRAP_UP (hcl, rn->tok_type); FEED_WRAP_UP (hcl, rn->tok_type);
goto not_consumed; goto not_consumed;
@ -2920,7 +2955,7 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
{ {
ADD_TOKEN_CHAR(hcl, c); ADD_TOKEN_CHAR(hcl, c);
rn->digit_count++; rn->digit_count++;
rn->invalid = 1; rn->invalid_digit_count++;
goto consumed; goto consumed;
} }
} }