Enhanced the tokenization code to better check for hash-marked directives

This commit is contained in:
hyung-hwan 2022-07-24 00:49:03 +00:00
parent 5abe10668b
commit f99841442a
2 changed files with 60 additions and 24 deletions

View File

@ -195,7 +195,8 @@ enum hcl_iotok_type_t
HCL_IOTOK_VBAR, /* | */
HCL_IOTOK_EOL, /* end of line */
HCL_IOTOK_INCLUDE
HCL_IOTOK_INCLUDE,
HCL_IOTOK_PRAGMA
};
typedef enum hcl_iotok_type_t hcl_iotok_type_t;
@ -556,8 +557,8 @@ struct hcl_flx_hn_t
int radix;
/* state data */
int invalid;
hcl_oow_t digit_count;
hcl_oow_t invalid_digit_count;
};
typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */

View File

@ -46,6 +46,7 @@ static struct voca_t
} vocas[] =
{
{ 8, { '#','i','n','c','l','u','d','e' } },
{ 7, { '#','p','r','a','g','m','a' } },
{ 11, { '#','\\','b','a','c','k','s','p','a','c','e' } },
{ 10, { '#','\\','l','i','n','e','f','e','e','d' } },
{ 9, { '#','\\','n','e','w','l','i','n','e' } },
@ -63,6 +64,8 @@ static struct voca_t
enum voca_id_t
{
VOCA_INCLUDE,
VOCA_PRAGMA,
VOCA_BACKSPACE,
VOCA_LINEFEED,
VOCA_NEWLINE,
@ -363,7 +366,6 @@ static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, h
return 0;
}
/* Calls get_char(hcl) and, if it reports failure (a result of -1 or less),
 * makes the ENCLOSING function return -1 immediately. Because of this hidden
 * return, the macro is only usable inside functions that return int.
 * The do { ... } while (0) wrapper lets it be used as a single statement. */
#define GET_CHAR(hcl) \
do { if (get_char(hcl) <= -1) return -1; } while (0)
@ -432,6 +434,22 @@ static HCL_INLINE void unget_char (hcl_t* hcl, const hcl_iolxc_t* c)
hcl->c->ungot[hcl->c->nungots++] = *c;
}
/*
 * Classifies a hash-marked directive by name.
 *
 * The token name held in hcl (presumably the token currently being
 * scanned - confirm against does_token_name_match()) is checked against
 * the VOCA_INCLUDE and VOCA_PRAGMA vocabulary entries, in that order.
 *
 * Returns 0 and stores the matching token type in *tok_type on success.
 * Returns -1 without writing to *tok_type if neither entry matches.
 */
static int get_directive_token_type (hcl_t* hcl, hcl_iotok_type_t* tok_type)
{
	hcl_iotok_type_t matched;

	if (does_token_name_match(hcl, VOCA_INCLUDE)) matched = HCL_IOTOK_INCLUDE;
	else if (does_token_name_match(hcl, VOCA_PRAGMA)) matched = HCL_IOTOK_PRAGMA;
	else return -1; /* not a known directive; caller reports the error */

	*tok_type = matched;
	return 0;
}
static int get_char (hcl_t* hcl)
{
hcl_ooci_t lc;
@ -959,6 +977,9 @@ static int get_hmarked_token (hcl_t* hcl)
break;
default:
{
hcl_iotok_type_t tok_type;
if (is_delimchar(c))
{
/* EOF, whitespace, etc */
@ -975,19 +996,20 @@ static int get_hmarked_token (hcl_t* hcl)
}
while (!is_delimchar(c));
if (does_token_name_match(hcl, VOCA_INCLUDE))
{
SET_TOKEN_TYPE (hcl, HCL_IOTOK_INCLUDE);
}
else
if (get_directive_token_type(hcl, &tok_type) <= -1)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hash-marked literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr);
return -1;
}
else
{
SET_TOKEN_TYPE (hcl, tok_type);
}
unget_char (hcl, &hcl->c->lxc);
break;
}
}
return 0;
@ -2859,6 +2881,8 @@ static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
if (is_delimchar(c))
{
hcl_iotok_type_t tok_type;
if (hi->char_count == 0)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
@ -2866,17 +2890,17 @@ static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
return -1;
}
if (does_token_name_match(hcl, VOCA_INCLUDE))
{
FEED_WRAP_UP (hcl, HCL_IOTOK_INCLUDE);
goto not_consumed;
}
else
if (get_directive_token_type(hcl, &tok_type) <= -1)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
else
{
FEED_WRAP_UP (hcl, tok_type);
goto not_consumed;
}
}
else
{
@ -2898,20 +2922,31 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
if (CHAR_TO_NUM(c, rn->radix) >= rn->radix)
{
if (rn->digit_count == 0)
if (is_delimchar(c))
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
else if (is_delimchar(c))
{
if (rn->invalid)
if (rn->digit_count == 0)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid digit in radixed number in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
else if (rn->invalid_digit_count > 0)
{
/* invalid as a number, but this could be a hash-marked directive */
hcl_iotok_type_t tok_type;
if (get_directive_token_type(hcl, &tok_type) <= -1)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"neither valid radixed number nor valid directive %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
else
{
FEED_WRAP_UP (hcl, tok_type);
goto not_consumed;
}
}
FEED_WRAP_UP (hcl, rn->tok_type);
goto not_consumed;
@ -2920,7 +2955,7 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
{
ADD_TOKEN_CHAR(hcl, c);
rn->digit_count++;
rn->invalid = 1;
rn->invalid_digit_count++;
goto consumed;
}
}