improved the hash-marked identifier handling in the feed-based reader

This commit is contained in:
hyung-hwan 2022-07-23 10:09:36 +00:00
parent 3d6abc38bd
commit 87a6d152f8
2 changed files with 81 additions and 38 deletions

View File

@ -540,8 +540,15 @@ struct hcl_flx_hc_t
hcl_oow_t char_count; hcl_oow_t char_count;
}; };
/* lexer state kept while a hash-marked identifier (e.g. #include) is being read */
typedef struct hcl_flx_rn_t hcl_flx_rn_t; /* radixed number */ typedef struct hcl_flx_hi_t hcl_flx_hi_t; /* hash-marked identifier */
struct hcl_flx_rn_t struct hcl_flx_hi_t
{
/* state data */
/* number of identifier characters collected so far; flx_hashed_ident()
 * bumps it for every character it adds to the token and reports an error
 * if it is still 0 when a delimiter shows up */
hcl_oow_t char_count;
};
typedef struct hcl_flx_hn_t hcl_flx_hn_t; /* hash-marked number - radixed number */
struct hcl_flx_hn_t
{ {
/* input data */ /* input data */
hcl_iotok_type_t tok_type; hcl_iotok_type_t tok_type;
@ -578,7 +585,8 @@ enum hcl_flx_state_t
HCL_FLX_DELIM_TOKEN, HCL_FLX_DELIM_TOKEN,
HCL_FLX_HASHED_TOKEN, /* hash-marked token */ HCL_FLX_HASHED_TOKEN, /* hash-marked token */
HCL_FLX_HASHED_CHAR, /* hash-marked character that begins with #\ */ HCL_FLX_HASHED_CHAR, /* hash-marked character that begins with #\ */
HCL_FLX_RADIXED_NUMBER, HCL_FLX_HASHED_IDENT, /* hash-marked identifier like #include, etc */
HCL_FLX_HASHED_NUMBER, /* hash-marked number - radixed number like #xABCD */
HCL_FLX_QUOTED_TOKEN HCL_FLX_QUOTED_TOKEN
}; };
typedef enum hcl_flx_state_t hcl_flx_state_t; typedef enum hcl_flx_state_t hcl_flx_state_t;
@ -639,7 +647,8 @@ struct hcl_compiler_t
{ {
hcl_flx_dt_t dt; /* delimiter token */ hcl_flx_dt_t dt; /* delimiter token */
hcl_flx_hc_t hc; /* hash-marked character */ hcl_flx_hc_t hc; /* hash-marked character */
hcl_flx_rn_t rn; /* radixed number */ hcl_flx_hi_t hi; /* hash-marked identifier */
hcl_flx_hn_t hn; /* hash-marked number - radixed number */
hcl_flx_qt_t qt; /* quoted token */ hcl_flx_qt_t qt; /* quoted token */
} u; } u;
} lx; } lx;
@ -698,8 +707,6 @@ struct hcl_compiler_t
hcl_clsblk_info_t* info; hcl_clsblk_info_t* info;
hcl_oow_t info_capa; hcl_oow_t info_capa;
} clsblk; /* class block */ } clsblk; /* class block */
}; };
#endif #endif

View File

@ -2366,23 +2366,27 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st
/* short-cuts to lexer state data */ /* short-cuts to lexer state data */
#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt)) #define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt))
#define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc)) #define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc))
#define FLX_RN(hcl) (&((hcl)->c->feed.lx.u.rn)) #define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi))
#define FLX_HN(hcl) (&((hcl)->c->feed.lx.u.hn))
#define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt)) #define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt))
/*
 * Set up the radixed-number lexer state.
 *
 * The structure is cleared first and then the three input fields are
 * stored:
 *   tok_type    - token type recorded in rn->tok_type
 *   synerr_code - syntax error number recorded in rn->synerr_code
 *   radix       - numeric base recorded in rn->radix
 */
static HCL_INLINE void init_flx_rn (hcl_flx_rn_t* rn, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix)
{
	/* wipe everything so that fields not assigned below start from zero */
	HCL_MEMSET (rn, 0, HCL_SIZEOF(hcl_flx_rn_t));

	rn->radix = radix;
	rn->synerr_code = synerr_code;
	rn->tok_type = tok_type;
}
/* Reset the hash-marked character state by clearing *hc with HCL_MEMSET,
 * which leaves every field of it, including char_count, at zero. */
static HCL_INLINE void init_flx_hc (hcl_flx_hc_t* hc) static HCL_INLINE void init_flx_hc (hcl_flx_hc_t* hc)
{ {
HCL_MEMSET (hc, 0, HCL_SIZEOF(*hc)); HCL_MEMSET (hc, 0, HCL_SIZEOF(*hc));
} }
/*
 * Set up the hash-marked identifier lexer state.
 *
 * The whole structure is cleared, so hi->char_count starts from 0.
 */
static HCL_INLINE void init_flx_hi (hcl_flx_hi_t* hi)
{
	HCL_MEMSET (hi, 0, HCL_SIZEOF(hcl_flx_hi_t));
}
/*
 * Set up the hash-marked number (radixed number) lexer state.
 *
 * The structure is cleared first and then the three input fields are
 * stored:
 *   tok_type    - token type recorded in rn->tok_type
 *   synerr_code - syntax error number recorded in rn->synerr_code
 *   radix       - numeric base recorded in rn->radix
 */
static HCL_INLINE void init_flx_hn (hcl_flx_hn_t* rn, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix)
{
	/* wipe everything so that fields not assigned below start from zero */
	HCL_MEMSET (rn, 0, HCL_SIZEOF(hcl_flx_hn_t));

	rn->radix = radix;
	rn->synerr_code = synerr_code;
	rn->tok_type = tok_type;
}
static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, hcl_ooch_t end_char, hcl_ooch_t esc_char, hcl_oow_t min_len, hcl_oow_t max_len) static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, hcl_ooch_t end_char, hcl_ooch_t esc_char, hcl_oow_t min_len, hcl_oow_t max_len)
{ {
@ -2395,7 +2399,6 @@ static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_iotok_type_t tok_type,
qt->max_len = max_len; qt->max_len = max_len;
} }
static int flx_start (hcl_t* hcl, hcl_ooci_t c) static int flx_start (hcl_t* hcl, hcl_ooci_t c)
{ {
HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START); HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START);
@ -2702,26 +2705,26 @@ static int flx_hashed_token (hcl_t* hcl, hcl_ooci_t c)
/* --------------------------- */ /* --------------------------- */
case 'x': case 'x':
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16); init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16);
goto radixed_number; goto radixed_number;
case 'o': case 'o':
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8); init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8);
goto radixed_number; goto radixed_number;
case 'b': case 'b':
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2); init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2);
goto radixed_number; goto radixed_number;
case 'e': case 'e':
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_ERRLIT, HCL_SYNERR_ERRLIT, 10); init_flx_hn (FLX_HN(hcl), HCL_IOTOK_ERRLIT, HCL_SYNERR_ERRLIT, 10);
goto radixed_number; goto radixed_number;
case 'p': case 'p':
init_flx_rn (FLX_RN(hcl), HCL_IOTOK_SMPTRLIT, HCL_SYNERR_SMPTRLIT, 16); init_flx_hn (FLX_HN(hcl), HCL_IOTOK_SMPTRLIT, HCL_SYNERR_SMPTRLIT, 16);
radixed_number: radixed_number:
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_RADIXED_NUMBER); FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HASHED_NUMBER);
break; goto consumed;
/* --------------------------- */ /* --------------------------- */
case '\\': case '\\':
@ -2738,17 +2741,11 @@ static int flx_hashed_token (hcl_t* hcl, hcl_ooci_t c)
goto consumed; goto consumed;
/* --------------------------- */ /* --------------------------- */
/* TODO: #include... etc more directives? */
default: default:
// TODO: fix this part /* the character used as case values above can never be the first character of a hash-marked identifier */
if (is_spacechar(c) || c == HCL_UCI_EOF) init_flx_hi (FLX_HI(hcl));
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL, FEED_CONTINUE (hcl, HCL_FLX_HASHED_IDENT);
"no character after the hash sign"); goto not_consumed;
else
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
"invalid character after the hash sign - %jc", c);
return -1;
} }
consumed: consumed:
@ -2840,7 +2837,6 @@ static int flx_hashed_char (hcl_t* hcl, hcl_ooci_t c)
CLEAR_TOKEN_NAME (hcl); CLEAR_TOKEN_NAME (hcl);
ADD_TOKEN_CHAR (hcl, c); ADD_TOKEN_CHAR (hcl, c);
FEED_WRAP_UP (hcl, HCL_IOTOK_CHARLIT); FEED_WRAP_UP (hcl, HCL_IOTOK_CHARLIT);
goto not_consumed; goto not_consumed;
} }
else else
@ -2857,9 +2853,48 @@ not_consumed:
return 0; return 0;
} }
/*
 * Lexer state handler for a hash-marked identifier such as #include.
 *
 * A non-delimiter character is appended to the token and counted.
 * When a delimiter arrives, the collected name is checked: it must match
 * VOCA_INCLUDE, otherwise (or when nothing was collected at all) a
 * HCL_SYNERR_HASHLIT syntax error is set.
 *
 * Returns 1 if c has been consumed, 0 if c has not been consumed (the
 * delimiter that ended the identifier), and -1 after setting a syntax error.
 */
static int flx_radixed_number (hcl_t* hcl, hcl_ooci_t c) static int flx_hashed_ident (hcl_t* hcl, hcl_ooci_t c)
{ {
hcl_flx_rn_t* rn = FLX_RN(hcl); hcl_flx_hi_t* hi = FLX_HI(hcl);
if (is_delimchar(c))
{
/* a delimiter ends the identifier */
if (hi->char_count == 0)
{
/* the delimiter came before any identifier character was collected */
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
"no valid character after the hash sign");
return -1;
}
/* the include directive is the only name accepted here */
if (does_token_name_match(hcl, VOCA_INCLUDE))
{
/* NOTE(review): FEED_WRAP_UP is defined elsewhere; presumably it finishes
 * the token with the given type - confirm. the delimiter itself is
 * reported as not consumed */
FEED_WRAP_UP (hcl, HCL_IOTOK_INCLUDE);
goto not_consumed;
}
else
{
/* any other collected name is rejected with the token text in the message */
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hashed literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
}
else
{
/* keep accumulating; the count is what tells an empty identifier apart
 * when the delimiter finally arrives */
ADD_TOKEN_CHAR (hcl, c);
hi->char_count++;
goto consumed;
}
consumed:
return 1;
not_consumed:
return 0;
}
static int flx_hashed_number (hcl_t* hcl, hcl_ooci_t c)
{
hcl_flx_hn_t* rn = FLX_HN(hcl);
if (CHAR_TO_NUM(c, rn->radix) >= rn->radix) if (CHAR_TO_NUM(c, rn->radix) >= rn->radix)
{ {
@ -3078,7 +3113,8 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c)
case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c); case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c);
case HCL_FLX_HASHED_TOKEN: return flx_hashed_token(hcl, c); case HCL_FLX_HASHED_TOKEN: return flx_hashed_token(hcl, c);
case HCL_FLX_HASHED_CHAR: return flx_hashed_char(hcl, c); case HCL_FLX_HASHED_CHAR: return flx_hashed_char(hcl, c);
case HCL_FLX_RADIXED_NUMBER: return flx_radixed_number(hcl, c); case HCL_FLX_HASHED_IDENT: return flx_hashed_ident(hcl, c);
case HCL_FLX_HASHED_NUMBER: return flx_hashed_number(hcl, c);
case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c); case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c);
/* /*
case HCL_FLX_DIRECTIVE: case HCL_FLX_DIRECTIVE: