WIP - handling u and b prefix
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
hyung-hwan 2024-01-24 18:38:33 +09:00
parent b8e0b3578b
commit 4d0d50dea9
2 changed files with 22 additions and 24 deletions

View File

@ -612,10 +612,11 @@ struct hcl_flx_qt_t
hcl_ooch_t esc_char; hcl_ooch_t esc_char;
hcl_oow_t min_len; hcl_oow_t min_len;
hcl_oow_t max_len; hcl_oow_t max_len;
int regex; unsigned int is_byte: 1;
unsigned int regex: 1;
/* state data */ /* state data */
int escaped; unsigned int escaped: 4; /* must be large enough to hold 1, 2, 4, 8 */
int digit_count; int digit_count;
hcl_ooci_t c_acc; hcl_ooci_t c_acc;
}; };

View File

@ -1739,7 +1739,7 @@ static HCL_INLINE void init_flx_hn (hcl_flx_hn_t* hn, hcl_tok_type_t tok_type, h
hn->radix = radix; hn->radix = radix;
} }
static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_tok_type_t tok_type, hcl_synerrnum_t synerr_code, hcl_ooch_t end_char, hcl_ooch_t esc_char, hcl_oow_t min_len, hcl_oow_t max_len) static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_tok_type_t tok_type, hcl_synerrnum_t synerr_code, hcl_ooch_t end_char, hcl_ooch_t esc_char, hcl_oow_t min_len, hcl_oow_t max_len, int is_byte)
{ {
HCL_MEMSET (qt, 0, HCL_SIZEOF(*qt)); HCL_MEMSET (qt, 0, HCL_SIZEOF(*qt));
qt->tok_type = tok_type; qt->tok_type = tok_type;
@ -1748,6 +1748,7 @@ static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_tok_type_t tok_type, h
qt->esc_char = esc_char; qt->esc_char = esc_char;
qt->min_len = min_len; qt->min_len = min_len;
qt->max_len = max_len; qt->max_len = max_len;
qt->is_byte = is_byte;
} }
static HCL_INLINE void init_flx_pi (hcl_flx_pi_t* pi) static HCL_INLINE void init_flx_pi (hcl_flx_pi_t* pi)
@ -1838,12 +1839,12 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c)
goto consumed; goto consumed;
case '\"': case '\"':
init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t)); init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t), 0);
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on to the QUOTED_TOKEN state */ FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on to the QUOTED_TOKEN state */
goto consumed; goto consumed;
case '\'': case '\'':
init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1); init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1, 0);
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on to the QUOTED_TOKEN state */ FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on to the QUOTED_TOKEN state */
goto consumed; goto consumed;
@ -2348,11 +2349,7 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
{ {
hcl_flx_qt_t* qt = FLX_QT(hcl); hcl_flx_qt_t* qt = FLX_QT(hcl);
if (c == HCL_OOCI_EOF) if (c == HCL_OOCI_EOF) goto invalid_token;
{
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
return -1;
}
if (qt->escaped == 3) if (qt->escaped == 3)
{ {
@ -2427,12 +2424,9 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
if (qt->escaped == 0 && c == qt->end_char) if (qt->escaped == 0 && c == qt->end_char)
{ {
/* terminating quote */ /* terminating quote */
/* TODO: byte string literal or byte literal by checking qt->is_byte... */
FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */ FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */
if (TOKEN_NAME_LEN(hcl) < qt->min_len) if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token;
{
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL);
return -1;
}
goto consumed; goto consumed;
} }
@ -2470,6 +2464,7 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
#if (HCL_SIZEOF_OOCH_T >= 2) #if (HCL_SIZEOF_OOCH_T >= 2)
else if (c == 'u') else if (c == 'u')
{ {
if (qt->is_byte) goto invalid_token;
qt->escaped = 4; qt->escaped = 4;
qt->digit_count = 0; qt->digit_count = 0;
qt->c_acc = 0; qt->c_acc = 0;
@ -2479,6 +2474,7 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
#if (HCL_SIZEOF_OOCH_T >= 4) #if (HCL_SIZEOF_OOCH_T >= 4)
else if (c == 'U') else if (c == 'U')
{ {
if (qt->is_byte) goto invalid_token;
qt->escaped = 8; qt->escaped = 8;
qt->digit_count = 0; qt->digit_count = 0;
qt->c_acc = 0; qt->c_acc = 0;
@ -2501,12 +2497,13 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
ADD_TOKEN_CHAR (hcl, c); ADD_TOKEN_CHAR (hcl, c);
consumed: consumed:
if (TOKEN_NAME_LEN(hcl) > qt->max_len) if (TOKEN_NAME_LEN(hcl) > qt->max_len) goto invalid_token;
{
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL);
return -1;
}
return 1; return 1;
invalid_token:
/* TODO: more accurate syntax error code instead of just synerr_code.... */
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
return -1;
} }
static int flx_signed_token (hcl_t* hcl, hcl_ooci_t c) static int flx_signed_token (hcl_t* hcl, hcl_ooci_t c)
@ -2585,17 +2582,17 @@ static int flx_bu (hcl_t* hcl, hcl_ooci_t c)
if (c == '\"') if (c == '\"')
{ {
/* TODO: determine type based on the start_c */ int is_byte = (bu->start_c == 'b' || bu->start_c == 'B');
reset_flx_token (hcl); reset_flx_token (hcl);
init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t)); init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t), is_byte);
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote and move on */ FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote and move on */
goto consumed; goto consumed;
} }
else if (c == '\'') else if (c == '\'')
{ {
/* TODO: determine type based on the start_c */ int is_byte = (bu->start_c == 'b' || bu->start_c == 'B');
reset_flx_token (hcl); reset_flx_token (hcl);
init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1); init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1, is_byte);
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote, and move on */ FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote, and move on */
goto consumed; goto consumed;
} }