enhanced the reader and compiler to treat characters and strings prefixed with b and u as a limited-range character and a byte array with an internal terminating null at the back

This commit is contained in:
hyung-hwan 2024-01-25 23:48:06 +09:00
parent 4d0d50dea9
commit aaa6e35787
10 changed files with 127 additions and 11 deletions

View File

@ -91,7 +91,7 @@ hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, c
return make_cnode(hcl, HCL_CNODE_DCSTAR, flags, loc, tok);
}
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v)
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v)
{
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_CHARLIT, flags, loc, tok);
if (HCL_UNLIKELY(!c)) return HCL_NULL;
@ -99,6 +99,14 @@ hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc,
return c;
}
/* Creates a cnode of the HCL_CNODE_BCHRLIT type for a byte character
 * literal and stores the byte value v in it.
 * Returns HCL_NULL if make_cnode() fails to produce a node. */
hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v)
{
	hcl_cnode_t* node;

	node = make_cnode(hcl, HCL_CNODE_BCHRLIT, flags, loc, tok);
	if (HCL_UNLIKELY(!node)) return HCL_NULL;

	/* keep the byte value carried by the literal in the node itself */
	node->u.bchrlit.v = v;
	return node;
}
hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
{
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_SYMBOL, flags, loc, tok);
@ -120,6 +128,11 @@ hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, c
return make_cnode(hcl, HCL_CNODE_STRLIT, flags, loc, tok);
}
/* Creates a cnode of the HCL_CNODE_BSTRLIT type for a byte string literal.
 * Whatever make_cnode() produces is handed back to the caller unchanged. */
hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
{
	hcl_cnode_t* node;

	node = make_cnode(hcl, HCL_CNODE_BSTRLIT, flags, loc, tok);
	return node;
}
hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
{
return make_cnode(hcl, HCL_CNODE_NUMLIT, flags, loc, tok);

View File

@ -4436,11 +4436,20 @@ redo:
lit = HCL_CHAR_TO_OOP(oprnd->u.charlit.v);
goto literal;
case HCL_CNODE_BCHRLIT: /* byte character still converts to character */
lit = HCL_CHAR_TO_OOP((hcl_ooch_t)oprnd->u.bchrlit.v);
goto literal;
case HCL_CNODE_STRLIT:
lit = hcl_makestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0);
if (HCL_UNLIKELY(!lit)) return -1;
goto literal;
case HCL_CNODE_BSTRLIT:
lit = hcl_makebytestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0);
if (HCL_UNLIKELY(!lit)) return -1;
goto literal;
case HCL_CNODE_NUMLIT:
lit = string_to_num(hcl, HCL_CNODE_GET_TOK(oprnd), HCL_CNODE_GET_LOC(oprnd), 0);
if (HCL_UNLIKELY(!lit)) return -1;

View File

@ -159,8 +159,14 @@
enum hcl_tok_type_t
{
HCL_TOK_EOF,
/* the following 4 items must be in this order for code
* in flx_quoted_token() in read.c */
HCL_TOK_CHARLIT,
HCL_TOK_BCHRLIT,
HCL_TOK_STRLIT,
HCL_TOK_BSTRLIT,
HCL_TOK_NUMLIT,
HCL_TOK_RADNUMLIT,
HCL_TOK_FPDECLIT,
@ -227,9 +233,11 @@ struct hcl_link_t
enum hcl_cnode_type_t
{
HCL_CNODE_CHARLIT,
HCL_CNODE_BCHRLIT,
HCL_CNODE_SYMBOL,
HCL_CNODE_DSYMBOL, /* dotted symbol */
HCL_CNODE_STRLIT,
HCL_CNODE_BSTRLIT,
HCL_CNODE_NUMLIT,
HCL_CNODE_RADNUMLIT,
HCL_CNODE_FPDECLIT,
@ -300,6 +308,10 @@ struct hcl_cnode_t
hcl_ooch_t v;
} charlit;
struct
{
hcl_oob_t v;
} bchrlit;
struct
{
hcl_syncode_t syncode; /* special if non-zero */
} symbol;
@ -1750,10 +1762,12 @@ hcl_cnode_t* hcl_makecnodesuper (hcl_t* hcl, int flags, const hcl_loc_t* loc, co
hcl_cnode_t* hcl_makecnodeellipsis (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodetrpcolons (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v);
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v);
hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v);
hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodedsymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, int is_cla);
hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnoderadnumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodefpdeclit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);

View File

@ -2676,6 +2676,20 @@ HCL_EXPORT hcl_oop_t hcl_makebytearray (
hcl_oow_t len
);
/**
 * The hcl_makebytestringwithbytes() function makes a byte string object
 * from the \a len bytes pointed to by \a ptr. The object is allocated as
 * a byte array; a byte string is a byte array with an extra null at the
 * back. The \a ngc flag is passed on to the underlying allocator as is.
 * NOTE(review): presumably \a ngc selects allocation outside the
 * garbage-collected heap - confirm against the allocator.
 */
HCL_EXPORT hcl_oop_t hcl_makebytestringwithbytes (
hcl_t* hcl,
const hcl_oob_t* ptr,
hcl_oow_t len,
int ngc
);
/**
 * The hcl_makebytestring() function makes a byte string object from the
 * \a len characters pointed to by \a ptr. A byte string is a byte array
 * with an extra null at the back. The input is a hcl_ooch_t string because
 * the function is mainly used to convert a compiler token to a byte string.
 * Only the lowest 8 bits of each character are stored in the result.
 * It returns #HCL_NULL if the object cannot be allocated.
 * The \a ngc flag is passed on to the underlying allocator as is.
 */
HCL_EXPORT hcl_oop_t hcl_makebytestring (
hcl_t* hcl,
const hcl_ooch_t* ptr,
hcl_oow_t len,
int ngc
);
HCL_EXPORT hcl_oop_t hcl_makestring (
hcl_t* hcl,
const hcl_ooch_t* ptr,

View File

@ -323,6 +323,34 @@ hcl_oop_t hcl_makebytearray (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t size)
return hcl_allocbyteobj(hcl, HCL_BRAND_BYTE_ARRAY, ptr, size);
}
/* Makes a byte string from len raw bytes at ptr.
 * The work is delegated to alloc_numeric_array() with the byte array
 * brand, the byte object type and a unit size of one hcl_oob_t.
 * NOTE(review): the literal 1 presumably asks for the extra terminating
 * null that distinguishes a byte string from a plain byte array - confirm
 * against alloc_numeric_array(). */
hcl_oop_t hcl_makebytestringwithbytes (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t len, int ngc)
{
	return alloc_numeric_array(
		hcl, HCL_BRAND_BYTE_ARRAY, ptr, len,
		HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc
	);
}
/* Makes a byte string out of a hcl_ooch_t string.
 *
 * A byte string is a byte array with an extra null at the back.
 * The input to this function, however, is a pointer to hcl_ooch_t data
 * because this function is mainly used to convert a token to a byte string
 * and the token in the compiler is stored as a hcl_ooch_t string.
 *
 * Only the lowest 8 bits of each input character are stored.
 * Returns HCL_NULL if the object cannot be allocated. */
hcl_oop_t hcl_makebytestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc)
{
	hcl_oop_byte_t b;
	hcl_oow_t i;
	hcl_oob_t v;

	/* allocate without initial data; the bytes are filled in below.
	 * the allocator result is used as a generic object pointer elsewhere,
	 * so convert it explicitly to the byte object pointer type instead of
	 * relying on an implicit conversion between distinct pointer types. */
	b = (hcl_oop_byte_t)alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, HCL_NULL, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc);
	if (HCL_UNLIKELY(!b)) return HCL_NULL;

	for (i = 0; i < len; i++)
	{
		/* narrow each character to a single byte */
		v = ptr[i] & 0xFF;
		HCL_OBJ_SET_BYTE_VAL(b, i, v);
	}

	return (hcl_oop_t)b;
}
hcl_oop_t hcl_makestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc)
{
/*return hcl_alloccharobj(hcl, HCL_BRAND_STRING, ptr, len);*/

View File

@ -798,9 +798,11 @@ void hcl_dumpcnode (hcl_t* hcl, hcl_cnode_t* cnode, int newline)
switch (t)
{
case HCL_CNODE_CHARLIT:
case HCL_CNODE_BCHRLIT:
case HCL_CNODE_SYMBOL:
case HCL_CNODE_DSYMBOL:
case HCL_CNODE_STRLIT:
case HCL_CNODE_BSTRLIT:
case HCL_CNODE_NUMLIT:
case HCL_CNODE_RADNUMLIT:
case HCL_CNODE_FPDECLIT:

View File

@ -614,7 +614,7 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
fake_tok_ptr = &fake_tok;
}
/* TODO: check the number of argumetns in advance??? */
/* TODO: check the number of arguments in advance??? */
sym = hcl_makecnodesymbol(hcl, 0, &loc, fake_tok_ptr);
if (HCL_UNLIKELY(!sym))
{
@ -1476,6 +1476,10 @@ static int feed_process_token (hcl_t* hcl)
frd->obj = hcl_makecnodecharlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0));
goto auto_xlist;
case HCL_TOK_BCHRLIT:
frd->obj = hcl_makecnodebchrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), (hcl_oob_t)TOKEN_NAME_CHAR(hcl, 0));
goto auto_xlist;
case HCL_TOK_NUMLIT:
frd->obj = hcl_makecnodenumlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist;
@ -1498,6 +1502,10 @@ static int feed_process_token (hcl_t* hcl)
frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist;
case HCL_TOK_BSTRLIT:
frd->obj = hcl_makecnodebstrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist;
case HCL_TOK_IDENT:
frd->obj = hcl_makecnodesymbol(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist;
@ -2348,9 +2356,16 @@ not_consumed:
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
{
hcl_flx_qt_t* qt = FLX_QT(hcl);
hcl_loc_t synerr_loc = *TOKEN_LOC(hcl);
if (c == HCL_OOCI_EOF) goto invalid_token;
if (qt->is_byte && c > 0xFF)
{
synerr_loc = *FLX_LOC(hcl);
goto invalid_token;
}
if (qt->escaped == 3)
{
if (c >= '0' && c <= '7')
@ -2424,8 +2439,12 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
if (qt->escaped == 0 && c == qt->end_char)
{
/* terminating quote */
/* TODO: byte string literal or byte literal by checking qt->is_byte... */
FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */
/* qt->tok_type + qt->is_byte assumes that the token types for
* byte-string and byte-character literals are 1 greater than
* string and character literals. see the definition of
* hcl_tok_type_t in hcl-prv.h */
FEED_WRAP_UP (hcl, qt->tok_type + qt->is_byte); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */
if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token;
goto consumed;
}
@ -2462,9 +2481,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
goto consumed;
}
#if (HCL_SIZEOF_OOCH_T >= 2)
else if (c == 'u')
else if (c == 'u' && !qt->is_byte)
{
if (qt->is_byte) goto invalid_token;
#if 0
if (qt->is_byte)
{
synerr_loc = *FLX_LOC(hcl);
goto invalid_token;
}
#endif
qt->escaped = 4;
qt->digit_count = 0;
qt->c_acc = 0;
@ -2472,9 +2497,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
}
#endif
#if (HCL_SIZEOF_OOCH_T >= 4)
else if (c == 'U')
else if (c == 'U' && !qt->is_byte)
{
if (qt->is_byte) goto invalid_token;
#if 0
if (qt->is_byte)
{
synerr_loc = *FLX_LOC(hcl);
goto invalid_token;
}
#endif
qt->escaped = 8;
qt->digit_count = 0;
qt->c_acc = 0;
@ -2501,8 +2532,7 @@ consumed:
return 1;
invalid_token:
/* TODO: more accurate syntax error code instead of just synerr_code.... */
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
hcl_setsynerr (hcl, qt->synerr_code, &synerr_loc, HCL_NULL);
return -1;
}

View File

@ -19,6 +19,7 @@ check_ERRORS = \
feed-5002.err \
feed-5003.err \
feed-5004.err \
feed-5005.err \
mlist-5001.err \
var-5001.err \
var-5002.err \

View File

@ -490,6 +490,7 @@ check_ERRORS = \
feed-5002.err \
feed-5003.err \
feed-5004.err \
feed-5005.err \
mlist-5001.err \
var-5001.err \
var-5002.err \

4
t/feed-5005.err Normal file
View File

@ -0,0 +1,4 @@
## a code point greater than 255 is illegal in a character literal prefixed with b.
printf "[%c] [#x%x] [%d]\n" '★' '★' #x2605;
printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal