enhanced the reader and compiler to treat characters and strings prefixed with b and u as a limited-range character and a byte array with an internal terminating null at the back, respectively

This commit is contained in:
hyung-hwan 2024-01-25 23:48:06 +09:00
parent 4d0d50dea9
commit aaa6e35787
10 changed files with 127 additions and 11 deletions

View File

@ -91,7 +91,7 @@ hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, c
return make_cnode(hcl, HCL_CNODE_DCSTAR, flags, loc, tok); return make_cnode(hcl, HCL_CNODE_DCSTAR, flags, loc, tok);
} }
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v) hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v)
{ {
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_CHARLIT, flags, loc, tok); hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_CHARLIT, flags, loc, tok);
if (HCL_UNLIKELY(!c)) return HCL_NULL; if (HCL_UNLIKELY(!c)) return HCL_NULL;
@ -99,6 +99,14 @@ hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc,
return c; return c;
} }
hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v)
{
	/* build a HCL_CNODE_BCHRLIT node via make_cnode() and remember the
	 * byte value 'v' in it. HCL_NULL is returned if make_cnode() gives
	 * back a null node. */
	hcl_cnode_t* node;

	node = make_cnode(hcl, HCL_CNODE_BCHRLIT, flags, loc, tok);
	if (HCL_UNLIKELY(!node)) return HCL_NULL;

	node->u.bchrlit.v = v;
	return node;
}
hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
{ {
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_SYMBOL, flags, loc, tok); hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_SYMBOL, flags, loc, tok);
@ -120,6 +128,11 @@ hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, c
return make_cnode(hcl, HCL_CNODE_STRLIT, flags, loc, tok); return make_cnode(hcl, HCL_CNODE_STRLIT, flags, loc, tok);
} }
hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
{
	/* no literal-specific field is filled in here; the result is whatever
	 * make_cnode() produces for the HCL_CNODE_BSTRLIT type. */
	hcl_cnode_t* node;

	node = make_cnode(hcl, HCL_CNODE_BSTRLIT, flags, loc, tok);
	return node;
}
hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
{ {
return make_cnode(hcl, HCL_CNODE_NUMLIT, flags, loc, tok); return make_cnode(hcl, HCL_CNODE_NUMLIT, flags, loc, tok);

View File

@ -4436,11 +4436,20 @@ redo:
lit = HCL_CHAR_TO_OOP(oprnd->u.charlit.v); lit = HCL_CHAR_TO_OOP(oprnd->u.charlit.v);
goto literal; goto literal;
case HCL_CNODE_BCHRLIT: /* byte character still converts to charcter */
lit = HCL_CHAR_TO_OOP((hcl_ooch_t)oprnd->u.bchrlit.v);
goto literal;
case HCL_CNODE_STRLIT: case HCL_CNODE_STRLIT:
lit = hcl_makestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0); lit = hcl_makestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0);
if (HCL_UNLIKELY(!lit)) return -1; if (HCL_UNLIKELY(!lit)) return -1;
goto literal; goto literal;
case HCL_CNODE_BSTRLIT:
lit = hcl_makebytestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0);
if (HCL_UNLIKELY(!lit)) return -1;
goto literal;
case HCL_CNODE_NUMLIT: case HCL_CNODE_NUMLIT:
lit = string_to_num(hcl, HCL_CNODE_GET_TOK(oprnd), HCL_CNODE_GET_LOC(oprnd), 0); lit = string_to_num(hcl, HCL_CNODE_GET_TOK(oprnd), HCL_CNODE_GET_LOC(oprnd), 0);
if (HCL_UNLIKELY(!lit)) return -1; if (HCL_UNLIKELY(!lit)) return -1;

View File

@ -159,8 +159,14 @@
enum hcl_tok_type_t enum hcl_tok_type_t
{ {
HCL_TOK_EOF, HCL_TOK_EOF,
/* the following 4 items must be in this order for code
* in flx_quoted_token() in read.c */
HCL_TOK_CHARLIT, HCL_TOK_CHARLIT,
HCL_TOK_BCHRLIT,
HCL_TOK_STRLIT, HCL_TOK_STRLIT,
HCL_TOK_BSTRLIT,
HCL_TOK_NUMLIT, HCL_TOK_NUMLIT,
HCL_TOK_RADNUMLIT, HCL_TOK_RADNUMLIT,
HCL_TOK_FPDECLIT, HCL_TOK_FPDECLIT,
@ -227,9 +233,11 @@ struct hcl_link_t
enum hcl_cnode_type_t enum hcl_cnode_type_t
{ {
HCL_CNODE_CHARLIT, HCL_CNODE_CHARLIT,
HCL_CNODE_BCHRLIT,
HCL_CNODE_SYMBOL, HCL_CNODE_SYMBOL,
HCL_CNODE_DSYMBOL, /* dotted symbol */ HCL_CNODE_DSYMBOL, /* dotted symbol */
HCL_CNODE_STRLIT, HCL_CNODE_STRLIT,
HCL_CNODE_BSTRLIT,
HCL_CNODE_NUMLIT, HCL_CNODE_NUMLIT,
HCL_CNODE_RADNUMLIT, HCL_CNODE_RADNUMLIT,
HCL_CNODE_FPDECLIT, HCL_CNODE_FPDECLIT,
@ -300,6 +308,10 @@ struct hcl_cnode_t
hcl_ooch_t v; hcl_ooch_t v;
} charlit; } charlit;
struct struct
{
hcl_oob_t v;
} bchrlit;
struct
{ {
hcl_syncode_t syncode; /* special if non-zero */ hcl_syncode_t syncode; /* special if non-zero */
} symbol; } symbol;
@ -1750,10 +1762,12 @@ hcl_cnode_t* hcl_makecnodesuper (hcl_t* hcl, int flags, const hcl_loc_t* loc, co
hcl_cnode_t* hcl_makecnodeellipsis (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodeellipsis (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodetrpcolons (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodetrpcolons (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v); hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v);
hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v);
hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodedsymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, int is_cla); hcl_cnode_t* hcl_makecnodedsymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, int is_cla);
hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnoderadnumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnoderadnumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
hcl_cnode_t* hcl_makecnodefpdeclit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodefpdeclit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);

View File

@ -2676,6 +2676,20 @@ HCL_EXPORT hcl_oop_t hcl_makebytearray (
hcl_oow_t len hcl_oow_t len
); );
HCL_EXPORT hcl_oop_t hcl_makebytestringwithbytes (
hcl_t* hcl,
const hcl_oob_t* ptr,
hcl_oow_t len,
int ngc
);
HCL_EXPORT hcl_oop_t hcl_makebytestring (
hcl_t* hcl,
const hcl_ooch_t* ptr,
hcl_oow_t len,
int ngc
);
HCL_EXPORT hcl_oop_t hcl_makestring ( HCL_EXPORT hcl_oop_t hcl_makestring (
hcl_t* hcl, hcl_t* hcl,
const hcl_ooch_t* ptr, const hcl_ooch_t* ptr,

View File

@ -323,6 +323,34 @@ hcl_oop_t hcl_makebytearray (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t size)
return hcl_allocbyteobj(hcl, HCL_BRAND_BYTE_ARRAY, ptr, size); return hcl_allocbyteobj(hcl, HCL_BRAND_BYTE_ARRAY, ptr, size);
} }
hcl_oop_t hcl_makebytestringwithbytes (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t len, int ngc)
{
	/* create a HCL_BRAND_BYTE_ARRAY object from 'len' bytes at 'ptr'.
	 * NOTE(review): the argument 1 passed before 'ngc' presumably asks for
	 * one extra trailing element for the terminating null - confirm
	 * against alloc_numeric_array(). */
	hcl_oop_t bs;

	bs = alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, ptr, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc);
	return bs;
}
hcl_oop_t hcl_makebytestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc)
{
	/* a byte string is a byte array with an extra null at the back.
	 * the input to this function, however, is the pointer to hcl_ooch_t data
	 * because this function is mainly used to convert a token to a byte string.
	 * the token in the compiler is stored as a hcl_ooch_t string.
	 *
	 * returns the new object, or HCL_NULL if the allocation fails. */
	hcl_oop_byte_t b;
	hcl_oow_t i;
	hcl_oob_t v;

	/* no source pointer is handed to the allocator (HCL_NULL); the bytes are
	 * filled in by the loop below. the result is cast explicitly because the
	 * allocator's value is used as a generic hcl_oop_t elsewhere, and assigning
	 * it to a hcl_oop_byte_t without a cast is an incompatible pointer
	 * conversion on strict compilers. */
	b = (hcl_oop_byte_t)alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, HCL_NULL, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc);
	if (HCL_UNLIKELY(!b)) return HCL_NULL;

	for (i = 0; i < len; i++)
	{
		/* only the low 8 bits of each hcl_ooch_t element are kept */
		v = ptr[i] & 0xFF;
		HCL_OBJ_SET_BYTE_VAL(b, i, v);
	}

	return (hcl_oop_t)b;
}
hcl_oop_t hcl_makestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc) hcl_oop_t hcl_makestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc)
{ {
/*return hcl_alloccharobj(hcl, HCL_BRAND_STRING, ptr, len);*/ /*return hcl_alloccharobj(hcl, HCL_BRAND_STRING, ptr, len);*/

View File

@ -798,9 +798,11 @@ void hcl_dumpcnode (hcl_t* hcl, hcl_cnode_t* cnode, int newline)
switch (t) switch (t)
{ {
case HCL_CNODE_CHARLIT: case HCL_CNODE_CHARLIT:
case HCL_CNODE_BCHRLIT:
case HCL_CNODE_SYMBOL: case HCL_CNODE_SYMBOL:
case HCL_CNODE_DSYMBOL: case HCL_CNODE_DSYMBOL:
case HCL_CNODE_STRLIT: case HCL_CNODE_STRLIT:
case HCL_CNODE_BSTRLIT:
case HCL_CNODE_NUMLIT: case HCL_CNODE_NUMLIT:
case HCL_CNODE_RADNUMLIT: case HCL_CNODE_RADNUMLIT:
case HCL_CNODE_FPDECLIT: case HCL_CNODE_FPDECLIT:

View File

@ -614,7 +614,7 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
fake_tok_ptr = &fake_tok; fake_tok_ptr = &fake_tok;
} }
/* TODO: check the number of argumetns in advance??? */ /* TODO: check the number of arguments in advance??? */
sym = hcl_makecnodesymbol(hcl, 0, &loc, fake_tok_ptr); sym = hcl_makecnodesymbol(hcl, 0, &loc, fake_tok_ptr);
if (HCL_UNLIKELY(!sym)) if (HCL_UNLIKELY(!sym))
{ {
@ -1476,6 +1476,10 @@ static int feed_process_token (hcl_t* hcl)
frd->obj = hcl_makecnodecharlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0)); frd->obj = hcl_makecnodecharlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0));
goto auto_xlist; goto auto_xlist;
case HCL_TOK_BCHRLIT:
frd->obj = hcl_makecnodebchrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), (hcl_oob_t)TOKEN_NAME_CHAR(hcl, 0));
goto auto_xlist;
case HCL_TOK_NUMLIT: case HCL_TOK_NUMLIT:
frd->obj = hcl_makecnodenumlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); frd->obj = hcl_makecnodenumlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist; goto auto_xlist;
@ -1498,6 +1502,10 @@ static int feed_process_token (hcl_t* hcl)
frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist; goto auto_xlist;
case HCL_TOK_BSTRLIT:
frd->obj = hcl_makecnodebstrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist;
case HCL_TOK_IDENT: case HCL_TOK_IDENT:
frd->obj = hcl_makecnodesymbol(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); frd->obj = hcl_makecnodesymbol(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto auto_xlist; goto auto_xlist;
@ -2348,9 +2356,16 @@ not_consumed:
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
{ {
hcl_flx_qt_t* qt = FLX_QT(hcl); hcl_flx_qt_t* qt = FLX_QT(hcl);
hcl_loc_t synerr_loc = *TOKEN_LOC(hcl);
if (c == HCL_OOCI_EOF) goto invalid_token; if (c == HCL_OOCI_EOF) goto invalid_token;
if (qt->is_byte && c > 0xFF)
{
synerr_loc = *FLX_LOC(hcl);
goto invalid_token;
}
if (qt->escaped == 3) if (qt->escaped == 3)
{ {
if (c >= '0' && c <= '7') if (c >= '0' && c <= '7')
@ -2424,8 +2439,12 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
if (qt->escaped == 0 && c == qt->end_char) if (qt->escaped == 0 && c == qt->end_char)
{ {
/* terminating quote */ /* terminating quote */
/* TODO: byte string literal or byte literal by checking qt->is_byte... */
FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */ /* qt->tok_type + qt->is_byte assumes that the token types for
* byte-string and byte-character literals are 1 greater than
* string and character literals. see the definition of
* hcl_tok_type_t in hcl-prv.h */
FEED_WRAP_UP (hcl, qt->tok_type + qt->is_byte); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */
if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token; if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token;
goto consumed; goto consumed;
} }
@ -2462,9 +2481,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
goto consumed; goto consumed;
} }
#if (HCL_SIZEOF_OOCH_T >= 2) #if (HCL_SIZEOF_OOCH_T >= 2)
else if (c == 'u') else if (c == 'u' && !qt->is_byte)
{ {
if (qt->is_byte) goto invalid_token; #if 0
if (qt->is_byte)
{
synerr_loc = *FLX_LOC(hcl);
goto invalid_token;
}
#endif
qt->escaped = 4; qt->escaped = 4;
qt->digit_count = 0; qt->digit_count = 0;
qt->c_acc = 0; qt->c_acc = 0;
@ -2472,9 +2497,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
} }
#endif #endif
#if (HCL_SIZEOF_OOCH_T >= 4) #if (HCL_SIZEOF_OOCH_T >= 4)
else if (c == 'U') else if (c == 'U' && !qt->is_byte)
{ {
if (qt->is_byte) goto invalid_token; #if 0
if (qt->is_byte)
{
synerr_loc = *FLX_LOC(hcl);
goto invalid_token;
}
#endif
qt->escaped = 8; qt->escaped = 8;
qt->digit_count = 0; qt->digit_count = 0;
qt->c_acc = 0; qt->c_acc = 0;
@ -2501,8 +2532,7 @@ consumed:
return 1; return 1;
invalid_token: invalid_token:
/* TODO: more accurate syntax error code instead of just synerr_code.... */ hcl_setsynerr (hcl, qt->synerr_code, &synerr_loc, HCL_NULL);
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
return -1; return -1;
} }

View File

@ -19,6 +19,7 @@ check_ERRORS = \
feed-5002.err \ feed-5002.err \
feed-5003.err \ feed-5003.err \
feed-5004.err \ feed-5004.err \
feed-5005.err \
mlist-5001.err \ mlist-5001.err \
var-5001.err \ var-5001.err \
var-5002.err \ var-5002.err \

View File

@ -490,6 +490,7 @@ check_ERRORS = \
feed-5002.err \ feed-5002.err \
feed-5003.err \ feed-5003.err \
feed-5004.err \ feed-5004.err \
feed-5005.err \
mlist-5001.err \ mlist-5001.err \
var-5001.err \ var-5001.err \
var-5002.err \ var-5002.err \

4
t/feed-5005.err Normal file
View File

@ -0,0 +1,4 @@
## a code point greater than 255 is illegal in a character literal prefixed with b.
printf "[%c] [#x%x] [%d]\n" '★' '★' #x2605;
printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal