Enhanced the reader and compiler to treat characters and strings prefixed with b and u as a limited-range character and a byte array with an internal terminating null at the back, respectively
This commit is contained in:
parent
4d0d50dea9
commit
aaa6e35787
15
lib/cnode.c
15
lib/cnode.c
@ -91,7 +91,7 @@ hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, c
|
||||
return make_cnode(hcl, HCL_CNODE_DCSTAR, flags, loc, tok);
|
||||
}
|
||||
|
||||
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v)
|
||||
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v)
|
||||
{
|
||||
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_CHARLIT, flags, loc, tok);
|
||||
if (HCL_UNLIKELY(!c)) return HCL_NULL;
|
||||
@ -99,6 +99,14 @@ hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc,
|
||||
return c;
|
||||
}
|
||||
|
||||
hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v)
|
||||
{
|
||||
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_BCHRLIT, flags, loc, tok);
|
||||
if (HCL_UNLIKELY(!c)) return HCL_NULL;
|
||||
c->u.bchrlit.v = v;
|
||||
return c;
|
||||
}
|
||||
|
||||
hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
|
||||
{
|
||||
hcl_cnode_t* c = make_cnode(hcl, HCL_CNODE_SYMBOL, flags, loc, tok);
|
||||
@ -120,6 +128,11 @@ hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, c
|
||||
return make_cnode(hcl, HCL_CNODE_STRLIT, flags, loc, tok);
|
||||
}
|
||||
|
||||
hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
|
||||
{
|
||||
return make_cnode(hcl, HCL_CNODE_BSTRLIT, flags, loc, tok);
|
||||
}
|
||||
|
||||
hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok)
|
||||
{
|
||||
return make_cnode(hcl, HCL_CNODE_NUMLIT, flags, loc, tok);
|
||||
|
@ -4436,11 +4436,20 @@ redo:
|
||||
lit = HCL_CHAR_TO_OOP(oprnd->u.charlit.v);
|
||||
goto literal;
|
||||
|
||||
case HCL_CNODE_BCHRLIT: /* byte character still converts to character */
|
||||
lit = HCL_CHAR_TO_OOP((hcl_ooch_t)oprnd->u.bchrlit.v);
|
||||
goto literal;
|
||||
|
||||
case HCL_CNODE_STRLIT:
|
||||
lit = hcl_makestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0);
|
||||
if (HCL_UNLIKELY(!lit)) return -1;
|
||||
goto literal;
|
||||
|
||||
case HCL_CNODE_BSTRLIT:
|
||||
lit = hcl_makebytestring(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd), 0);
|
||||
if (HCL_UNLIKELY(!lit)) return -1;
|
||||
goto literal;
|
||||
|
||||
case HCL_CNODE_NUMLIT:
|
||||
lit = string_to_num(hcl, HCL_CNODE_GET_TOK(oprnd), HCL_CNODE_GET_LOC(oprnd), 0);
|
||||
if (HCL_UNLIKELY(!lit)) return -1;
|
||||
|
@ -159,8 +159,14 @@
|
||||
enum hcl_tok_type_t
|
||||
{
|
||||
HCL_TOK_EOF,
|
||||
|
||||
/* the following 4 items must be in this order for code
|
||||
* in flx_quoted_token() in read.c */
|
||||
HCL_TOK_CHARLIT,
|
||||
HCL_TOK_BCHRLIT,
|
||||
HCL_TOK_STRLIT,
|
||||
HCL_TOK_BSTRLIT,
|
||||
|
||||
HCL_TOK_NUMLIT,
|
||||
HCL_TOK_RADNUMLIT,
|
||||
HCL_TOK_FPDECLIT,
|
||||
@ -227,9 +233,11 @@ struct hcl_link_t
|
||||
enum hcl_cnode_type_t
|
||||
{
|
||||
HCL_CNODE_CHARLIT,
|
||||
HCL_CNODE_BCHRLIT,
|
||||
HCL_CNODE_SYMBOL,
|
||||
HCL_CNODE_DSYMBOL, /* dotted symbol */
|
||||
HCL_CNODE_STRLIT,
|
||||
HCL_CNODE_BSTRLIT,
|
||||
HCL_CNODE_NUMLIT,
|
||||
HCL_CNODE_RADNUMLIT,
|
||||
HCL_CNODE_FPDECLIT,
|
||||
@ -300,6 +308,10 @@ struct hcl_cnode_t
|
||||
hcl_ooch_t v;
|
||||
} charlit;
|
||||
struct
|
||||
{
|
||||
hcl_oob_t v;
|
||||
} bchrlit;
|
||||
struct
|
||||
{
|
||||
hcl_syncode_t syncode; /* special if non-zero */
|
||||
} symbol;
|
||||
@ -1750,10 +1762,12 @@ hcl_cnode_t* hcl_makecnodesuper (hcl_t* hcl, int flags, const hcl_loc_t* loc, co
|
||||
hcl_cnode_t* hcl_makecnodeellipsis (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodetrpcolons (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodedcstar (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, const hcl_ooch_t v);
|
||||
hcl_cnode_t* hcl_makecnodecharlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_ooch_t v);
|
||||
hcl_cnode_t* hcl_makecnodebchrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, hcl_oob_t v);
|
||||
hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodedsymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, int is_cla);
|
||||
hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnoderadnumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
hcl_cnode_t* hcl_makecnodefpdeclit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok);
|
||||
|
14
lib/hcl.h
14
lib/hcl.h
@ -2676,6 +2676,20 @@ HCL_EXPORT hcl_oop_t hcl_makebytearray (
|
||||
hcl_oow_t len
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_oop_t hcl_makebytestringwithbytes (
|
||||
hcl_t* hcl,
|
||||
const hcl_oob_t* ptr,
|
||||
hcl_oow_t len,
|
||||
int ngc
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_oop_t hcl_makebytestring (
|
||||
hcl_t* hcl,
|
||||
const hcl_ooch_t* ptr,
|
||||
hcl_oow_t len,
|
||||
int ngc
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_oop_t hcl_makestring (
|
||||
hcl_t* hcl,
|
||||
const hcl_ooch_t* ptr,
|
||||
|
28
lib/obj.c
28
lib/obj.c
@ -323,6 +323,34 @@ hcl_oop_t hcl_makebytearray (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t size)
|
||||
return hcl_allocbyteobj(hcl, HCL_BRAND_BYTE_ARRAY, ptr, size);
|
||||
}
|
||||
|
||||
hcl_oop_t hcl_makebytestringwithbytes (hcl_t* hcl, const hcl_oob_t* ptr, hcl_oow_t len, int ngc)
|
||||
{
|
||||
return alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, ptr, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc);
|
||||
}
|
||||
|
||||
hcl_oop_t hcl_makebytestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc)
|
||||
{
|
||||
/* a byte string is a byte array with an extra null at the back.
|
||||
* the input to this function, however, is the pointer to hcl_ooch_t data
|
||||
* because this function is mainly used to convert a token to a byte string.
|
||||
* the token in the compiler is stored as a hcl_ooch_t string. */
|
||||
|
||||
hcl_oop_byte_t b;
|
||||
hcl_oow_t i;
|
||||
hcl_oob_t v;
|
||||
|
||||
b = alloc_numeric_array(hcl, HCL_BRAND_BYTE_ARRAY, HCL_NULL, len, HCL_OBJ_TYPE_BYTE, HCL_SIZEOF(hcl_oob_t), 1, ngc);
|
||||
if (HCL_UNLIKELY(!b)) return HCL_NULL;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
v = ptr[i] & 0xFF;
|
||||
HCL_OBJ_SET_BYTE_VAL(b, i, v);
|
||||
}
|
||||
|
||||
return (hcl_oop_t)b;
|
||||
}
|
||||
|
||||
hcl_oop_t hcl_makestring (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len, int ngc)
|
||||
{
|
||||
/*return hcl_alloccharobj(hcl, HCL_BRAND_STRING, ptr, len);*/
|
||||
|
@ -798,9 +798,11 @@ void hcl_dumpcnode (hcl_t* hcl, hcl_cnode_t* cnode, int newline)
|
||||
switch (t)
|
||||
{
|
||||
case HCL_CNODE_CHARLIT:
|
||||
case HCL_CNODE_BCHRLIT:
|
||||
case HCL_CNODE_SYMBOL:
|
||||
case HCL_CNODE_DSYMBOL:
|
||||
case HCL_CNODE_STRLIT:
|
||||
case HCL_CNODE_BSTRLIT:
|
||||
case HCL_CNODE_NUMLIT:
|
||||
case HCL_CNODE_RADNUMLIT:
|
||||
case HCL_CNODE_FPDECLIT:
|
||||
|
48
lib/read.c
48
lib/read.c
@ -614,7 +614,7 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
|
||||
fake_tok_ptr = &fake_tok;
|
||||
}
|
||||
|
||||
/* TODO: check the number of argumetns in advance??? */
|
||||
/* TODO: check the number of arguments in advance??? */
|
||||
sym = hcl_makecnodesymbol(hcl, 0, &loc, fake_tok_ptr);
|
||||
if (HCL_UNLIKELY(!sym))
|
||||
{
|
||||
@ -1476,6 +1476,10 @@ static int feed_process_token (hcl_t* hcl)
|
||||
frd->obj = hcl_makecnodecharlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0));
|
||||
goto auto_xlist;
|
||||
|
||||
case HCL_TOK_BCHRLIT:
|
||||
frd->obj = hcl_makecnodebchrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), (hcl_oob_t)TOKEN_NAME_CHAR(hcl, 0));
|
||||
goto auto_xlist;
|
||||
|
||||
case HCL_TOK_NUMLIT:
|
||||
frd->obj = hcl_makecnodenumlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
|
||||
goto auto_xlist;
|
||||
@ -1498,6 +1502,10 @@ static int feed_process_token (hcl_t* hcl)
|
||||
frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
|
||||
goto auto_xlist;
|
||||
|
||||
case HCL_TOK_BSTRLIT:
|
||||
frd->obj = hcl_makecnodebstrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
|
||||
goto auto_xlist;
|
||||
|
||||
case HCL_TOK_IDENT:
|
||||
frd->obj = hcl_makecnodesymbol(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
|
||||
goto auto_xlist;
|
||||
@ -2348,9 +2356,16 @@ not_consumed:
|
||||
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
|
||||
{
|
||||
hcl_flx_qt_t* qt = FLX_QT(hcl);
|
||||
hcl_loc_t synerr_loc = *TOKEN_LOC(hcl);
|
||||
|
||||
if (c == HCL_OOCI_EOF) goto invalid_token;
|
||||
|
||||
if (qt->is_byte && c > 0xFF)
|
||||
{
|
||||
synerr_loc = *FLX_LOC(hcl);
|
||||
goto invalid_token;
|
||||
}
|
||||
|
||||
if (qt->escaped == 3)
|
||||
{
|
||||
if (c >= '0' && c <= '7')
|
||||
@ -2424,8 +2439,12 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
|
||||
if (qt->escaped == 0 && c == qt->end_char)
|
||||
{
|
||||
/* terminating quote */
|
||||
/* TODO: byte string literal or byte literal by checking qt->is_byte... */
|
||||
FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */
|
||||
|
||||
/* qt->tok_type + qt->is_byte assumes that the token types for
|
||||
* byte-string and byte-character literals are 1 greater than
|
||||
* string and character literals. see the definition of
|
||||
* hcl_tok_type_t in hcl-prv.h */
|
||||
FEED_WRAP_UP (hcl, qt->tok_type + qt->is_byte); /* HCL_TOK_STRLIT, HCL_TOK_BSTRLIT, HCL_TOK_CHARLIT or HCL_TOK_BCHRLIT */
|
||||
if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token;
|
||||
goto consumed;
|
||||
}
|
||||
@ -2462,9 +2481,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
|
||||
goto consumed;
|
||||
}
|
||||
#if (HCL_SIZEOF_OOCH_T >= 2)
|
||||
else if (c == 'u')
|
||||
else if (c == 'u' && !qt->is_byte)
|
||||
{
|
||||
if (qt->is_byte) goto invalid_token;
|
||||
#if 0
|
||||
if (qt->is_byte)
|
||||
{
|
||||
synerr_loc = *FLX_LOC(hcl);
|
||||
goto invalid_token;
|
||||
}
|
||||
#endif
|
||||
qt->escaped = 4;
|
||||
qt->digit_count = 0;
|
||||
qt->c_acc = 0;
|
||||
@ -2472,9 +2497,15 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
|
||||
}
|
||||
#endif
|
||||
#if (HCL_SIZEOF_OOCH_T >= 4)
|
||||
else if (c == 'U')
|
||||
else if (c == 'U' && !qt->is_byte)
|
||||
{
|
||||
if (qt->is_byte) goto invalid_token;
|
||||
#if 0
|
||||
if (qt->is_byte)
|
||||
{
|
||||
synerr_loc = *FLX_LOC(hcl);
|
||||
goto invalid_token;
|
||||
}
|
||||
#endif
|
||||
qt->escaped = 8;
|
||||
qt->digit_count = 0;
|
||||
qt->c_acc = 0;
|
||||
@ -2501,8 +2532,7 @@ consumed:
|
||||
return 1;
|
||||
|
||||
invalid_token:
|
||||
/* TODO: more accurate syntax error code instead of just synerr_code.... */
|
||||
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
|
||||
hcl_setsynerr (hcl, qt->synerr_code, &synerr_loc, HCL_NULL);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,7 @@ check_ERRORS = \
|
||||
feed-5002.err \
|
||||
feed-5003.err \
|
||||
feed-5004.err \
|
||||
feed-5005.err \
|
||||
mlist-5001.err \
|
||||
var-5001.err \
|
||||
var-5002.err \
|
||||
|
@ -490,6 +490,7 @@ check_ERRORS = \
|
||||
feed-5002.err \
|
||||
feed-5003.err \
|
||||
feed-5004.err \
|
||||
feed-5005.err \
|
||||
mlist-5001.err \
|
||||
var-5001.err \
|
||||
var-5002.err \
|
||||
|
4
t/feed-5005.err
Normal file
4
t/feed-5005.err
Normal file
@ -0,0 +1,4 @@
|
||||
## a code point greater than 255 is illegal in the character literal prefixed with b.
|
||||
|
||||
printf "[%c] [#x%x] [%d]\n" '★' '★' #x2605;
|
||||
printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal
|
Loading…
x
Reference in New Issue
Block a user