fixed various reader issues
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
hyung-hwan 2024-09-18 21:41:00 +09:00
parent ec4a6facee
commit b6e6274666
10 changed files with 181 additions and 66 deletions

View File

@ -422,6 +422,7 @@ static void print_error (hcl_t* hcl, const hcl_bch_t* msghdr)
static void show_prompt (hcl_t* hcl, int level)
{
/* TODO: different prompt per level */
hcl_resetfeedloc (hcl);
hcl_logbfmt (hcl, HCL_LOG_STDOUT, "HCL> ");
hcl_logbfmt (hcl, HCL_LOG_STDOUT, HCL_NULL); /* flushing */
}

View File

@ -107,7 +107,6 @@ static char* synerrstr[] =
"out of integer range",
"wrong error literal",
"wrong smptr literal",
"wrong multi-segment identifer",
"invalid radix for a numeric literal",
"sudden end of input",

View File

@ -455,6 +455,7 @@ typedef enum hcl_cnode_flag_t hcl_cnode_flag_t;
#define HCL_CNODE_IS_SYMBOL(x) ((x)->cn_type == HCL_CNODE_SYMBOL)
#define HCL_CNODE_IS_SYMBOL_PLAIN(x) ((x)->cn_type == HCL_CNODE_SYMBOL && (x)->u.symbol.syncode == 0)
#define HCL_CNODE_IS_SYMBOL_PLAIN_IDENT(x) (HCL_CNODE_IS_SYMBOL_PLAIN(x) && !hcl_is_binop_char((x)->cn_tok.ptr[0]))
#define HCL_CNODE_IS_SYMBOL_PLAIN_BINOP(x) (HCL_CNODE_IS_SYMBOL_PLAIN(x) && hcl_is_binop_char((x)->cn_tok.ptr[0]))
#define HCL_CNODE_IS_SYMBOL_SYNCODED(x, code) ((x)->cn_type == HCL_CNODE_SYMBOL && (x)->u.symbol.syncode == (code))
#define HCL_CNODE_SYMBOL_SYNCODE(x) ((x)->u.symbol.syncode)

View File

@ -111,7 +111,6 @@ enum hcl_synerrnum_t
HCL_SYNERR_NUMRANGE, /* number range error */
HCL_SYNERR_ERRLIT, /* wrong error literal */
HCL_SYNERR_SMPTRLIT, /* wrong smptr literal */
HCL_SYNERR_MSEGIDENT, /* wrong multi-segment identifier */
HCL_SYNERR_RADIX, /* invalid radix for a numeric literal */
HCL_SYNERR_EOF, /* sudden end of input */
@ -2580,16 +2579,21 @@ HCL_EXPORT int hcl_feeduchars (
hcl_oow_t len
);
HCL_EXPORT int hcl_feedpending (
hcl_t* hcl
);
HCL_EXPORT int hcl_feedbchars (
hcl_t* hcl,
const hcl_bch_t* data,
hcl_oow_t len
);
HCL_EXPORT int hcl_feedpending (
hcl_t* hcl
);
HCL_EXPORT void hcl_resetfeedloc (
hcl_t* hcl
);
HCL_EXPORT int hcl_endfeed (
hcl_t* hcl
);

View File

@ -1218,7 +1218,7 @@ static hcl_pfrc_t pf_object_new (hcl_t* hcl, hcl_mod_t* mod, hcl_ooi_t nargs)
return HCL_PF_FAILURE;
}
if (nargs >= 1)
if (nargs >= 2)
{
int n;
hcl_oop_t sz;

View File

@ -260,7 +260,7 @@ static HCL_INLINE int is_alnumchar (hcl_ooci_t c)
}
#endif
static HCL_INLINE int is_delimchar (hcl_ooci_t c)
static HCL_INLINE int is_delim_char (hcl_ooci_t c)
{
return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' ||
c == '|' || c == ',' || c == '.' || c == ':' || c == ';' ||
@ -273,12 +273,27 @@ static HCL_INLINE int is_delimchar (hcl_ooci_t c)
}
int hcl_is_binop_char (hcl_ooci_t c)
int hcl_is_binop_char (hcl_ooci_t c) /* not static HCL_INLINE for shared use with comp.c via HCL_CNODE_IS_SYMBOL_PLAIN() */
{
return c == '&' || c == '*' || c == '+' || c == '-' || c == '/' || c == '%' ||
c == '<' || c == '>' || c == '=' || c == '@' || c == '|' || c == '~';
}
static HCL_INLINE int is_lead_ident_char (hcl_ooci_t c)
{
return hcl_is_ooch_alpha(c) || c == '_';
}
static HCL_INLINE int is_ident_char (hcl_ooci_t c)
{
/* [NOTE]
* '-' is prohibited as the last character of an identifier or an identifier segment.
* see flx_plain_ident().
*/
return hcl_is_ooch_alnum(c) || c == '_' || c == '-' || c == '?';
}
/* TODO: remove GET_CHAR(), GET_CHAR_TO(), get_char(), _get_char() */
#define GET_CHAR(hcl) \
do { if (get_char(hcl) <= -1) return -1; } while (0)
@ -582,7 +597,7 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
/* no item after , : := or various binary operators */
if (concode == HCL_CONCODE_MLIST)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_CALLABLE, TOKEN_LOC(hcl), HCL_NULL, "missing message after :");
hcl_setsynerrbfmt (hcl, HCL_SYNERR_CALLABLE, TOKEN_LOC(hcl), HCL_NULL, "missing message after receiver");
}
else if (concode == HCL_CONCODE_ALIST)
{
@ -633,8 +648,8 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
lval = HCL_CNODE_CONS_CAR(head);
if (lval && HCL_CNODE_IS_ELIST(lval))
{
/* invalid lvalue */
hcl_setsynerr (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(lval), HCL_CNODE_GET_TOK(lval));
/* invalid lvalue - for example, () := 20 */
hcl_setsynerrbfmt (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(lval), HCL_CNODE_GET_TOK(lval), "bad lvalue - blank expression");
goto oops;
}
else if (lval && HCL_CNODE_IS_CONS_CONCODED(lval, HCL_CONCODE_TUPLE))
@ -659,9 +674,9 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
/* check in advance if the array members are all plain symbols */
hcl_cnode_t* lcar;
lcar = HCL_CNODE_CONS_CAR(tmp);
if (!HCL_CNODE_IS_SYMBOL_PLAIN(lcar) && !HCL_CNODE_IS_DSYMBOL_CLA(lcar))
if (!HCL_CNODE_IS_SYMBOL_PLAIN_IDENT(lcar) && !HCL_CNODE_IS_DSYMBOL_CLA(lcar))
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(lval), HCL_CNODE_GET_TOK(lval), "invalid lvalue - not symbol in tuple");
hcl_setsynerrbfmt (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(lval), HCL_CNODE_GET_TOK(lval), "bad lvalue - invalid element in tuple");
goto oops;
}
}
@ -685,9 +700,9 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
}
else
{
if (!HCL_CNODE_IS_SYMBOL_PLAIN(lval) && !HCL_CNODE_IS_DSYMBOL_CLA(lval))
if (!HCL_CNODE_IS_SYMBOL_PLAIN_IDENT(lval) && !HCL_CNODE_IS_DSYMBOL_CLA(lval))
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(lval), HCL_CNODE_GET_TOK(lval), "invalid lvalue - not symbol");
hcl_setsynerrbfmt (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(lval), HCL_CNODE_GET_TOK(lval), "bad lvalue - invalid element");
goto oops;
}
#if defined(TRANSFORM_ALIST)
@ -737,33 +752,28 @@ static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, hcl_loc_t* list_loc, int*
/* x binop y -> binop x y - BLIST contains BINOP in it */
hcl_cnode_t* x, * binop;
x = HCL_CNODE_CONS_CAR(head);
if (x && HCL_CNODE_IS_ELIST(x))
{
hcl_setsynerr (hcl, HCL_SYNERR_LVALUE, HCL_CNODE_GET_LOC(x), HCL_CNODE_GET_TOK(x));
goto oops;
}
/* swap x and binop */
/*x = HCL_CNODE_CONS_CAR(head);*/
binop = HCL_CNODE_CONS_CDR(head);
if (!binop || !HCL_CNODE_IS_CONS(binop) || !HCL_CNODE_CONS_CDR(binop) || !HCL_CNODE_IS_CONS(HCL_CNODE_CONS_CDR(binop)))
HCL_ASSERT (hcl, binop && HCL_CNODE_IS_CONS(binop));
HCL_ASSERT (hcl, count >= 2); /* the code in can_binop_list() ensures this condition */
if (count < 3)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NOVALUE, HCL_CNODE_GET_LOC(x), HCL_CNODE_GET_TOK(x), "no operand after binary operator");
/* for example, 1 + */
x = HCL_CNODE_CONS_CAR(binop);
/* with the transformation, the implementation supports two operands and a single binop in an expression. */
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NOVALUE, HCL_CNODE_GET_LOC(x), HCL_NULL,
"no operand after binary selector '%.*js'", HCL_CNODE_GET_TOKLEN(x), HCL_CNODE_GET_TOKPTR(x));
goto oops;
}
/* TODO: support multiple operators and operands .. like 1 + 2 - 3
currently can_binop_list() disallows more operators.
but the check isn't complemete if more operands are added without an operator e.g. (1 + 2 3)
*/
/*HCL_ASSERT (hcl, count == 3);*/
if (count != 3)
else if (count > 3)
{
/* Currently, the implementation supports two operands and a single binop in an expression.
* If the implementation is enhanced to support more than one binop and more operands,
* this check must be removed. */
/* TODO: support more than two operands */
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ARGFLOOD, HCL_CNODE_GET_LOC(tail), HCL_CNODE_GET_TOK(tail), "too many operands");
/* for example, 1 + 1 1 */
x = HCL_CNODE_CONS_CAR(tail);
/* with the transformation, the implementation supports two operands and a single binop in an expression. */
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NOVALUE, HCL_CNODE_GET_LOC(x), HCL_NULL,
"redundant operand '%.*js'", HCL_CNODE_GET_TOKLEN(x), HCL_CNODE_GET_TOKPTR(x));
goto oops;
}
@ -1012,7 +1022,7 @@ static HCL_INLINE int can_binop_list (hcl_t* hcl)
if (cc != HCL_CONCODE_XLIST) return 0;
}
LIST_FLAG_SET_CONCODE(rstl->flagv, HCL_CONCODE_BLIST);
LIST_FLAG_SET_CONCODE(rstl->flagv, HCL_CONCODE_BLIST); /* switch to BLIST as long as a binary operator is seen */
rstl->flagv |= BINOPED;
/* TODO: must remember the actual binop operator token */
return 2;
@ -2002,7 +2012,7 @@ typedef struct delim_token_t delim_token_t;
static delim_token_t delim_token_tab[] =
{
/* [NOTE 1]
* if you add a new token, ensure the first character is listed in is_delimchar()
* if you add a new token, ensure the first character is listed in is_delim_char()
*
* [NOTE 2]
* for the implementation limitation in find_delim_token_char(),
@ -2014,7 +2024,7 @@ static delim_token_t delim_token_tab[] =
*
* [NOTE 3]
* don't list #(, #[, #{ here because of overlapping use of # for various purposes.
* however, # is included in is_delimchar().
* however, # is included in is_delim_char().
*/
{ "(", 1, HCL_TOK_LPAREN },
@ -2312,12 +2322,16 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c)
init_flx_binop (FLX_BINOP(hcl));
FEED_CONTINUE (hcl, HCL_FLX_BINOP);
}
else
else if (is_lead_ident_char(c))
{
/* TODO: limit identifier character - is_identchar(), is_identleadchar() */
init_flx_pi (FLX_PI(hcl));
FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
}
else
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_BACKSLASH, TOKEN_LOC(hcl), HCL_NULL, "invalid token character - %c", c);
return -1;
}
goto not_consumed;
}
@ -2391,7 +2405,7 @@ static int flx_dollared_ident (hcl_t* hcl, hcl_ooci_t c)
/* di->char_count doesn't include the first '$' */
if (is_delimchar(c))
if (is_delim_char(c))
{
hcl_tok_type_t tok_type;
@ -2553,7 +2567,7 @@ static int flx_hmarked_char (hcl_t* hcl, hcl_ooci_t c)
{
hcl_flx_hc_t* hc = FLX_HC(hcl);
if (is_delimchar(c))
if (is_delim_char(c))
{
if (hc->char_count == 0)
{
@ -2651,7 +2665,7 @@ static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
{
hcl_flx_hi_t* hi = FLX_HI(hcl);
if (is_delimchar(c))
if (is_delim_char(c))
{
hcl_tok_type_t tok_type;
@ -2722,7 +2736,7 @@ static int flx_hmarked_binop (hcl_t* hcl, hcl_ooci_t c)
ADD_TOKEN_CHAR(hcl, c);
goto consumed;
}
else if (is_delimchar(c))
else if (is_delim_char(c))
{
FEED_WRAP_UP(hcl, HCL_TOK_SYMLIT);
goto not_consumed;
@ -2749,7 +2763,7 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
if (HCL_CHAR_TO_NUM(c, rn->radix) >= rn->radix)
{
if (is_delimchar(c))
if (is_delim_char(c))
{
if (rn->digit_count == 0)
{
@ -2788,7 +2802,7 @@ static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
}
else
{
HCL_ASSERT (hcl, !is_delimchar(c));
HCL_ASSERT (hcl, !is_delim_char(c));
ADD_TOKEN_CHAR(hcl, c);
rn->digit_count++;
goto consumed;
@ -2805,7 +2819,7 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
{
hcl_flx_pi_t* pi = FLX_PI(hcl);
if (is_delimchar(c)) /* [NOTE] . is one of the delimiter character */
if (is_delim_char(c)) /* [NOTE] . is one of the delimiter character */
{
hcl_oow_t start;
hcl_oocs_t seg;
@ -2813,13 +2827,21 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
if (pi->seg_len == 0)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "blank segment");
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
"blank segment after '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
start = TOKEN_NAME_LEN(hcl) - pi->seg_len;
seg.ptr = &TOKEN_NAME_CHAR(hcl, start);
seg.len = pi->seg_len;
if (seg.ptr[seg.len - 1] == '-')
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"'%c' prohibited as last character of identifier or identifier segment",
seg.ptr[seg.len - 1]);
return -1;
}
tok_type = classify_ident_token(hcl, &seg);
if (tok_type != HCL_TOK_IDENT)
{
@ -2830,6 +2852,7 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
}
else
{
/* for example, if.if.abc - flag the error after having consumed all the segments */
pi->non_ident_seg_count++;
pi->last_non_ident_type = tok_type;
}
@ -2856,7 +2879,7 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
}
else
{
hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "wrong multi-segment identifier");
return -1;
}
}
@ -2868,13 +2891,32 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
FEED_WRAP_UP (hcl, tok_type);
goto not_consumed;
}
else
else if (is_ident_char(c))
{
if (pi->seg_len == 0)
{
if (!is_lead_ident_char(c))
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
"'%c' prohibited as first character of identifier or identifier segment after '%.*js'",
c, TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
}
ADD_TOKEN_CHAR(hcl, c);
pi->char_count++;
pi->seg_len++;
goto consumed;
}
else
{
hcl_setsynerrbfmt (
hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), HCL_NULL,
"invalid identifier character '%jc' after '%.*js'", c,
TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
consumed:
return 1;
@ -2883,7 +2925,7 @@ not_consumed:
return 0;
}
static int flx_binop (hcl_t* hcl, hcl_ooci_t c) /* identifier */
static int flx_binop (hcl_t* hcl, hcl_ooci_t c) /* binary operator/selector */
{
#if 0
hcl_flx_binop_t* binop = FLX_BINOP(hcl);
@ -3496,6 +3538,13 @@ int hcl_feedpending (hcl_t* hcl)
return !(hcl->c->r.st == HCL_NULL && FLX_STATE(hcl) == HCL_FLX_START);
}
void hcl_resetfeedloc (hcl_t* hcl)
{
hcl->c->feed.lx.loc.line = 1;
hcl->c->feed.lx.loc.colm = 1;
hcl->c->feed.lx.loc.file = HCL_NULL;
}
int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{
/* TODO: need to return the number of processed characters?
@ -3518,7 +3567,11 @@ int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
for (i = 0; i < len; )
{
x = feed_char(hcl, data[i]);
if (x <= -1) goto oops; /* TODO: return the number of processed characters via an argument? */
if (x <= -1)
{
feed_update_lx_loc (hcl, data[i]); /* update the location upon an error too */
goto oops; /* TODO: return the number of processed characters via an argument? */
}
if (x > 0)
{

View File

@ -6,7 +6,12 @@ x := (+ 10 20) "aaaa"; ##ERROR: syntax error - too many rvalues
##
x := (10 +); ##ERROR: syntax error - no operand after binary operator
x := (10 +); ##ERROR: syntax error - no operand after binary selector
---
10 + ##ERROR: syntax error - no operand after binary selector '+'
---
@ -82,8 +87,48 @@ defun :* fun1() { ##ERROR: syntax error - function name not symbol in defun
---
(10 + 20 30) ##ERROR: syntax error - too many operands
(10 + 20 30) ##ERROR: syntax error - redundant operand '30'
---
#**a ##ERROR: syntax error - invalid binary selector character 'a' after #**
---
abc- := 20 ##ERROR: syntax error - '-' prohibited as last character of identifier or identifier segment
---
self.g- := 20 ##ERROR: syntax error - '-' prohibited as last character of identifier or identifier segment
---
self.-g := 20 ##ERROR: syntax error - '-' prohibited as first character of identifier or identifier segment
---
if.abc := 20 ##ERROR: syntax error - wrong multi-segment identifier
---
abc. := 20 ##ERROR: syntax error - blank segment after 'abc.'
---
abc.? := 20 ##ERROR: syntax error - '?' prohibited as first character of identifier or identifier segment after 'abc.'
---
- := 20 ##ERROR: syntax error - bad lvalue - invalid element - -
---
+++ := 20 ##ERROR: syntax error - bad lvalue - invalid element - +++
##---
##1 + + +
##ASSERTION FAILURE: rstl->count == 3 at ../../../lib/read.c:1001
#
#
#HCL> 1 + 2 3 + 4
##ASSERTION FAILURE: rstl->count == 3 at ../../../lib/read.c:1002

View File

@ -1,5 +1,6 @@
## test class instantiation methods
fun UndefinedObject: ~= (oprnd) { return (nqv? self oprnd) } ## for if (a ~= nil) ...
fun Number: + (oprnd) { return (+ self oprnd) }
fun Number: - (oprnd) { return (- self oprnd) }
fun Number: * (oprnd) { return (* self oprnd) }
@ -44,14 +45,20 @@ class B :: A [ d e f ] {
};
};
a := ((B:newInstance 1 2 3):sum);
a := ((B:newInstance 1 2 3):sum)
if (a ~= 18) { printf "ERROR: a must be 18\n"; } \
else { printf "OK %d\n" a; };
else { printf "OK %d\n" a; }
b := (B:newInstance 2 3 4);
a := (b:get-a);
b := (B:newInstance 2 3 4)
a := (b:get-a)
if (a ~= 4) {printf "ERROR: a must be 4\n" } \
else { printf "OK %d\n" a };
else { printf "OK %d\n" a }
c := (object-new A)
a := (c:get-a)
if (a ~= nil) {printf "ERROR: a must be nil\n" } \
else { printf "OK %O\n" a }
a := (b:get-b);
if (a ~= 6) { printf "ERROR: a must be 6\n" } \

View File

@ -1 +1 @@
(1:) ##ERROR: syntax error - missing message after :
(1:) ##ERROR: syntax error - missing message after receiver

View File

@ -54,11 +54,12 @@ class A [ + ] { ##ERROR: syntax error - not variable name - +
---
fun xxx(x :: p q) { p := (x + 1); q := (x + 2) }
[a,[b]] := (xxx 20) ##ERROR: syntax error - invalid lvalue - not symbol in tuple
[a,[b]] := (xxx 20) ##ERROR: syntax error - bad lvalue - invalid element in tuple
printf "%d %d\n" a b
---
20 := 90 ##ERROR: syntax error - invalid lvalue - not symbol - 20
20 := 90 ##ERROR: syntax error - bad lvalue - invalid element - 20
---
@ -66,4 +67,8 @@ printf "%d %d\n" a b
---
[] := 10 ##ERROR: syntax error - invalid lvalue
[] := 10 ##ERROR: syntax error - bad lvalue - blank expression
---
+ + 100 ##ERROR: exception not handled - "unable to send + to #<PRIM> - '+' not found in Primitive"