From 7d9e1b4c0c14779a5f7754816ec17565e957fb5b Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Thu, 5 Sep 2024 01:12:20 +0900 Subject: [PATCH] renamed #include to $include. introduced the symbol literal --- lib/cnode.c | 5 +++ lib/comp.c | 5 +++ lib/err.c | 1 + lib/hcl-prv.h | 15 ++++++- lib/hcl.h | 1 + lib/print.c | 22 +++++++++- lib/read.c | 108 ++++++++++++++++++++++++++++++++++++-------------- 7 files changed, 125 insertions(+), 32 deletions(-) diff --git a/lib/cnode.c b/lib/cnode.c index b57587a..7efe202 100644 --- a/lib/cnode.c +++ b/lib/cnode.c @@ -153,6 +153,11 @@ hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, return hcl_makecnode(hcl, HCL_CNODE_BSTRLIT, flags, loc, tok); } +hcl_cnode_t* hcl_makecnodesymlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) +{ + return hcl_makecnode(hcl, HCL_CNODE_SYMLIT, flags, loc, tok); +} + hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok) { return hcl_makecnode(hcl, HCL_CNODE_NUMLIT, flags, loc, tok); diff --git a/lib/comp.c b/lib/comp.c index bfd41ef..c83fcd5 100644 --- a/lib/comp.c +++ b/lib/comp.c @@ -4875,6 +4875,11 @@ redo: if (HCL_UNLIKELY(!lit)) return -1; goto literal; + case HCL_CNODE_SYMLIT: + lit = hcl_makesymbol(hcl, HCL_CNODE_GET_TOKPTR(oprnd), HCL_CNODE_GET_TOKLEN(oprnd)); + if (HCL_UNLIKELY(!lit)) return -1; + goto literal; + case HCL_CNODE_NUMLIT: lit = string_to_num(hcl, HCL_CNODE_GET_TOK(oprnd), HCL_CNODE_GET_LOC(oprnd), 0); if (HCL_UNLIKELY(!lit)) return -1; diff --git a/lib/err.c b/lib/err.c index 22407c8..a3baa1a 100644 --- a/lib/err.c +++ b/lib/err.c @@ -101,6 +101,7 @@ static char* synerrstr[] = "comment not closed", "wrong character literal", "wrong string literal", + "wrong symbol literal", "invalid hashed literal", "invalid numeric literal", "out of integer range", diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index 5535fdf..7731e49 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -282,12 +282,13 @@ enum hcl_tok_type_t { HCL_TOK_EOF, - /* the following 4 items must be in this order for code - * in flx_quote_token() in read.c */ + /* the following 5 items must be in this order for code + * in flx_quoted_token() in read.c */ HCL_TOK_CHARLIT, HCL_TOK_BCHRLIT, HCL_TOK_STRLIT, HCL_TOK_BSTRLIT, + HCL_TOK_SYMLIT, HCL_TOK_NUMLIT, HCL_TOK_RADNUMLIT, @@ -386,6 +387,7 @@ enum hcl_cnode_type_t HCL_CNODE_DSYMBOL, /* dotted symbol */ HCL_CNODE_STRLIT, HCL_CNODE_BSTRLIT, + HCL_CNODE_SYMLIT, HCL_CNODE_NUMLIT, HCL_CNODE_RADNUMLIT, HCL_CNODE_FPDECLIT, @@ -745,6 +747,12 @@ struct hcl_flx_dt_t int col_next; }; +typedef struct hcl_flx_di_t hcl_flx_di_t; /* dollared-signed identifier */ +struct hcl_flx_di_t +{ + /* state data */ + hcl_oow_t char_count; +}; typedef struct hcl_flx_hc_t hcl_flx_hc_t; /* hash-marked character like #\, #\newline */ struct hcl_flx_hc_t @@ -849,6 +857,7 @@ enum hcl_flx_state_t HCL_FLX_BACKSLASHED, HCL_FLX_COMMENT, HCL_FLX_DELIM_TOKEN, + HCL_FLX_DOLLARED_IDENT, HCL_FLX_HMARKED_TOKEN, /* hash-marked token */ HCL_FLX_HMARKED_B, /* #b - intermediate state before #b[ or #b-radixed binary number */ HCL_FLX_HMARKED_CHAR, /* hash-marked character that begins with #\ */ @@ -942,6 +951,7 @@ struct hcl_compiler_t union { hcl_flx_dt_t dt; /* delimiter token */ + hcl_flx_di_t di; /* dollar-signed identifier */ hcl_flx_hc_t hc; /* hash-marked character */ hcl_flx_hi_t hi; /* hash-marked identifier */ hcl_flx_hb_t hb; /* #b ... */ @@ -2009,6 +2019,7 @@ hcl_cnode_t* hcl_makecnodesymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, c hcl_cnode_t* hcl_makecnodedsymbol (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok, int is_cla); hcl_cnode_t* hcl_makecnodestrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodebstrlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); +hcl_cnode_t* hcl_makecnodesymlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodenumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnoderadnumlit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); hcl_cnode_t* hcl_makecnodefpdeclit (hcl_t* hcl, int flags, const hcl_loc_t* loc, const hcl_oocs_t* tok); diff --git a/lib/hcl.h b/lib/hcl.h index f461325..ce8dccd 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -105,6 +105,7 @@ enum hcl_synerrnum_t HCL_SYNERR_CMTNC, /* comment not closed */ HCL_SYNERR_CHARLIT, /* wrong character literal */ HCL_SYNERR_STRLIT, /* wrong string literal */ + HCL_SYNERR_SYMLIT, /* wrong symbol literal */ HCL_SYNERR_HASHLIT, /* wrong hashed literal */ HCL_SYNERR_NUMLIT , /* invalid numeric literal */ HCL_SYNERR_NUMRANGE, /* number range error */ diff --git a/lib/print.c b/lib/print.c index f437418..fa39f42 100644 --- a/lib/print.c +++ b/lib/print.c @@ -384,7 +384,8 @@ next: /* Any needs for special action if SYNT(obj) is true? * I simply treat the syntax symbol as a normal symbol * for printing currently. */ - if (hcl_bfmt_out(hcl, fmtout, "%.*js", HCL_OBJ_GET_SIZE(obj), HCL_OBJ_GET_CHAR_SLOT(obj)) <= -1) return -1; + /* TODO: escaping if needed */ + if (hcl_bfmt_out(hcl, fmtout, "#\"%.*js\"", HCL_OBJ_GET_SIZE(obj), HCL_OBJ_GET_CHAR_SLOT(obj)) <= -1) return -1; break; case HCL_BRAND_STRING: @@ -808,16 +809,35 @@ void hcl_dumpcnode (hcl_t* hcl, hcl_cnode_t* cnode, int newline) case HCL_CNODE_DSYMBOL: case HCL_CNODE_STRLIT: case HCL_CNODE_BSTRLIT: + case HCL_CNODE_SYMLIT: case HCL_CNODE_NUMLIT: case HCL_CNODE_RADNUMLIT: case HCL_CNODE_FPDECLIT: case HCL_CNODE_SMPTRLIT: case HCL_CNODE_ERRLIT: + case HCL_CNODE_NIL: case HCL_CNODE_TRUE: case HCL_CNODE_FALSE: case HCL_CNODE_SELF: case HCL_CNODE_SUPER: + + case HCL_CNODE_CLASS: + case HCL_CNODE_FUN: + case HCL_CNODE_DO: + case HCL_CNODE_IF: + case HCL_CNODE_ELIF: + case HCL_CNODE_ELSE: + case HCL_CNODE_THROW: + case HCL_CNODE_TRY: + case HCL_CNODE_CATCH: + case HCL_CNODE_BREAK: + case HCL_CNODE_CONTINUE: + case HCL_CNODE_UNTIL: + case HCL_CNODE_WHILE: + case HCL_CNODE_RETURN: + case HCL_CNODE_REVERT: + case HCL_CNODE_ELLIPSIS: case HCL_CNODE_TRPCOLONS: case HCL_CNODE_DBLCOLONS: diff --git a/lib/read.c b/lib/read.c index 958c587..3956a94 100644 --- a/lib/read.c +++ b/lib/read.c @@ -38,8 +38,8 @@ static struct voca_t hcl_ooch_t str[11]; } vocas[] = { - { 8, { '#','i','n','c','l','u','d','e' } }, - { 7, { '#','p','r','a','g','m','a' } }, + { 8, { '$','i','n','c','l','u','d','e' } }, + { 7, { '$','p','r','a','g','m','a' } }, { 11, { '#','\\','b','a','c','k','s','p','a','c','e' } }, { 10, { '#','\\','l','i','n','e','f','e','e','d' } }, @@ -85,10 +85,10 @@ static struct voca_t { 4, { '#','[',' ',']' /* ARRAY */ } }, { 5, { '#','b','[',' ',']' /* BYTE ARRAY */ } }, { 5, { '#','c','[',' ',']' /* CHAR ARRAY */ } }, - { 4, { '#','{',' ','}' } }, - { 4, { '#','(',' ',')' } }, + { 4, { '#','{',' ','}' /* DICTIONARY */ } }, + { 4, { '#','(',' ',')' /* QLIST */ } }, { 3, { '[',' ',']' /* TUPLE */ } }, - { 3, { '|',' ','|' } }, + { 3, { '|',' ','|' /* VLIST */ } }, { 5, { '<','E','O','L','>' } }, { 5, { '<','E','O','F','>' } } @@ -1107,17 +1107,6 @@ static int chain_to_list (hcl_t* hcl, hcl_cnode_t* obj, hcl_loc_t* loc) fake_tok_ptr = &fake_tok; } -#if 0 -/* TODO: remove this part ... */ - if (list_concode == HCL_CONCODE_TUPLE && concode != HCL_CONCODE_TUPLE && - (!HCL_CNODE_IS_SYMBOL_PLAIN(obj) || HCL_CNODE_IS_SYMBOL_PLAIN_BINOP(obj))) - { - /* a tuple must contain some simple symbol names or nested tuples only */ - hcl_setsynerrbfmt (hcl, HCL_SYNERR_VARNAME, HCL_CNODE_GET_LOC(obj), HCL_CNODE_GET_TOK(obj), "invalid name - not symbol in tuple"); - return -1; - } -#endif - cons = hcl_makecnodecons(hcl, 0, (loc? loc: HCL_CNODE_GET_LOC(obj)), fake_tok_ptr, obj, HCL_NULL); if (HCL_UNLIKELY(!cons)) return -1; @@ -1871,6 +1860,18 @@ static int feed_process_token (hcl_t* hcl) frd->obj = hcl_makecnodebchrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl), (hcl_oob_t)TOKEN_NAME_CHAR(hcl, 0)); goto auto_xlist; + case HCL_TOK_STRLIT: + frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + goto auto_xlist; + + case HCL_TOK_BSTRLIT: + frd->obj = hcl_makecnodebstrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + goto auto_xlist; + + case HCL_TOK_SYMLIT: + frd->obj = hcl_makecnodesymlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + goto auto_xlist; + case HCL_TOK_NUMLIT: frd->obj = hcl_makecnodenumlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); goto auto_xlist; @@ -1889,14 +1890,6 @@ static int feed_process_token (hcl_t* hcl) break; */ - case HCL_TOK_STRLIT: - frd->obj = hcl_makecnodestrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - goto auto_xlist; - - case HCL_TOK_BSTRLIT: - frd->obj = hcl_makecnodebstrlit(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - goto auto_xlist; - case HCL_TOK_IDENT: ident: frd->obj = hcl_makecnodesymbol(hcl, 0, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); @@ -2117,6 +2110,7 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st /* short-cuts to lexer state data */ #define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt)) +#define FLX_DI(hcl) (&((hcl)->c->feed.lx.u.di)) #define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc)) #define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi)) #define FLX_HB(hcl) (&((hcl)->c->feed.lx.u.hb)) @@ -2128,6 +2122,11 @@ static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t st #define FLX_ST(hcl) (&((hcl)->c->feed.lx.u.st)) #define FLX_BCP(hcl) (&((hcl)->c->feed.lx.u.bcp)) +static HCL_INLINE void init_flx_di (hcl_flx_di_t* di) +{ + HCL_MEMSET (di, 0, HCL_SIZEOF(*di)); +} + static HCL_INLINE void init_flx_hc (hcl_flx_hc_t* hc) { HCL_MEMSET (hc, 0, HCL_SIZEOF(*hc)); @@ -2251,6 +2250,11 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c) goto consumed; */ + case '$': + init_flx_di (FLX_DI(hcl)); + FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_DOLLARED_IDENT); + goto consumed; + case '#': /* no state date to initialize. just change the state */ FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_TOKEN); @@ -2376,6 +2380,50 @@ not_consumed: return 0; } +static int flx_dollared_ident (hcl_t* hcl, hcl_ooci_t c) +{ + hcl_flx_di_t* di = FLX_DI(hcl); + + /* di->char_count doesn't include the first '$' */ + + if (is_delimchar(c)) + { + hcl_tok_type_t tok_type; + + if (di->char_count == 0) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, FLX_LOC(hcl), HCL_NULL, + "no valid character after dollar sign"); + return -1; + } + + if (get_directive_token_type(hcl, &tok_type) <= -1) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid dollar-signed literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } + else + { + FEED_WRAP_UP (hcl, tok_type); + goto not_consumed; + } + } + else + { + ident_char: + ADD_TOKEN_CHAR (hcl, c); + di->char_count++; + goto consumed; + } + +consumed: + return 1; + +not_consumed: + return 0; +} + static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) { /* @@ -2467,11 +2515,11 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_TOK_DLPAREN); goto consumed; -#if 0 case '"': /* #" */ - FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_TOK_HMARKED_SYMBOL); /* symbol lieral */ + reset_flx_token (hcl); + init_flx_qt (FLX_QT(hcl), HCL_TOK_SYMLIT, HCL_SYNERR_SYMLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t), 0); + FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote and move on */ goto consumed; -#endif /* --------------------------- */ default: @@ -2948,8 +2996,9 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ /* qt->tok_type + qt->is_byte assumes that the token types for * byte-string and byte-character literals are 1 greater than - * string and character literals. * see the definition of - * hcl_tok_type_t in hcl-prv.h */ + * string and character literals. see the definition of + * hcl_tok_type_t in hcl-prv.h. + * qt->is_byte is always 0 for HCL_TOK_SYMLIT. */ FEED_WRAP_UP (hcl, (hcl_tok_type_t)(qt->tok_type + qt->is_byte)); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */ if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token; goto consumed; @@ -3173,6 +3222,7 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c) case HCL_FLX_BACKSLASHED: return flx_backslashed(hcl, c); case HCL_FLX_COMMENT: return flx_comment(hcl, c); case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c); + case HCL_FLX_DOLLARED_IDENT: return flx_dollared_ident(hcl, c); case HCL_FLX_HMARKED_TOKEN: return flx_hmarked_token(hcl, c); case HCL_FLX_HMARKED_B: return flx_hmarked_b(hcl, c); case HCL_FLX_HMARKED_CHAR: return flx_hmarked_char(hcl, c);