From f19b9716455c0c1f7f84ac31750a7aabd80ad577 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Mon, 23 Oct 2023 18:49:15 +0900 Subject: [PATCH] removed unneeded reader code. changed { to #{ as a dictionary opener. { will be used as a block opener --- lib/hcl-prv.h | 35 +- lib/print.c | 2 +- lib/read.c | 1029 +------------------------------------------------ 3 files changed, 28 insertions(+), 1038 deletions(-) diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index 92c0939..dd81a2e 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -112,7 +112,7 @@ # if !defined(HAVE___BUILTIN_MEMSET) || \ !defined(HAVE___BUILTIN_MEMCPY) || \ !defined(HAVE___BUILTIN_MEMMOVE) || \ - !defined(HAVE___BUILTIN_MEMCMP) + !defined(HAVE___BUILTIN_MEMCMP) # include # endif @@ -186,11 +186,12 @@ enum hcl_iotok_type_t HCL_IOTOK_LPAREN, /* ( */ HCL_IOTOK_RPAREN, /* ) */ HCL_IOTOK_LPARCOLON, /* (: */ - HCL_IOTOK_BAPAREN, /* #[ */ - HCL_IOTOK_QLPAREN, /* #( */ - HCL_IOTOK_LBRACK, /* [ */ + HCL_IOTOK_BAPAREN, /* #[ - byte array parenthesis */ + HCL_IOTOK_QLPAREN, /* #( - quoted-list parenthesis */ + HCL_IOTOK_DLPAREN, /* #{ - dictionary parenthesis */ + HCL_IOTOK_LBRACK, /* [ - array */ HCL_IOTOK_RBRACK, /* ] */ - HCL_IOTOK_LBRACE, /* { */ + HCL_IOTOK_LBRACE, /* { - block */ HCL_IOTOK_RBRACE, /* } */ HCL_IOTOK_VBAR, /* | */ HCL_IOTOK_EOL, /* end of line */ @@ -318,7 +319,7 @@ struct hcl_var_info_t int type; /* ctx_offset 0 means the current context. * 1 means current->home. - * 2 means current->home->home. + * 2 means current->home->home. * index_in_ctx is a relative index within the context found. 
*/ hcl_oow_t ctx_offset; /* context offset */ @@ -447,7 +448,7 @@ struct hcl_cframe_t } _break; /* COP_COMPILE_CLASS_P1, COP_COMPILE_CLASS_P2 */ - struct + struct { hcl_ooi_t nsuperclasses; hcl_ioloc_t start_loc; @@ -515,7 +516,7 @@ typedef struct hcl_clsblk_info_t hcl_clsblk_info_t; /* reader stack for list reading */ typedef struct hcl_rstl_t hcl_rstl_t; -struct hcl_rstl_t +struct hcl_rstl_t { hcl_cnode_t* head; hcl_cnode_t* tail; @@ -689,7 +690,7 @@ struct hcl_compiler_t hcl_ioloc_t loc; hcl_ioloc_t _oloc; - union + union { hcl_flx_dt_t dt; /* delimiter token */ hcl_flx_hc_t hc; /* hash-marked character */ @@ -748,13 +749,13 @@ struct hcl_compiler_t /* hcl_context_t, hcl_block_t, hcl_function_t stores the local variable information - * + * * Use up to 29 bits in a 32-bit hcl_ooi_t. Exclude the tag bit and the sign bit. * | SIGN | INSTA | VA | NARGS | NRVARS | NLVARS | TAG | * 1 1 8 8 11 2 <= 32 * ----------------------------------------------------------- * Parameters to the MAKE_BLOCK or MAKE_FUNCTION instructions - * | INSTA | VA | NARGS | NRVARS | NLVARS + * | INSTA | VA | NARGS | NRVARS | NLVARS * 1 1 4 4 6 <= 16 (HCL_CODE_LONG_PARAM_SIZE 1, two params) * 1 1 8 8 11 <= 32 (HCL_CODE_LONG_PARAM_SIZE 2, two params, use 29 bits to avoid collection when converted to a smooi) * @@ -762,9 +763,9 @@ struct hcl_compiler_t * NARGS and NRVARS are also used for the CALL and CALL2 instructions. * CALL encodes NARGS in one parameter. * CALLR encodes NARGS in one parameter and NRVARS in another parameter. - * NARGS and NRVARS must not exceed a single parameter size. + * NARGS and NRVARS must not exceed a single parameter size. 
*/ - + #if defined(HCL_CODE_LONG_PARAM_SIZE) && (HCL_CODE_LONG_PARAM_SIZE == 1) # define MAX_CODE_NBLKARGS (0xFu) /* 15 - 4 bits*/ @@ -794,7 +795,7 @@ struct hcl_compiler_t # define GET_BLK_MASK_NARGS(x) (((x) >> 19) & 0xFF) # define GET_BLK_MASK_NRVARS(x) (((x) >> 11) & 0xFF) # define GET_BLK_MASK_NLVARS(x) ((x) & 0x7FF) - + # define MAX_CODE_JUMP (0xFFFFu) # define MAX_CODE_PARAM (0xFFFFu) # define MAX_CODE_PARAM2 (0xFFFFFFFFu) /* 32 bits */ @@ -1096,11 +1097,11 @@ enum hcl_bcode_t HCL_CODE_CALL_X = 0xD4, /* 212 ## */ HCL_CODE_CALL_R = 0xD5, /* 213 ## ##*/ - HCL_CODE_PUSH_RETURN_R = 0xD6, /* 214 */ - HCL_CODE_TRY_ENTER = 0xD7, /* 215 ## */ + HCL_CODE_PUSH_RETURN_R = 0xD6, /* 214 */ + HCL_CODE_TRY_ENTER = 0xD7, /* 215 ## */ HCL_CODE_STORE_INTO_CTXTEMPVAR_X = 0xD8, /* 216 ## */ - HCL_CODE_TRY_ENTER2 = 0xD9, /* 217 ## */ + HCL_CODE_TRY_ENTER2 = 0xD9, /* 217 ## */ HCL_CODE_TRY_EXIT = 0xDA, /* 218 */ HCL_CODE_THROW = 0xDB, /* 219 */ diff --git a/lib/print.c b/lib/print.c index 88eb767..18a90ae 100644 --- a/lib/print.c +++ b/lib/print.c @@ -221,7 +221,7 @@ int hcl_fmt_object_ (hcl_fmtout_t* fmtout, hcl_oop_t obj) { "(:", "(" }, /*HCL_CONCODE_MLIST */ { "[", "[" }, /*HCL_CONCODE_ARRAY */ { "#[", "[" }, /*HCL_CONCODE_BYTEARRAY */ - { "{", "{" }, /*HCL_CONCODE_DIC */ + { "#{", "{" }, /*HCL_CONCODE_DIC */ { "#(", "[" } /*HCL_CONCODE_QLIST */ }; diff --git a/lib/read.c b/lib/read.c index 9f1de4d..2ad9d52 100644 --- a/lib/read.c +++ b/lib/read.c @@ -26,9 +26,6 @@ #include "hcl-prv.h" -static int begin_include (hcl_t* hcl); -static int end_include (hcl_t* hcl); - #define BUFFER_ALIGN 128 #define BALIT_BUFFER_ALIGN 128 #define SALIT_BUFFER_ALIGN 128 @@ -378,15 +375,6 @@ static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, h } while(0) -#define GET_TOKEN(hcl) \ - do { if (get_token(hcl) <= -1) return -1; } while (0) - -#define GET_TOKEN_WITH_ERRRET(hcl, v_ret) \ - do { if (get_token(hcl) <= -1) return v_ret; } while (0) - -#define 
GET_TOKEN_WITH_GOTO(hcl, goto_label) \ - do { if (get_token(hcl) <= -1) goto goto_label; } while (0) - #define ADD_TOKEN_STR(hcl,s,l) \ do { if (add_token_str(hcl, s, l) <= -1) return -1; } while (0) @@ -525,492 +513,6 @@ static int get_char (hcl_t* hcl) return n; } -static int skip_comment (hcl_t* hcl) -{ - hcl_ooci_t c = hcl->c->lxc.c; - hcl_iolxc_t lc; - - if (c == ';') goto single_line_comment; - if (c != '#') return 0; /* not a comment */ - - /* attempt to handle #! or ## */ - - lc = hcl->c->lxc; /* save the last character */ - GET_CHAR_TO (hcl, c); /* read a following character */ - - if (c == '!' || c == '#') - { - single_line_comment: - do - { - GET_CHAR_TO (hcl, c); - if (c == HCL_OOCI_EOF) - { - break; - } - else if (c == '\r' || c == '\n') - { - GET_CHAR (hcl); /* keep the first meaningful character in lxc */ - break; - } - } - while (1); - - return 1; /* single line comment led by ## or #! or ; */ - } - - /* unget the leading '#' */ - unget_char (hcl, &hcl->c->lxc); - /* restore the previous state */ - hcl->c->lxc = lc; - - return 0; -} - -static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int regex, hcl_oow_t preescaped, hcl_synerrnum_t synerr_code) -{ - hcl_ooci_t c; - hcl_oow_t escaped = preescaped; - hcl_oow_t digit_count = 0; - hcl_ooci_t c_acc = 0; - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_STRLIT); - - while (1) - { - GET_CHAR_TO (hcl, c); - - if (c == HCL_OOCI_EOF) - { - hcl_setsynerr (hcl, synerr_code, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL); - return -1; - } - - if (escaped == 3) - { - if (c >= '0' && c <= '7') - { - /* more octal digits */ - c_acc = c_acc * 8 + c - '0'; - digit_count++; - if (digit_count >= escaped) - { - /* should i limit the max to 0xFF/0377? 
- * if (c_acc > 0377) c_acc = 0377;*/ - ADD_TOKEN_CHAR (hcl, c_acc); - escaped = 0; - } - continue; - } - else - { - ADD_TOKEN_CHAR (hcl, c_acc); - escaped = 0; - } - } - else if (escaped == 2 || escaped == 4 || escaped == 8) - { - if (c >= '0' && c <= '9') - { - c_acc = c_acc * 16 + c - '0'; - digit_count++; - if (digit_count >= escaped) - { - ADD_TOKEN_CHAR (hcl, c_acc); - escaped = 0; - } - continue; - } - else if (c >= 'A' && c <= 'F') - { - c_acc = c_acc * 16 + c - 'A' + 10; - digit_count++; - if (digit_count >= escaped) - { - ADD_TOKEN_CHAR (hcl, c_acc); - escaped = 0; - } - continue; - } - else if (c >= 'a' && c <= 'f') - { - c_acc = c_acc * 16 + c - 'a' + 10; - digit_count++; - if (digit_count >= escaped) - { - ADD_TOKEN_CHAR (hcl, c_acc); - escaped = 0; - } - continue; - } - else - { - hcl_ooch_t rc; - - rc = (escaped == 2)? 'x': - (escaped == 4)? 'u': 'U'; - if (digit_count == 0) - ADD_TOKEN_CHAR (hcl, rc); - else ADD_TOKEN_CHAR (hcl, c_acc); - - escaped = 0; - } - } - - if (escaped == 0 && c == end_char) - { - /* terminating quote */ - /* GET_CHAR (hcl); */ - break; - } - - if (escaped == 0 && c == esc_char) - { - escaped = 1; - continue; - } - - if (escaped == 1) - { - if (c == 'a') c = '\a'; - else if (c == 'b') c = '\b'; - else if (c == 'f') c = '\f'; - else if (c == 'n') c = '\n'; - else if (c == 'r') c = '\r'; - else if (c == 't') c = '\t'; - else if (c == 'v') c = '\v'; - else if (c >= '0' && c <= '7' && !regex) - { - /* i don't support the octal notation for a regular expression. - * it conflicts with the backreference notation between \1 and \7 inclusive. 
*/ - escaped = 3; - digit_count = 1; - c_acc = c - '0'; - continue; - } - else if (c == 'x') - { - escaped = 2; - digit_count = 0; - c_acc = 0; - continue; - } - #if (HCL_SIZEOF_OOCH_T >= 2) - else if (c == 'u') - { - escaped = 4; - digit_count = 0; - c_acc = 0; - continue; - } - #endif - #if (HCL_SIZEOF_OOCH_T >= 4) - else if (c == 'U') - { - escaped = 8; - digit_count = 0; - c_acc = 0; - continue; - } - #endif - else if (regex) - { - /* if the following character doesn't compose a proper - * escape sequence, keep the escape character. - * an unhandled escape sequence can be handled - * outside this function since the escape character - * is preserved.*/ - ADD_TOKEN_CHAR (hcl, esc_char); - } - - escaped = 0; - } - - ADD_TOKEN_CHAR (hcl, c); - } - - return 0; -} - -static int get_radixed_number (hcl_t* hcl, hcl_ooci_t rc, int radix) -{ - hcl_ooci_t c; - - ADD_TOKEN_CHAR (hcl, '#'); - ADD_TOKEN_CHAR (hcl, rc); - - GET_CHAR_TO (hcl, c); - - if (CHAR_TO_NUM(c, radix) >= radix) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "no digit after radix specifier in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); - return -1; - } - - do - { - ADD_TOKEN_CHAR(hcl, c); - GET_CHAR_TO (hcl, c); - } - while (CHAR_TO_NUM(c, radix) < radix); - - if (!is_delimchar(c)) - { - /* collect more characters to form a complete token for the error message below */ - do - { - ADD_TOKEN_CHAR(hcl, c); - GET_CHAR_TO (hcl, c); - } - while (!is_delimchar(c)); - - hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid digit in radixed number in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); - return -1; - } - - unget_char (hcl, &hcl->c->lxc); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_RADNUMLIT); - - return 0; -} - -static int get_hmarked_token (hcl_t* hcl) -{ - hcl_ooci_t c; - int radix; - - HCL_ASSERT (hcl, hcl->c->lxc.c == '#'); - - GET_CHAR_TO (hcl, c); - - /* - * #xXXXX hexadecimal - * #oOOOO octal - * #bBBBB binary - * #eDDD 
error - * #pHHH smptr - * #\C character - * #\xHHHH unicode character - * #\UHHHH unicode character - * #\uHHHH unicode character - * #\backspace - * #\linefeed - * #\newline - * #\nul - * #\page - * #\return - * #\rubout - * #\space - * #\tab - * #\vtab - * #include - * #[ ] byte array - * #( ) qlist - */ - - switch (c) - { - case 'x': - radix = 16; - goto radixnum; - case 'o': - radix = 8; - goto radixnum; - case 'b': - radix = 2; - radixnum: - if (get_radixed_number (hcl, c, radix) <= -1) return -1; - break; - - case 'e': - if (get_radixed_number(hcl, c, 10) <= -1) return -1; - SET_TOKEN_TYPE (hcl, HCL_IOTOK_ERRLIT); - break; - - case 'p': - if (get_radixed_number(hcl, c, 16) <= -1) return -1; - SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT); - break; - - case '\\': /* character literal */ - ADD_TOKEN_CHAR (hcl, '#'); - ADD_TOKEN_CHAR (hcl, '\\'); - - GET_CHAR_TO (hcl, c); - if (is_delimchar(c)) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "no valid character after #\\ in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); - return -1; - } - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); - do - { - ADD_TOKEN_CHAR (hcl, c); - GET_CHAR_TO (hcl, c); - } - while (!is_delimchar(c)); - - if (TOKEN_NAME_LEN(hcl) >= 4) - { - int max_digit_count = 0; - - if (TOKEN_NAME_CHAR(hcl, 2) == 'x') - { - hcl_oow_t i; - max_digit_count = 2; - - hexcharlit: - if (TOKEN_NAME_LEN(hcl) - 3 > max_digit_count) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); - return -1; - } - c = 0; - for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) - { - if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, i))) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); - return -1; - } - c = c * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16); /* don't care if it 
is for 'p' */ - } - } - #if (HCL_SIZEOF_OOCH_T >= 2) - else if (TOKEN_NAME_CHAR(hcl, 2) == 'u') - { - max_digit_count = 4; - goto hexcharlit; - } - #endif - #if (HCL_SIZEOF_OOCH_T >= 4) - else if (TOKEN_NAME_CHAR(hcl, 2) == 'U') - { - max_digit_count = 8; - goto hexcharlit; - } - #endif - else if (does_token_name_match(hcl, VOCA_BACKSPACE)) - { - c = '\b'; - } - else if (does_token_name_match(hcl, VOCA_LINEFEED)) - { - c = '\n'; - } - else if (does_token_name_match(hcl, VOCA_NEWLINE)) - { - /* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */ - c = '\n'; - } - else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */ - { - c = '\0'; - } - else if (does_token_name_match(hcl, VOCA_PAGE)) - { - c = '\f'; - } - else if (does_token_name_match(hcl, VOCA_RETURN)) - { - c = '\r'; - } - else if (does_token_name_match(hcl, VOCA_RUBOUT)) - { - c = '\x7F'; /* DEL */ - } - else if (does_token_name_match(hcl, VOCA_SPACE)) - { - c = ' '; - } - else if (does_token_name_match(hcl, VOCA_TAB)) - { - c = '\t'; - } - else if (does_token_name_match(hcl, VOCA_VTAB)) - { - c = '\v'; - } - else - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid character literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); - return -1; - } - } - else - { - HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3); - c = TOKEN_NAME_CHAR(hcl, 2); - } - - /* reset the token name to the converted character */ - if (hcl->c->tok.type == HCL_IOTOK_CHARLIT) - { - CLEAR_TOKEN_NAME (hcl); - ADD_TOKEN_CHAR (hcl, c); - } - - unget_char (hcl, &hcl->c->lxc); - break; - - case '[': /* #[ - byte array opener */ - ADD_TOKEN_CHAR (hcl, '#'); - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_BAPAREN); - break; - - case '(': /* #( - qlist opener */ - ADD_TOKEN_CHAR (hcl, '#'); - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_QLPAREN); - break; - - default: - { - hcl_iotok_type_t tok_type; - - if 
(is_delimchar(c)) - { - /* EOF, whitespace, etc */ - hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid hash-marked literal #%jc", c); - return -1; - } - - ADD_TOKEN_CHAR (hcl, '#'); - do - { - ADD_TOKEN_CHAR (hcl, c); - GET_CHAR_TO (hcl, c); - } - while (!is_delimchar(c)); - - if (get_directive_token_type(hcl, &tok_type) <= -1) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid hash-marked literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); - return -1; - } - else - { - SET_TOKEN_TYPE (hcl, tok_type); - } - - unget_char (hcl, &hcl->c->lxc); - break; - } - } - - return 0; -} - static hcl_iotok_type_t classify_ident_token (hcl_t* hcl, const hcl_oocs_t* v) { hcl_oow_t i; @@ -1036,363 +538,6 @@ static hcl_iotok_type_t classify_ident_token (hcl_t* hcl, const hcl_oocs_t* v) return HCL_IOTOK_IDENT; } -static int get_token (hcl_t* hcl) -{ - hcl_ooci_t c, oldc; - int n; - -retry: - GET_CHAR (hcl); - - do - { - /* skip spaces */ - while (is_spacechar(hcl->c->lxc.c)) GET_CHAR (hcl); - /* the first character after the last space is in hcl->c->lxc */ - if ((n = skip_comment(hcl)) <= -1) return -1; - } - while (n >= 1); - - /* clear the token name, reset its location */ - SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? 
*/ - CLEAR_TOKEN_NAME (hcl); - SET_TOKEN_LOC (hcl, LEXER_LOC(hcl)); - - c = hcl->c->lxc.c; - - switch (c) - { - case HCL_OOCI_EOF: - { - int n; - - n = end_include(hcl); - if (n <= -1) return -1; - if (n >= 1) goto retry; - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); - ADD_TOKEN_STR(hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len); - break; - } - - case '(': - oldc = c; - GET_CHAR_TO (hcl, c); - if(c == ':') - { - SET_TOKEN_TYPE (hcl, HCL_IOTOK_LPARCOLON); - ADD_TOKEN_CHAR (hcl, oldc); - ADD_TOKEN_CHAR (hcl, c); - break; - } - else - { - unget_char (hcl, &hcl->c->lxc); - } - c = oldc; - - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_LPAREN); - break; - - case ')': - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_RPAREN); - break; - - case '[': - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_LBRACK); - break; - - case ']': - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_RBRACK); - break; - - case '{': - ADD_TOKEN_CHAR(hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_LBRACE); - break; - - case '}': - ADD_TOKEN_CHAR (hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_RBRACE); - break; - - case '|': - ADD_TOKEN_CHAR (hcl, c); - SET_TOKEN_TYPE (hcl, HCL_IOTOK_VBAR); - break; - - case '.': - oldc = c; - GET_CHAR_TO (hcl, c); - if(c == '.') - { - hcl_iolxc_t sd; - hcl_ooci_t oldc2; - - sd = hcl->c->lxc; - - oldc2 = c; - GET_CHAR_TO (hcl, c); - if (c == '.') - { - SET_TOKEN_TYPE (hcl, HCL_IOTOK_ELLIPSIS); - ADD_TOKEN_CHAR (hcl, oldc); - ADD_TOKEN_CHAR (hcl, oldc2); - ADD_TOKEN_CHAR (hcl, c); - break; - } - - unget_char (hcl, &hcl->c->lxc); - unget_char (hcl, &sd); - } - else - { - unget_char (hcl, &hcl->c->lxc); - } - c = oldc; - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_DOT); - ADD_TOKEN_CHAR (hcl, c); - break; - - case ',': - SET_TOKEN_TYPE (hcl, HCL_IOTOK_COMMA); - ADD_TOKEN_CHAR (hcl, c); - break; - - case ':': - oldc = c; - GET_CHAR_TO (hcl, c); - if(c == ':') - { - hcl_iolxc_t sd; - hcl_ooci_t oldc2; - - sd = hcl->c->lxc; - - oldc2 = c; - GET_CHAR_TO 
(hcl, c); - if (c == ':') - { - SET_TOKEN_TYPE (hcl, HCL_IOTOK_TRPCOLONS); - ADD_TOKEN_CHAR (hcl, oldc); - ADD_TOKEN_CHAR (hcl, oldc2); - ADD_TOKEN_CHAR (hcl, c); - break; - } - else if (c == '*') - { - /* double-cloned star */ - SET_TOKEN_TYPE (hcl, HCL_IOTOK_DCSTAR); - ADD_TOKEN_CHAR (hcl, oldc); - ADD_TOKEN_CHAR (hcl, oldc2); - ADD_TOKEN_CHAR (hcl, c); - break; - } - - unget_char (hcl, &hcl->c->lxc); - unget_char (hcl, &sd); - } - else - { - unget_char (hcl, &hcl->c->lxc); - } - c = oldc; - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_COLON); - ADD_TOKEN_CHAR (hcl, c); - break; - - case '\"': - if (get_string(hcl, '\"', '\\', 0, 0, HCL_SYNERR_STRLIT) <= -1) return -1; - break; - - case '\'': - if (get_string(hcl, '\'', '\\', 0, 0, HCL_SYNERR_CHARLIT) <= -1) return -1; - if (TOKEN_NAME_LEN(hcl) != 1) - { - hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - return -1; - } - SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); - break; - - case '#': - if (get_hmarked_token(hcl) <= -1) return -1; - break; - - case '+': - case '-': - oldc = c; - GET_CHAR_TO (hcl, c); - if(is_digitchar(c)) - { - unget_char (hcl, &hcl->c->lxc); - c = oldc; - goto numlit; - } - else if (c == '#') - { - int radix; - hcl_iolxc_t sharp; - - sharp = hcl->c->lxc; /* back up '#' */ - - GET_CHAR_TO (hcl, c); - switch (c) - { - case 'b': - radix = 2; - goto radnumlit; - case 'o': - radix = 8; - goto radnumlit; - case 'x': - radix = 16; - radnumlit: - ADD_TOKEN_CHAR (hcl, oldc); - if (get_radixed_number(hcl, c, radix) <= -1) return -1; - break; - - default: - unget_char (hcl, &hcl->c->lxc); - unget_char (hcl, &sharp); - c = oldc; - goto ident; - } - } - else - { - unget_char (hcl, &hcl->c->lxc); - c = oldc; - goto ident; - } - break; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - numlit: - SET_TOKEN_TYPE (hcl, HCL_IOTOK_NUMLIT); - while (1) - { - ADD_TOKEN_CHAR (hcl, c); - GET_CHAR_TO (hcl, c); - if (TOKEN_TYPE(hcl) == 
HCL_IOTOK_NUMLIT && c == '.') - { - SET_TOKEN_TYPE (hcl, HCL_IOTOK_FPDECLIT); - ADD_TOKEN_CHAR (hcl, c); - GET_CHAR_TO (hcl, c); - if (!is_digitchar(c)) - { - /* the first character after the decimal point is not a decimal digit */ - hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point"); - return -1; - } - } - - if (!is_digitchar(c)) - { - unget_char (hcl, &hcl->c->lxc); - break; - } - } - - break; - - default: - ident: - if (is_delimchar(c)) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c); - return -1; - } - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT); - while (1) - { - ADD_TOKEN_CHAR (hcl, c); - GET_CHAR_TO (hcl, c); - - if (c == '.') - { - hcl_iolxc_t period; - hcl_iotok_type_t type; - - type = classify_ident_token(hcl, TOKEN_NAME(hcl)); - if (type != HCL_IOTOK_IDENT) - { - SET_TOKEN_TYPE (hcl, type); - unget_char (hcl, &hcl->c->lxc); - break; - } - - period = hcl->c->lxc; - - read_more_seg: - GET_CHAR_TO (hcl, c); - if (!is_delimchar(c)) - { - hcl_oow_t start; - hcl_oocs_t seg; - - SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT_DOTTED); - ADD_TOKEN_CHAR (hcl, '.'); - - start = TOKEN_NAME_LEN(hcl); - do - { - ADD_TOKEN_CHAR (hcl, c); - GET_CHAR_TO (hcl, c); - } - while (!is_delimchar(c)); - - seg.ptr = &TOKEN_NAME_CHAR(hcl,start); - seg.len = TOKEN_NAME_LEN(hcl) - start; - if (classify_ident_token(hcl, &seg) != HCL_IOTOK_IDENT) - { - hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - return -1; - } - - if (c == '.') goto read_more_seg; - - unget_char (hcl, &hcl->c->lxc); - break; - } - else - { - unget_char (hcl, &hcl->c->lxc); - unget_char (hcl, &period); - } - break; - } - else if (is_delimchar(c)) - { - unget_char (hcl, &hcl->c->lxc); - break; - } - } - - if (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT) - { - hcl_iotok_type_t type; - type = classify_ident_token(hcl, TOKEN_NAME(hcl)); - SET_TOKEN_TYPE (hcl, 
type); - } - break; - } - -#if defined(HCL_DEBUG_LEXER) - HCL_DEBUG2 (hcl, "TOKEN: [%.*js]\n", (hcl_ooi_t)TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); -#endif - - return 0; -} - static int is_sr_name_in_use (hcl_t* hcl, const hcl_ooch_t* sr_name) { /* [NOTE] @@ -1456,97 +601,6 @@ static const hcl_ooch_t* add_sr_name (hcl_t* hcl, const hcl_oocs_t* name) /* -------------------------------------------------------------------------- */ -static int begin_include (hcl_t* hcl) -{ - hcl_iosrarg_t* arg; - const hcl_ooch_t* io_name; - - io_name = add_sr_name(hcl, TOKEN_NAME(hcl)); - if (HCL_UNLIKELY(!io_name)) return -1; - - arg = (hcl_iosrarg_t*)hcl_callocmem(hcl, HCL_SIZEOF(*arg)); - if (HCL_UNLIKELY(!arg)) goto oops; - - arg->name = io_name; - arg->line = 1; - arg->colm = 1; - /*arg->nl = '\0';*/ - arg->includer = hcl->c->curinp; - - if (hcl->c->reader(hcl, HCL_IO_OPEN, arg) <= -1) - { - const hcl_ooch_t* org_errmsg = hcl_backuperrmsg(hcl); - hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to include %js - %js", io_name, org_errmsg); - goto oops; - } - -#if 0 - GET_TOKEN_WITH_GOTO (hcl, oops); - if (TOKEN_TYPE(hcl) != HCL_IOTOK_DOT) - { - /* check if a period is following the includee name */ - hcl_setsynerr (hcl, HCL_SYNERR_PERIOD, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - goto oops; - } -#endif - - /* switch to the includee's stream */ - hcl->c->curinp = arg; - /* hcl->c->depth.incl++; */ - - /* read in the first character in the included file. - * so the next call to get_token() sees the character read - * from this file. 
*/ - if (get_token(hcl) <= -1) - { - end_include (hcl); - /* i don't jump to oops since i've called - * end_include() which frees hcl->c->curinp/arg */ - return -1; - } - - return 0; - -oops: - if (arg) hcl_freemem (hcl, arg); - return -1; -} - -static int end_include (hcl_t* hcl) -{ - int x; - hcl_iosrarg_t* cur; - - if (hcl->c->curinp == &hcl->c->srarg) return 0; /* no include */ - - /* if it is an included file, close it and - * retry to read a character from an outer file */ - - x = hcl->c->reader(hcl, HCL_IO_CLOSE, hcl->c->curinp); - - /* if closing has failed, still destroy the - * sio structure first as normal and return - * the failure below. this way, the caller - * does not call HCL_IO_CLOSE on - * hcl->c->curinp again. */ - - cur = hcl->c->curinp; - hcl->c->curinp = hcl->c->curinp->includer; - - HCL_ASSERT (hcl, cur->name != HCL_NULL); - hcl_freemem (hcl, cur); - /* hcl->parse.depth.incl--; */ - - if (x != 0) - { - /* the failure mentioned above is returned here */ - return -1; - } - - hcl->c->lxc = hcl->c->curinp->lxc; - return 1; /* ended the included file successfully */ -} - static HCL_INLINE int enter_list (hcl_t* hcl, const hcl_ioloc_t* loc, int flagv) { hcl_rstl_t* rstl; @@ -1788,75 +842,6 @@ static int chain_to_list (hcl_t* hcl, hcl_cnode_t* obj) return 0; } -static hcl_cnode_t* read_vlist (hcl_t* hcl) -{ - hcl_cnode_t* vh, * ve; - hcl_ioloc_t start_loc; - - HCL_ASSERT (hcl, TOKEN_TYPE(hcl) == HCL_IOTOK_VBAR); - - vh = ve = HCL_NULL; - start_loc = *TOKEN_LOC(hcl); - - GET_TOKEN_WITH_GOTO(hcl, oops); - - while (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT /* || TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT_DOTTED */) - { - hcl_cnode_t* sym, * cons; - - if (hcl_getsyncodebyoocs_noseterr(hcl, TOKEN_NAME(hcl)) > 0) - { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_BANNEDVARNAME, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "special symbol not to be declared as a variable"); - goto oops; - } - - sym = hcl_makecnodesymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - if (HCL_UNLIKELY(!sym)) goto 
oops; - - cons = hcl_makecnodecons(hcl, HCL_CNODE_GET_LOC(sym), sym, HCL_NULL); - if (HCL_UNLIKELY(!cons)) - { - hcl_freesinglecnode (hcl, sym); /* manual disposal because sym is not chained to the list */ - goto oops; - } - - if (!vh) - { - vh = cons; - ve = cons; - } - else - { - HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(ve)); - ve->u.cons.cdr = cons; - ve = cons; - } - - GET_TOKEN_WITH_GOTO (hcl, oops); - } - - if (TOKEN_TYPE(hcl) != HCL_IOTOK_VBAR) - { - hcl_setsynerr (hcl, HCL_SYNERR_VBAR, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - goto oops; - } - - if (vh) - { - HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(vh)); - HCL_CNODE_CONS_CONCODE(vh) = HCL_CONCODE_VLIST; - return vh; - } - - /* this is an empty list */ - return hcl_makecnodeelist(hcl, &start_loc, HCL_CONCODE_VLIST); - -oops: - if (vh) hcl_freecnode (hcl, vh); - return HCL_NULL; -} - /* ------------------------------------------------------------------------ */ /* TODO: @@ -2097,7 +1082,7 @@ static int feed_process_token (hcl_t* hcl) LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_BYTEARRAY); goto start_list; - case HCL_IOTOK_LBRACE: /* { */ + case HCL_IOTOK_DLPAREN: /* #{ */ frd->flagv = DATA_LIST; LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_DIC); goto start_list; @@ -2164,7 +1149,7 @@ static int feed_process_token (hcl_t* hcl) case HCL_IOTOK_RPAREN: /* xlist (), qlist #() */ case HCL_IOTOK_RBRACK: /* bytearray #[], array[] */ - case HCL_IOTOK_RBRACE: /* dictionary {} */ + case HCL_IOTOK_RBRACE: /* dictionary #{} */ { static struct { @@ -2176,7 +1161,7 @@ static int feed_process_token (hcl_t* hcl) { HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN }, /* MLIST (: ) */ { HCL_IOTOK_RBRACK, HCL_SYNERR_RBRACK }, /* ARRAY [ ] */ { HCL_IOTOK_RBRACK, HCL_SYNERR_RBRACK }, /* BYTEARRAY #[ ] */ - { HCL_IOTOK_RBRACE, HCL_SYNERR_RBRACE }, /* DIC { } */ + { HCL_IOTOK_RBRACE, HCL_SYNERR_RBRACE }, /* DIC #{ } */ { HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN } /* QLIST #( ) */ }; @@ -2592,7 +1577,6 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c) 
reset_flx_token (hcl); -//HCL_DEBUG1 (hcl, "XXX[%jc]\n", c); if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, FLX_DT(hcl))) { /* the character is one of the first character of a delimiter token such as (, [, :, etc */ @@ -2722,6 +1706,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) * #include * #[ ] byte array * #( ) qlist + * #{ } dictionary */ switch (c) @@ -2772,6 +1757,10 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN); goto consumed; + case '{': + FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_DLPAREN); + goto consumed; + /* --------------------------- */ default: /* the character used as case values above can never be the first character of a hash-marked identifier */ @@ -2896,7 +1885,7 @@ static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c) if (hi->char_count == 0) { hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL, - "no valid character after the hash sign"); + "no valid character after hash sign"); return -1; }