diff --git a/lib/read2.c b/lib/read2.c new file mode 100644 index 0000000..0cbd9e9 --- /dev/null +++ b/lib/read2.c @@ -0,0 +1,2445 @@ +/* + * $Id$ + * + Copyright (c) 2016-2018 Chung, Hyung-Hwan. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WAfRRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "hcl-prv.h" + +static int begin_include (hcl_t* hcl); +static int end_include (hcl_t* hcl); + +#define BUFFER_ALIGN 128 +#define BALIT_BUFFER_ALIGN 128 +#define SALIT_BUFFER_ALIGN 128 +#define ARLIT_BUFFER_ALIGN 128 + +#define CHAR_TO_NUM(c,base) \ + ((c >= '0' && c <= '9')? ((c - '0' < base)? (c - '0'): base): \ + (c >= 'A' && c <= 'Z')? ((c - 'A' + 10 < base)? (c - 'A' + 10): base): \ + (c >= 'a' && c <= 'z')? ((c - 'a' + 10 < base)? (c - 'a' + 10): base): base) + +static struct voca_t +{ + hcl_oow_t len; + hcl_ooch_t str[11]; +} vocas[] = +{ + { 8, { '#','i','n','c','l','u','d','e' } }, + { 11, { '#','\\','b','a','c','k','s','p','a','c','e' } }, + { 10, { '#','\\','l','i','n','e','f','e','e','d' } }, + { 9, { '#','\\','n','e','w','l','i','n','e' } }, + { 5, { '#','\\','n','u','l' } }, + { 6, { '#','\\','p','a','g','e' } }, + { 8, { '#','\\','r','e','t','u','r','n' } }, + { 8, { '#','\\','r','u','b','o','u','t' } }, + { 7, { '#','\\','s','p','a','c','e' } }, + { 5, { '#','\\','t','a','b' } }, + { 6, { '#','\\','v','t','a','b' } }, + { 5, { '<','E','O','L','>' } }, + { 5, { '<','E','O','F','>' } } +}; + +enum voca_id_t +{ + VOCA_INCLUDE, + VOCA_BACKSPACE, + VOCA_LINEFEED, + VOCA_NEWLINE, + VOCA_NUL, + VOCA_PAGE, + VOCA_RETURN, + VOCA_RUBOUT, + VOCA_SPACE, + VOCA_TAB, + VOCA_VTAB, + + VOCA_EOL, + VOCA_EOF +}; +typedef enum voca_id_t voca_id_t; + + +enum list_flag_t +{ + QUOTED = (1 << 0), + DOTTED = (1 << 1), + COMMAED = (1 << 2), + COLONED = (1 << 3), + CLOSED = (1 << 4), + JSON = (1 << 5) +}; + +#define LIST_FLAG_GET_CONCODE(x) (((x) >> 8) & 0xFF) +#define LIST_FLAG_SET_CONCODE(x,type) ((x) = ((x) & ~0xFF00) | ((type) << 8)) + + +static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, hcl_ooi_t* num) +{ + /* it is not a generic conversion function. + * it assumes a certain pre-sanity check on the string + * done by the lexical analyzer */ + + int v, negsign, base; + const hcl_ooch_t* ptr, * end; + hcl_oow_t value, old_value; + + negsign = 0; + ptr = str->ptr, + end = str->ptr + str->len; + + HCL_ASSERT (hcl, ptr < end); + + if (*ptr == '+' || *ptr == '-') + { + negsign = *ptr - '+'; + ptr++; + } + + if (radixed) + { + HCL_ASSERT (hcl, ptr < end); + + if (*ptr != '#') + { + hcl_seterrbfmt (hcl, HCL_EINVAL, "radixed number not starting with # - %*.js", str->len, str->ptr); + return -1; + } + ptr++; /* skip '#' */ + + if (*ptr == 'x') base = 16; + else if (*ptr == 'o') base = 8; + else if (*ptr == 'b') base = 2; + else + { + hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid radix specifier - %c", *ptr); + return -1; + } + ptr++; + } + else base = 10; + + HCL_ASSERT (hcl, ptr < end); + + value = old_value = 0; + while (ptr < end && (v = CHAR_TO_NUM(*ptr, base)) < base) + { + value = value * base + v; + if (value < old_value) + { + /* overflow must have occurred */ + hcl_seterrbfmt (hcl, HCL_ERANGE, "number too big - %.*js", str->len, str->ptr); + return -1; + } + old_value = value; + ptr++; + } + + if (ptr < end) + { + /* trailing garbage? */ + hcl_seterrbfmt (hcl, HCL_EINVAL, "trailing garbage after numeric literal - %.*js", str->len, str->ptr); + return -1; + } + + if (value > HCL_TYPE_MAX(hcl_ooi_t) + (negsign? 1: 0)) /* assume 2's complement */ + { + hcl_seterrbfmt (hcl, HCL_ERANGE, "number too big - %.*js", str->len, str->ptr); + return -1; + } + + *num = value; + if (negsign) *num *= -1; + + return 0; +} + +static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, int radixed) +{ + int negsign, base; + const hcl_ooch_t* ptr, * end; + + negsign = 0; + ptr = str->ptr, + end = str->ptr + str->len; + + HCL_ASSERT (hcl, ptr < end); + + if (*ptr == '+' || *ptr == '-') + { + negsign = *ptr - '+'; + ptr++; + } + +#if 0 + if (radixed) + { + HCL_ASSERT (hcl, ptr < end); + + base = 0; + do + { + base = base * 10 + CHAR_TO_NUM(*ptr, 10); + ptr++; + } + while (*ptr != 'r'); + + ptr++; + } + else base = 10; +#else + if (radixed) + { + HCL_ASSERT (hcl, ptr < end); + + if (*ptr != '#') + { + hcl_seterrbfmt(hcl, HCL_EINVAL, "radixed number not starting with # - %.*js", str->len, str->ptr); + return HCL_NULL; + } + ptr++; /* skip '#' */ + + if (*ptr == 'x') base = 16; + else if (*ptr == 'o') base = 8; + else if (*ptr == 'b') base = 2; + else + { + hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid radix specifier - %c", *ptr); + return HCL_NULL; + } + ptr++; + } + else base = 10; +#endif + +/* TODO: handle floating point numbers ... etc */ + if (negsign) base = -base; + return hcl_strtoint(hcl, ptr, end - ptr, base); +} + +static hcl_oop_t string_to_fpdec (hcl_t* hcl, hcl_oocs_t* str, const hcl_ioloc_t* loc) +{ + hcl_oow_t pos; + hcl_oow_t scale = 0; + hcl_oop_t v; + + pos = str->len; + while (pos > 0) + { + pos--; + if (str->ptr[pos] == '.') + { + scale = str->len - pos - 1; + if (scale > HCL_SMOOI_MAX) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMRANGE, loc, str, "too many digits after decimal point"); + return HCL_NULL; + } + + HCL_ASSERT (hcl, scale > 0); + /*if (scale > 0)*/ HCL_MEMMOVE (&str->ptr[pos], &str->ptr[pos + 1], scale * HCL_SIZEOF(str->ptr[0])); /* remove the decimal point */ + break; + } + } + + /* if no decimal point is included or no digit after the point , you must not call this function */ + HCL_ASSERT (hcl, scale > 0); + + v = hcl_strtoint(hcl, str->ptr, str->len - 1, 10); + if (!v) return HCL_NULL; + + return hcl_makefpdec(hcl, v, scale); +} + +static HCL_INLINE int is_spacechar (hcl_ooci_t c) +{ + /* TODO: handle other space unicode characters */ + switch (c) + { + case ' ': + case '\f': /* formfeed */ + case '\n': /* linefeed */ + case '\r': /* carriage return */ + case '\t': /* horizon tab */ + case '\v': /* vertical tab */ + return 1; + + default: + return 0; + } +} + +static HCL_INLINE int is_alphachar (hcl_ooci_t c) +{ +/* TODO: support full unicode */ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static HCL_INLINE int is_digitchar (hcl_ooci_t c) +{ +/* TODO: support full unicode */ + return (c >= '0' && c <= '9'); +} + +static HCL_INLINE int is_xdigitchar (hcl_ooci_t c) +{ +/* TODO: support full unicode */ + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); +} + +static HCL_INLINE int is_alnumchar (hcl_ooci_t c) +{ +/* TODO: support full unicode */ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); +} + +static HCL_INLINE int is_delimiter (hcl_ooci_t c) +{ + return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' || + c == '\"' || c == '\'' || c == '#' || c == ';' || c == '|' || c == '.' || + c == ',' || c == ':' || is_spacechar(c) || c == HCL_UCI_EOF; +} + +static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, hcl_oow_t* dst_capa, int append, hcl_ooch_t add_delim) +{ + hcl_oow_t len, pos; + + if (append) + { + pos = dst->len; + len = dst->len + src->len; + if (add_delim != '\0') len++; + } + else + { + pos = 0; + len = src->len; + } + + if (len > *dst_capa) + { + hcl_ooch_t* tmp; + hcl_oow_t capa; + + capa = HCL_ALIGN(len, BUFFER_ALIGN); + + tmp = (hcl_ooch_t*)hcl_reallocmem(hcl, dst->ptr, HCL_SIZEOF(*tmp) * capa); + if (HCL_UNLIKELY(!tmp)) return -1; + + dst->ptr = tmp; + *dst_capa = capa; + } + + if (append && add_delim) dst->ptr[pos++] = add_delim; + hcl_copy_oochars (&dst->ptr[pos], src->ptr, src->len); + dst->len = len; + return 0; +} + + +#define GET_CHAR(hcl) \ + do { if (get_char(hcl) <= -1) return -1; } while (0) + +#define GET_CHAR_TO(hcl,c) \ + do { \ + if (get_char(hcl) <= -1) return -1; \ + c = (hcl)->c->lxc.c; \ + } while(0) + + +#define GET_TOKEN(hcl) \ + do { if (get_token(hcl) <= -1) return -1; } while (0) + +#define GET_TOKEN_WITH_ERRRET(hcl, v_ret) \ + do { if (get_token(hcl) <= -1) return v_ret; } while (0) + +#define GET_TOKEN_WITH_GOTO(hcl, goto_label) \ + do { if (get_token(hcl) <= -1) goto goto_label; } while (0) + +#define ADD_TOKEN_STR(hcl,s,l) \ + do { if (add_token_str(hcl, s, l) <= -1) return -1; } while (0) + +#define ADD_TOKEN_CHAR(hcl,c) \ + do { if (add_token_char(hcl, c) <= -1) return -1; } while (0) + +#define CLEAR_TOKEN_NAME(hcl) ((hcl)->c->tok.name.len = 0) +#define SET_TOKEN_TYPE(hcl,tv) ((hcl)->c->tok.type = (tv)) + +#define TOKEN_TYPE(hcl) ((hcl)->c->tok.type) +#define TOKEN_NAME(hcl) (&(hcl)->c->tok.name) +#define TOKEN_NAME_CAPA(hcl) ((hcl)->c->tok.name_capa) +#define TOKEN_NAME_LEN(hcl) ((hcl)->c->tok.name.len) +#define TOKEN_NAME_PTR(hcl) ((hcl)->c->tok.name.ptr) +#define TOKEN_NAME_CHAR(hcl,index) ((hcl)->c->tok.name.ptr[index]) +#define TOKEN_LOC(hcl) (&(hcl)->c->tok.loc) +#define LEXER_LOC(hcl) (&(hcl)->c->lxc.l) + +static HCL_INLINE int add_token_str (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len) +{ + hcl_oocs_t tmp; + tmp.ptr = (hcl_ooch_t*)ptr; + tmp.len = len; + return copy_string_to(hcl, &tmp, TOKEN_NAME(hcl), &TOKEN_NAME_CAPA(hcl), 1, '\0'); +} + +static HCL_INLINE int does_token_name_match (hcl_t* hcl, voca_id_t id) +{ + return hcl->c->tok.name.len == vocas[id].len && + hcl_equal_oochars(hcl->c->tok.name.ptr, vocas[id].str, vocas[id].len); +} + +static HCL_INLINE int add_token_char (hcl_t* hcl, hcl_ooch_t c) +{ + hcl_oocs_t tmp; + + tmp.ptr = &c; + tmp.len = 1; + return copy_string_to (hcl, &tmp, TOKEN_NAME(hcl), &TOKEN_NAME_CAPA(hcl), 1, '\0'); +} + +static HCL_INLINE void unget_char (hcl_t* hcl, const hcl_iolxc_t* c) +{ + /* Make sure that the unget buffer is large enough */ + HCL_ASSERT (hcl, hcl->c->nungots < HCL_COUNTOF(hcl->c->ungot)); + hcl->c->ungot[hcl->c->nungots++] = *c; +} + +static int get_char (hcl_t* hcl) +{ + hcl_ooci_t lc; + + if (hcl->c->nungots > 0) + { + /* something in the unget buffer */ + hcl->c->lxc = hcl->c->ungot[--hcl->c->nungots]; + return 0; + } + + if (hcl->c->curinp->b.state == -1) + { + hcl->c->curinp->b.state = 0; + return -1; + } + else if (hcl->c->curinp->b.state == 1) + { + hcl->c->curinp->b.state = 0; + goto return_eof; + } + + if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len) + { + if (hcl->c->reader(hcl, HCL_IO_READ, hcl->c->curinp) <= -1) return -1; + + if (hcl->c->curinp->xlen <= 0) + { + return_eof: + hcl->c->curinp->lxc.c = HCL_OOCI_EOF; + hcl->c->curinp->lxc.l.line = hcl->c->curinp->line; + hcl->c->curinp->lxc.l.colm = hcl->c->curinp->colm; + hcl->c->curinp->lxc.l.file = hcl->c->curinp->name; + hcl->c->lxc = hcl->c->curinp->lxc; + + /* indicate that EOF has been read. lxc.c is also set to EOF. */ + return 0; + } + + hcl->c->curinp->b.pos = 0; + hcl->c->curinp->b.len = hcl->c->curinp->xlen; + } + + if (hcl->c->curinp->lxc.c == '\n' || hcl->c->curinp->lxc.c == '\r') + { + /* hcl->c->curinp->lxc.c is a previous character. the new character + * to be read is still in the buffer (hcl->c->curinp->buf). + * hcl->cu->curinp->colm has been incremented when the previous + * character has been read. */ + if (hcl->c->curinp->line > 1 && hcl->c->curinp->colm == 2 && hcl->c->curinp->nl != hcl->c->curinp->lxc.c) + { + /* most likely, it's the second character in '\r\n' or '\n\r' + * sequence. let's not update the line and column number. */ + /*hcl->c->curinp->colm = 1;*/ + } + else + { + /* if the previous charater was a newline, + * increment the line counter and reset column to 1. + * incrementing the line number here instead of + * updating inp->lxc causes the line number for + * TOK_EOF to be the same line as the lxc newline. */ + hcl->c->curinp->line++; + hcl->c->curinp->colm = 1; + hcl->c->curinp->nl = hcl->c->curinp->lxc.c; + } + } + + lc = hcl->c->curinp->buf[hcl->c->curinp->b.pos++]; + + hcl->c->curinp->lxc.c = lc; + hcl->c->curinp->lxc.l.line = hcl->c->curinp->line; + hcl->c->curinp->lxc.l.colm = hcl->c->curinp->colm++; + hcl->c->curinp->lxc.l.file = hcl->c->curinp->name; + hcl->c->lxc = hcl->c->curinp->lxc; + + return 1; /* indicate that a normal character has been read */ +} + +static int skip_comment (hcl_t* hcl) +{ + hcl_ooci_t c = hcl->c->lxc.c; + hcl_iolxc_t lc; + + if (c == ';') goto single_line_comment; + if (c != '#') return 0; /* not a comment */ + + /* attempt to handle #! or ## */ + + lc = hcl->c->lxc; /* save the last character */ + GET_CHAR_TO (hcl, c); /* read a following character */ + + if (c == '!' || c == '#') + { + single_line_comment: + do + { + GET_CHAR_TO (hcl, c); + if (c == HCL_OOCI_EOF) + { + break; + } + else if (c == '\r' || c == '\n') + { + GET_CHAR (hcl); /* keep the first meaningful character in lxc */ + break; + } + } + while (1); + + return 1; /* single line comment led by ## or #! or ; */ + } + + /* unget the leading '#' */ + unget_char (hcl, &hcl->c->lxc); + /* restore the previous state */ + hcl->c->lxc = lc; + + return 0; +} + +static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int regex, hcl_oow_t preescaped) +{ + hcl_ooci_t c; + hcl_oow_t escaped = preescaped; + hcl_oow_t digit_count = 0; + hcl_ooci_t c_acc = 0; + + SET_TOKEN_TYPE (hcl, HCL_IOTOK_STRLIT); + + while (1) + { + GET_CHAR_TO (hcl, c); + + if (c == HCL_OOCI_EOF) + { + hcl_setsynerr (hcl, HCL_SYNERR_STRCHRNC, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL); + return -1; + } + + if (escaped == 3) + { + if (c >= '0' && c <= '7') + { + /* more octal digits */ + c_acc = c_acc * 8 + c - '0'; + digit_count++; + if (digit_count >= escaped) + { + /* should i limit the max to 0xFF/0377? + * if (c_acc > 0377) c_acc = 0377;*/ + ADD_TOKEN_CHAR (hcl, c_acc); + escaped = 0; + } + continue; + } + else + { + ADD_TOKEN_CHAR (hcl, c_acc); + escaped = 0; + } + } + else if (escaped == 2 || escaped == 4 || escaped == 8) + { + if (c >= '0' && c <= '9') + { + c_acc = c_acc * 16 + c - '0'; + digit_count++; + if (digit_count >= escaped) + { + ADD_TOKEN_CHAR (hcl, c_acc); + escaped = 0; + } + continue; + } + else if (c >= 'A' && c <= 'F') + { + c_acc = c_acc * 16 + c - 'A' + 10; + digit_count++; + if (digit_count >= escaped) + { + ADD_TOKEN_CHAR (hcl, c_acc); + escaped = 0; + } + continue; + } + else if (c >= 'a' && c <= 'f') + { + c_acc = c_acc * 16 + c - 'a' + 10; + digit_count++; + if (digit_count >= escaped) + { + ADD_TOKEN_CHAR (hcl, c_acc); + escaped = 0; + } + continue; + } + else + { + hcl_ooch_t rc; + + rc = (escaped == 2)? 'x': + (escaped == 4)? 'u': 'U'; + if (digit_count == 0) + ADD_TOKEN_CHAR (hcl, rc); + else ADD_TOKEN_CHAR (hcl, c_acc); + + escaped = 0; + } + } + + if (escaped == 0 && c == end_char) + { + /* terminating quote */ + /* GET_CHAR (hcl); */ + break; + } + + if (escaped == 0 && c == esc_char) + { + escaped = 1; + continue; + } + + if (escaped == 1) + { + if (c == 'a') c = '\a'; + else if (c == 'b') c = '\b'; + else if (c == 'f') c = '\f'; + else if (c == 'n') c = '\n'; + else if (c == 'r') c = '\r'; + else if (c == 't') c = '\t'; + else if (c == 'v') c = '\v'; + else if (c >= '0' && c <= '7' && !regex) + { + /* i don't support the octal notation for a regular expression. + * it conflicts with the backreference notation between \1 and \7 inclusive. */ + escaped = 3; + digit_count = 1; + c_acc = c - '0'; + continue; + } + else if (c == 'x') + { + escaped = 2; + digit_count = 0; + c_acc = 0; + continue; + } + #if (HCL_SIZEOF_OOCH_T >= 2) + else if (c == 'u') + { + escaped = 4; + digit_count = 0; + c_acc = 0; + continue; + } + #endif + #if (HCL_SIZEOF_OOCH_T >= 4) + else if (c == 'U') + { + escaped = 8; + digit_count = 0; + c_acc = 0; + continue; + } + #endif + else if (regex) + { + /* if the following character doesn't compose a proper + * escape sequence, keep the escape character. + * an unhandled escape sequence can be handled + * outside this function since the escape character + * is preserved.*/ + ADD_TOKEN_CHAR (hcl, esc_char); + } + + escaped = 0; + } + + ADD_TOKEN_CHAR (hcl, c); + } + + return 0; +} + +static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix) +{ + hcl_ooci_t c; + + ADD_TOKEN_CHAR (hcl, '#'); + ADD_TOKEN_CHAR (hcl, rc); + + GET_CHAR_TO (hcl, c); + + if (CHAR_TO_NUM(c, radix) >= radix) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "no digit after radix specifier in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); + return -1; + } + + do + { + ADD_TOKEN_CHAR(hcl, c); + GET_CHAR_TO (hcl, c); + } + while (CHAR_TO_NUM(c, radix) < radix); + + if (!is_delimiter(c)) + { + do + { + ADD_TOKEN_CHAR(hcl, c); + GET_CHAR_TO (hcl, c); + } + while (!is_delimiter(c)); + + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid digit in radixed number in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); + return -1; + } + + unget_char (hcl, &hcl->c->lxc); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_RADNUMLIT); + + return 0; +} + +static int get_sharp_token (hcl_t* hcl) +{ + hcl_ooci_t c; + int radix; + + HCL_ASSERT (hcl, hcl->c->lxc.c == '#'); + + GET_CHAR_TO (hcl, c); + + /* + * #bBBBB binary + * #oOOOO octal + * #xXXXX hexadecimal + * #eDDD error + * #pHHH smptr + * #nil + * #true + * #false + * #include + * #\C character + * #\xHHHH unicode character + * #\UHHHH unicode character + * #\uHHHH unicode character + * #[ ] byte array + * #( ) qlist + */ + + switch (c) + { + case 'x': + radix = 16; + goto radixnum; + case 'o': + radix = 8; + goto radixnum; + case 'b': + radix = 2; + radixnum: + if (get_radix_number (hcl, c, radix) <= -1) return -1; + break; + + case 'e': + if (get_radix_number(hcl, c, 10) <= -1) return -1; + SET_TOKEN_TYPE (hcl, HCL_IOTOK_ERRLIT); + break; + + case 'p': + if (get_radix_number(hcl, c, 16) <= -1) return -1; + SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT); + break; + + case '\\': /* character literal */ + ADD_TOKEN_CHAR (hcl, '#'); + ADD_TOKEN_CHAR (hcl, '\\'); + + GET_CHAR_TO (hcl, c); + if (is_delimiter(c)) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "no valid character after #\\ in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); + return -1; + } + + SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); + do + { + ADD_TOKEN_CHAR (hcl, c); + GET_CHAR_TO (hcl, c); + } + while (!is_delimiter(c)); + + if (TOKEN_NAME_LEN(hcl) >= 4) + { + int max_digit_count = 0; + + if (TOKEN_NAME_CHAR(hcl, 2) == 'x') + { + hcl_oow_t i; + max_digit_count = 2; + + hexcharlit: + if (TOKEN_NAME_LEN(hcl) - 3 > max_digit_count) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } + c = 0; + for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) + { + if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, i))) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } + + c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */ + } + + } + #if (HCL_SIZEOF_OOCH_T >= 2) + else if (TOKEN_NAME_CHAR(hcl, 2) == 'u') + { + max_digit_count = 4; + goto hexcharlit; + } + #endif + #if (HCL_SIZEOF_OOCH_T >= 4) + else if (TOKEN_NAME_CHAR(hcl, 2) == 'U') + { + max_digit_count = 8; + goto hexcharlit; + } + #endif + else if (does_token_name_match(hcl, VOCA_SPACE)) + { + c = ' '; + } + else if (does_token_name_match(hcl, VOCA_NEWLINE)) + { + /* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */ + c = '\n'; + } + else if (does_token_name_match(hcl, VOCA_BACKSPACE)) + { + c = '\b'; + } + else if (does_token_name_match(hcl, VOCA_TAB)) + { + c = '\t'; + } + else if (does_token_name_match(hcl, VOCA_LINEFEED)) + { + c = '\n'; + } + else if (does_token_name_match(hcl, VOCA_PAGE)) + { + c = '\f'; + } + else if (does_token_name_match(hcl, VOCA_RETURN)) + { + c = '\r'; + } + else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */ + { + c = '\0'; + } + else if (does_token_name_match(hcl, VOCA_VTAB)) + { + c = '\v'; + } + else if (does_token_name_match(hcl, VOCA_RUBOUT)) + { + c = '\x7F'; /* DEL */ + } + else + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid character literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); + return -1; + } + } + else + { + HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3); + c = TOKEN_NAME_CHAR(hcl, 2); + } + + /* reset the token name to the converted character */ + if (hcl->c->tok.type == HCL_IOTOK_CHARLIT) + { + CLEAR_TOKEN_NAME (hcl); + ADD_TOKEN_CHAR (hcl, c); + } + + unget_char (hcl, &hcl->c->lxc); + break; + + case '[': /* #[ - byte array opener */ + ADD_TOKEN_CHAR (hcl, '#'); + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_BAPAREN); + break; + + case '(': /* #( - qlist opener */ + ADD_TOKEN_CHAR (hcl, '#'); + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_QLPAREN); + break; + + default: + if (is_delimiter(c)) + { + /* EOF, whitespace, etc */ + hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid hashed literal #%jc", c); + return -1; + } + + ADD_TOKEN_CHAR (hcl, '#'); + long_name: + do + { + ADD_TOKEN_CHAR (hcl, c); + GET_CHAR_TO (hcl, c); + } + while (!is_delimiter(c)); + + if (does_token_name_match (hcl, VOCA_INCLUDE)) + { + SET_TOKEN_TYPE (hcl, HCL_IOTOK_INCLUDE); + } + else + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid hashed literal %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); + return -1; + } + + unget_char (hcl, &hcl->c->lxc); + break; + } + + return 0; +} + +static hcl_iotok_type_t classify_ident_token (hcl_t* hcl, const hcl_oocs_t* v) +{ + hcl_oow_t i; + struct + { + hcl_oow_t len; + hcl_ooch_t name[10]; + hcl_iotok_type_t type; + } tab[] = + { + { 4, { 'n','u','l','l' }, HCL_IOTOK_NIL }, + { 4, { 't','r','u','e' }, HCL_IOTOK_TRUE }, + { 5, { 'f','a','l','s','e' }, HCL_IOTOK_FALSE } + }; + + for (i = 0; i < HCL_COUNTOF(tab); i++) + { + if (hcl_comp_oochars(v->ptr, v->len, tab[i].name, tab[i].len) == 0) return tab[i].type; + } + + return HCL_IOTOK_IDENT; +} + +static int get_token (hcl_t* hcl) +{ + hcl_ooci_t c, oldc; + int n; + +retry: + GET_CHAR (hcl); + + do + { + /* skip spaces */ + while (is_spacechar(hcl->c->lxc.c)) GET_CHAR (hcl); + /* the first character after the last space is in hcl->c->lxc */ + if ((n = skip_comment(hcl)) <= -1) return -1; + } + while (n >= 1); + + /* clear the token name, reset its location */ + SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */ + CLEAR_TOKEN_NAME (hcl); + hcl->c->tok.loc = hcl->c->lxc.l; /* set token location */ + + c = hcl->c->lxc.c; + + switch (c) + { + case HCL_OOCI_EOF: + { + int n; + + n = end_include (hcl); + if (n <= -1) return -1; + if (n >= 1) goto retry; + + SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); + ADD_TOKEN_STR(hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len); + break; + } + + case '(': + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_LPAREN); + break; + + case ')': + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_RPAREN); + break; + + case '[': + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_LBRACK); + break; + + case ']': + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_RBRACK); + break; + + case '{': + ADD_TOKEN_CHAR(hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_LBRACE); + break; + + case '}': + ADD_TOKEN_CHAR (hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_RBRACE); + break; + + case '|': + ADD_TOKEN_CHAR (hcl, c); + SET_TOKEN_TYPE (hcl, HCL_IOTOK_VBAR); + break; + + case '.': + SET_TOKEN_TYPE (hcl, HCL_IOTOK_DOT); + ADD_TOKEN_CHAR (hcl, c); + break; + + case ',': + SET_TOKEN_TYPE (hcl, HCL_IOTOK_COMMA); + ADD_TOKEN_CHAR (hcl, c); + break; + + case ':': + SET_TOKEN_TYPE (hcl, HCL_IOTOK_COLON); + ADD_TOKEN_CHAR (hcl, c); + break; + + case '\"': + if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; + break; + + case '\'': + if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; + if (hcl->c->tok.name.len != 1) + { + hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); + break; + + case '#': + if (get_sharp_token(hcl) <= -1) return -1; + break; + + case '+': + case '-': + oldc = c; + GET_CHAR_TO (hcl, c); + if(is_digitchar(c)) + { + unget_char (hcl, &hcl->c->lxc); + c = oldc; + goto numlit; + } + else if (c == '#') + { + int radix; + hcl_iolxc_t sharp; + + sharp = hcl->c->lxc; /* back up '#' */ + + GET_CHAR_TO (hcl, c); + switch (c) + { + case 'b': + radix = 2; + goto radnumlit; + case 'o': + radix = 8; + goto radnumlit; + case 'x': + radix = 16; + radnumlit: + ADD_TOKEN_CHAR (hcl, oldc); + if (get_radix_number(hcl, c, radix) <= -1) return -1; + break; + + default: + unget_char (hcl, &hcl->c->lxc); + unget_char (hcl, &sharp); + c = oldc; + goto ident; + } + } + else + { + unget_char (hcl, &hcl->c->lxc); + c = oldc; + goto ident; + } + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + numlit: + SET_TOKEN_TYPE (hcl, HCL_IOTOK_NUMLIT); + while (1) + { + ADD_TOKEN_CHAR (hcl, c); + GET_CHAR_TO (hcl, c); + if (TOKEN_TYPE(hcl) == HCL_IOTOK_NUMLIT && c == '.') + { + SET_TOKEN_TYPE (hcl, HCL_IOTOK_FPDECLIT); + ADD_TOKEN_CHAR (hcl, c); + GET_CHAR_TO (hcl, c); + if (!is_digitchar(c)) + { + /* the first character after the decimal point is not a decimal digit */ + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point"); + return -1; + } + } + + if (!is_digitchar(c)) + { + unget_char (hcl, &hcl->c->lxc); + break; + } + } + + break; + + default: + ident: + if (is_delimiter(c)) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c); + return -1; + } + + SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT); + while (1) + { + ADD_TOKEN_CHAR (hcl, c); + GET_CHAR_TO (hcl, c); + + if (c == '.') + { + hcl_iolxc_t period; + hcl_iotok_type_t type; + + type = classify_ident_token(hcl, TOKEN_NAME(hcl)); + if (type != HCL_IOTOK_IDENT) + { + SET_TOKEN_TYPE (hcl, type); + unget_char (hcl, &hcl->c->lxc); + break; + } + + period = hcl->c->lxc; + + read_more_seg: + GET_CHAR_TO (hcl, c); + if (!is_delimiter(c)) + { + hcl_oow_t start; + hcl_oocs_t seg; + + SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT_DOTTED); + ADD_TOKEN_CHAR (hcl, '.'); + + start = TOKEN_NAME_LEN(hcl); + do + { + ADD_TOKEN_CHAR (hcl, c); + GET_CHAR_TO (hcl, c); + } + while (!is_delimiter(c)); + + seg.ptr = &TOKEN_NAME_CHAR(hcl,start); + seg.len = TOKEN_NAME_LEN(hcl) - start; + if (classify_ident_token(hcl, &seg) != HCL_IOTOK_IDENT) + { + hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + + if (c == '.') goto read_more_seg; + + unget_char (hcl, &hcl->c->lxc); + break; + } + else + { + unget_char (hcl, &hcl->c->lxc); + unget_char (hcl, &period); + } + break; + } + else if (is_delimiter(c)) + { + unget_char (hcl, &hcl->c->lxc); + break; + } + } + + if (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT) + { + hcl_iotok_type_t type; + type = classify_ident_token(hcl, TOKEN_NAME(hcl)); + SET_TOKEN_TYPE (hcl, type); + } + break; + } + +done: +#if defined(HCL_DEBUG_LEXER) + HCL_DEBUG2 (hcl, "TOKEN: [%.*js]\n", (hcl_ooi_t)TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); +#endif + + return 0; +} + +static void clear_io_names (hcl_t* hcl) +{ + hcl_iolink_t* cur; + + HCL_ASSERT (hcl, hcl->c != HCL_NULL); + + while (hcl->c->io_names) + { + cur = hcl->c->io_names; + hcl->c->io_names = cur->link; + hcl_freemem (hcl, cur); + } +} + +static const hcl_ooch_t* add_io_name (hcl_t* hcl, const hcl_oocs_t* name) +{ + hcl_iolink_t* link; + hcl_ooch_t* ptr; + + link = (hcl_iolink_t*)hcl_callocmem (hcl, HCL_SIZEOF(*link) + HCL_SIZEOF(hcl_ooch_t) * (name->len + 1)); + if (HCL_UNLIKELY(!link)) return HCL_NULL; + + ptr = (hcl_ooch_t*)(link + 1); + + hcl_copy_oochars (ptr, name->ptr, name->len); + ptr[name->len] = '\0'; + + link->link = hcl->c->io_names; + hcl->c->io_names = link; + + return ptr; +} + +/* -------------------------------------------------------------------------- */ + +static int begin_include (hcl_t* hcl) +{ + hcl_ioinarg_t* arg; + const hcl_ooch_t* io_name; + + io_name = add_io_name (hcl, TOKEN_NAME(hcl)); + if (!io_name) return -1; + + arg = (hcl_ioinarg_t*)hcl_callocmem (hcl, HCL_SIZEOF(*arg)); + if (HCL_UNLIKELY(!arg)) goto oops; + + arg->name = io_name; + arg->line = 1; + arg->colm = 1; + /*arg->nl = '\0';*/ + arg->includer = hcl->c->curinp; + + if (hcl->c->reader(hcl, HCL_IO_OPEN, arg) <= -1) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to include %js", io_name); + goto oops; + } + +#if 0 + GET_TOKEN_WITH_GOTO (hcl, oops); + if (TOKEN_TYPE(hcl) != HCL_IOTOK_DOT) + { + /* check if a period is following the includee name */ + hcl_setsynerr (hcl, HCL_SYNERR_PERIOD, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + goto oops; + } +#endif + + /* switch to the includee's stream */ + hcl->c->curinp = arg; + /* hcl->c->depth.incl++; */ + + /* read in the first character in the included file. + * so the next call to get_token() sees the character read + * from this file. */ + if (get_token(hcl) <= -1) + { + end_include (hcl); + /* i don't jump to oops since i've called + * end_include() which frees hcl->c->curinp/arg */ + return -1; + } + + return 0; + +oops: + if (arg) hcl_freemem (hcl, arg); + return -1; +} + +static int end_include (hcl_t* hcl) +{ + int x; + hcl_ioinarg_t* cur; + + if (hcl->c->curinp == &hcl->c->inarg) return 0; /* no include */ + + /* if it is an included file, close it and + * retry to read a character from an outer file */ + + x = hcl->c->reader(hcl, HCL_IO_CLOSE, hcl->c->curinp); + + /* if closing has failed, still destroy the + * sio structure first as normal and return + * the failure below. this way, the caller + * does not call HCL_IO_CLOSE on + * hcl->c->curinp again. */ + + cur = hcl->c->curinp; + hcl->c->curinp = hcl->c->curinp->includer; + + HCL_ASSERT (hcl, cur->name != HCL_NULL); + hcl_freemem (hcl, cur); + /* hcl->parse.depth.incl--; */ + + if (x != 0) + { + /* the failure mentioned above is returned here */ + return -1; + } + + hcl->c->lxc = hcl->c->curinp->lxc; + return 1; /* ended the included file successfully */ +} + + +static HCL_INLINE hcl_oop_t enter_list (hcl_t* hcl, int flagv) +{ + hcl_oop_oop_t rsa; + + /* upon entering a list, it pushes a frame of 4 slots. + * rsa[0] stores the first element in the list. + * rsa[1] stores the last element in the list. + * both are updated in chain_to_list() as items are added. + * rsa[2] stores the flag value. + * rsa[3] stores the pointer to the previous top frame. + * rsa[4] stores the number of elements in the list */ + rsa = (hcl_oop_oop_t)hcl_makearray(hcl, 5, 0); + if (HCL_UNLIKELY(!rsa)) return HCL_NULL; + + rsa->slot[2] = HCL_SMOOI_TO_OOP(flagv); + rsa->slot[3] = hcl->c->r.s; /* push */ + hcl->c->r.s = (hcl_oop_t)rsa; + + rsa->slot[4] = HCL_SMOOI_TO_OOP(0); + + return hcl->c->r.s; +} + +static HCL_INLINE hcl_oop_t leave_list (hcl_t* hcl, int* flagv, int* oldflagv) +{ + hcl_oop_oop_t rsa; + hcl_oop_t head; + int fv, concode; + + /* the stack must not be empty - cannot leave a list without entering it */ + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + + rsa = (hcl_oop_oop_t)hcl->c->r.s; + + head = rsa->slot[0]; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + fv = HCL_OOP_TO_SMOOI(rsa->slot[2]); + concode = LIST_FLAG_GET_CONCODE(fv); + + hcl->c->r.s = rsa->slot[3]; /* pop off */ + rsa->slot[3] = hcl->_nil; + + if (fv & (COMMAED | COLONED)) + { + hcl_setsynerr (hcl, ((fv & COMMAED)? HCL_SYNERR_COMMANOVALUE: HCL_SYNERR_COLONNOVALUE), TOKEN_LOC(hcl), HCL_NULL); + return HCL_NULL; + } + +#if 0 + /* TODO: literalize the list if all the elements are all literals */ + if (concode == HCL_CONCODE_ARRAY || concode == HCL_CONCODE_BYTEARRAY /*|| concode == HCL_CONCODE_DIC*/) + { + /* convert a list to an array */ + hcl_oop_oop_t arr; + hcl_oop_t ptr; + hcl_oow_t count; + + ptr = head; + count = 0; + while (ptr != hcl->_nil) + { + hcl_oop_t car; + HCL_ASSERT (hcl, HCL_OBJ_GET_FLAGS_BRAND(ptr) == HCL_BRAND_CONS); + car = HCL_CONS_CAR(ptr); + + if (!HCL_OOP_IS_NUMERIC(car)) goto done; /* TODO: check if the element is a literal properly here */ + + ptr = HCL_CONS_CDR(ptr); + count++; + } + + hcl_pushvolat (hcl, &head); + arr = (hcl_oop_oop_t)hcl_makearray(hcl, count, 0); + hcl_popvolat (hcl); + if (!arr) return HCL_NULL; + + ptr = head; + count = 0; + while (ptr != hcl->_nil) + { + arr->slot[count++] = HCL_CONS_CAR(ptr); + ptr = HCL_CONS_CDR(ptr); + } + + head = (hcl_oop_t)arr; + } +done: +#endif + + *oldflagv = fv; + if (HCL_IS_NIL(hcl,hcl->c->r.s)) + { + /* the stack is empty after popping. + * it is back to the top level. + * the top level can never be quoted. */ + *flagv = 0; + } + else + { + /* restore the flag for the outer returning level */ + rsa = (hcl_oop_oop_t)hcl->c->r.s; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + *flagv = HCL_OOP_TO_SMOOI(rsa->slot[2]); + } + + /* return the head of the list being left */ + if (HCL_IS_NIL(hcl,head)) + { + /* the list is empty. literalize the empty list according to + * the list opener. for a list, it is same as #nil. */ + switch (concode) + { + case HCL_CONCODE_ARRAY: + return (hcl_oop_t)hcl_makearray(hcl, 0, 0); + case HCL_CONCODE_BYTEARRAY: + return (hcl_oop_t)hcl_makebytearray(hcl, HCL_NULL, 0); + case HCL_CONCODE_DIC: + return (hcl_oop_t)hcl_makedic(hcl, 100); /* TODO: default dictionary size for empty definition? */ + + /* NOTE: empty xlist will get translated to #nil. + * this is useful when used in the lambda expression to express an empty argument. also in defun. + * (lambda () ...) is equivalent to (lambda #nil ...) + * (defun x() ...) */ + } + } + + if (HCL_IS_CONS(hcl,head)) HCL_OBJ_SET_FLAGS_SYNCODE(head, concode); + return head; +} + +static HCL_INLINE int can_dot_list (hcl_t* hcl) +{ + hcl_oop_oop_t rsa; + int flagv; + hcl_ooi_t count; + + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + + /* mark the state that a dot has appeared in the list */ + rsa = (hcl_oop_oop_t)hcl->c->r.s; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + flagv = HCL_OOP_TO_SMOOI(rsa->slot[2]); + count = HCL_OOP_TO_SMOOI(rsa->slot[4]); + + if (count <= 0) return 0; + if (LIST_FLAG_GET_CONCODE(flagv) != HCL_CONCODE_QLIST) return 0; + + flagv |= DOTTED; + rsa->slot[2] = HCL_SMOOI_TO_OOP(flagv); + return 1; +} + +static HCL_INLINE int can_comma_list (hcl_t* hcl) +{ + hcl_oop_oop_t rsa; + int flagv; + hcl_ooi_t count; + + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + + rsa = (hcl_oop_oop_t)hcl->c->r.s; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + flagv = HCL_OOP_TO_SMOOI(rsa->slot[2]); + count = HCL_OOP_TO_SMOOI(rsa->slot[4]); + + if (count <= 0) return 0; + if (count == 1) flagv |= JSON; + else if (!(flagv & JSON)) return 0; + if (flagv & (COMMAED | COLONED)) return 0; + + if (LIST_FLAG_GET_CONCODE(flagv) == HCL_CONCODE_DIC) + { + if (count & 1) return 0; + } + else if (LIST_FLAG_GET_CONCODE(flagv) != HCL_CONCODE_ARRAY && + LIST_FLAG_GET_CONCODE(flagv) != HCL_CONCODE_BYTEARRAY) + { + return 0; + } + + flagv |= COMMAED; + rsa->slot[2] = HCL_SMOOI_TO_OOP(flagv); + return 1; +} + +static HCL_INLINE int can_colon_list (hcl_t* hcl) +{ + hcl_oop_oop_t rsa; + int flagv; + hcl_ooi_t count; + + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + + /* mark the state that a colon has appeared in the list */ + rsa = (hcl_oop_oop_t)hcl->c->r.s; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + flagv = HCL_OOP_TO_SMOOI(rsa->slot[2]); + count = HCL_OOP_TO_SMOOI(rsa->slot[4]); + + if (count <= 0) return 0; + if (count == 1) flagv |= JSON; + else if (!(flagv & JSON)) return 0; + + if (flagv & (COMMAED | COLONED)) return 0; + + if (LIST_FLAG_GET_CONCODE(flagv) != HCL_CONCODE_DIC) return 0; + + count = HCL_OOP_TO_SMOOI(rsa->slot[4]); + if (!(count & 1)) return 0; + + flagv |= COLONED; + rsa->slot[2] = HCL_SMOOI_TO_OOP(flagv); + return 1; +} + +static HCL_INLINE void clear_comma_colon_flag (hcl_t* hcl) +{ + hcl_oop_oop_t rsa; + int flagv; + + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + + rsa = (hcl_oop_oop_t)hcl->c->r.s; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + flagv = HCL_OOP_TO_SMOOI(rsa->slot[2]); + + flagv &= ~(COMMAED | COLONED); + rsa->slot[2] = HCL_SMOOI_TO_OOP(flagv); +} + +static hcl_oop_t chain_to_list (hcl_t* hcl, hcl_oop_t obj) +{ + hcl_oop_oop_t rsa; + int flagv; + + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + rsa = (hcl_oop_oop_t)hcl->c->r.s; + HCL_ASSERT (hcl, HCL_OOP_IS_SMOOI(rsa->slot[2])); + flagv = (int)HCL_OOP_TO_SMOOI(rsa->slot[2]); + + if (flagv & CLOSED) + { + /* the list has already been closed and cannot add more items + * for instance, see this faulty expression [1 2 . 3 4 ]. + * you can have only 1 item after the period. this condition + * can only be triggered by a wrong qlist where a period is + * allowed. so i can safely hard-code the error code to + * HCL_SYNERR_RBRACK. */ + hcl_setsynerr (hcl, HCL_SYNERR_RBRACK, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return HCL_NULL; + } + else if (flagv & DOTTED) + { + hcl_ooi_t count; + + /* the list must not be empty to have reached the dotted state */ + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,rsa->slot[1])); + + /* chain the object via 'cdr' of the tail cell */ + HCL_CONS_CDR(rsa->slot[1]) = obj; + + /* update the flag to CLOSED so that you can have more than + * one item after the dot. */ + flagv |= CLOSED; + rsa->slot[2] = HCL_SMOOI_TO_OOP(flagv); + + count = HCL_OOP_TO_SMOOI(rsa->slot[4]) + 1; + rsa->slot[4] = HCL_SMOOI_TO_OOP(count); + } + else + { + hcl_oop_t cons; + hcl_ooi_t count; + + count = HCL_OOP_TO_SMOOI(rsa->slot[4]); + + if ((flagv & JSON) && count > 0 && !(flagv & (COMMAED | COLONED))) + { + /* there is no separator between array/dictionary elements + * for instance, [1 2] { 10 20 } */ + hcl_setsynerr (hcl, HCL_SYNERR_NOSEP, TOKEN_LOC(hcl), HCL_NULL); + return HCL_NULL; + } + + hcl_pushvolat (hcl, (hcl_oop_t*)&rsa); + cons = hcl_makecons(hcl, obj, hcl->_nil); + hcl_popvolat (hcl); + if (!cons) return HCL_NULL; + + if (HCL_IS_NIL(hcl, rsa->slot[0])) + { + /* the list head is not set yet. it is the first + * element added to the list. let both head and tail + * point to the new cons cell */ + HCL_ASSERT (hcl, HCL_IS_NIL(hcl, rsa->slot[1])); + rsa->slot[0] = cons; + rsa->slot[1] = cons; + } + else + { + /* the new cons cell is not the first element. + * append it to the list */ + HCL_CONS_CDR(rsa->slot[1]) = cons; + rsa->slot[1] = cons; + } + + count++; + rsa->slot[4] = HCL_SMOOI_TO_OOP(count); + } + + return obj; +} + +#if 0 +static HCL_INLINE int is_list_empty (hcl_t* hcl) +{ + hcl_oop_oop_t rsa; + /* the stack must not be empty */ + HCL_ASSERT (hcl, !HCL_IS_NIL(hcl,hcl->c->r.s)); + rsa = (hcl_oop_oop_t)hcl->c->r.s; + /* if the tail pointer is pointing to nil, the list is empty */ + return HCL_IS_NIL(hcl, rsa->slot[1]); +} +#endif + + +static int add_to_symbol_array_literal_buffer (hcl_t* hcl, hcl_oop_t b) +{ + if (hcl->c->r.salit.size >= hcl->c->r.salit.capa) + { + hcl_oop_t* tmp; + hcl_oow_t new_capa; + + new_capa = HCL_ALIGN (hcl->c->r.salit.size + 1, SALIT_BUFFER_ALIGN); + tmp = (hcl_oop_t*)hcl_reallocmem (hcl, hcl->c->r.salit.ptr, new_capa * HCL_SIZEOF(*tmp)); + if (!tmp) return -1; + + hcl->c->r.salit.capa = new_capa; + hcl->c->r.salit.ptr = tmp; + } + +/* TODO: overflow check of hcl->c->r.tvlit_count itself */ + hcl->c->r.salit.ptr[hcl->c->r.salit.size++] = b; + return 0; +} + +static int get_symbol_array_literal (hcl_t* hcl, hcl_oop_t* xlit) +{ + hcl_oop_t sa, sym; + hcl_oow_t i; + + /* if the program is not buggy, salit.size must be 0 here. */ + HCL_ASSERT (hcl, hcl->c->r.salit.size == 0); + hcl->c->r.salit.size = 0; /* i want to set it to 0 in case it's buggy */ + + HCL_ASSERT (hcl, TOKEN_TYPE(hcl) == HCL_IOTOK_VBAR); + GET_TOKEN_WITH_GOTO(hcl, oops); + + while (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT /* || TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT_DOTTED */) + { + sym = hcl_makesymbol(hcl, TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + if (!sym) goto oops; + + if (HCL_OBJ_GET_FLAGS_SYNCODE(sym) || HCL_OBJ_GET_FLAGS_KERNEL(sym)) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_BANNEDVARNAME, HCL_NULL, HCL_NULL, + "special symbol not to be declared as a variable - %O", sym); /* TOOD: error location */ + goto oops; + } + + if (add_to_symbol_array_literal_buffer(hcl, sym) <= -1) goto oops; + GET_TOKEN_WITH_GOTO (hcl, oops); + } + + if (TOKEN_TYPE(hcl) != HCL_IOTOK_VBAR) + { + hcl_setsynerr (hcl, HCL_SYNERR_VBAR, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + goto oops; + } + + sa = hcl_makearray(hcl, hcl->c->r.salit.size, 0); + if (!sa) goto oops; + + for (i = 0; i < hcl->c->r.salit.size; i++) + ((hcl_oop_oop_t)sa)->slot[i] = hcl->c->r.salit.ptr[i]; + + /* switch array to symbol array. this is special-purpose. */ + HCL_OBJ_SET_FLAGS_BRAND (sa, HCL_BRAND_SYMBOL_ARRAY); + + *xlit = sa; + + hcl->c->r.salit.size = 0; /* reset literal count... */ + return 0; + +oops: + hcl->c->r.salit.size = 0; /* reset literal count... */ + return -1; +} + +static int read_object (hcl_t* hcl) +{ + /* this function read an s-expression non-recursively + * by manipulating its own stack. */ + + int level = 0, array_level = 0, flagv = 0; + hcl_oop_t obj; + hcl_cnode_t* cnode; + + while (1) + { + redo: + switch (TOKEN_TYPE(hcl)) + { + default: + hcl_setsynerr (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + + case HCL_IOTOK_EOF: + hcl_setsynerr (hcl, HCL_SYNERR_EOF, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + + case HCL_IOTOK_INCLUDE: + /* TODO: should i limit where #include can be specified? + * disallow it inside a list literal or an array literal? */ + GET_TOKEN (hcl); + if (TOKEN_TYPE(hcl) != HCL_IOTOK_STRLIT) + { + hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + if (begin_include(hcl) <= -1) return -1; + goto redo; + + case HCL_IOTOK_LBRACK: /* [] */ + flagv = 0; + LIST_FLAG_SET_CONCODE (flagv, HCL_CONCODE_ARRAY); + goto start_list; + + case HCL_IOTOK_BAPAREN: /* #[] */ + flagv = 0; + LIST_FLAG_SET_CONCODE (flagv, HCL_CONCODE_BYTEARRAY); + goto start_list; + + case HCL_IOTOK_LBRACE: /* {} */ + flagv = 0; + LIST_FLAG_SET_CONCODE (flagv, HCL_CONCODE_DIC); + goto start_list; + + case HCL_IOTOK_QLPAREN: /* #() */ +#if 1 + hcl_setsynerr (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; +#else + flagv = 0; + LIST_FLAG_SET_CONCODE (flagv, HCL_CONCODE_QLIST); + goto start_list; +#endif + + case HCL_IOTOK_LPAREN: /* () */ + flagv = 0; + LIST_FLAG_SET_CONCODE (flagv, HCL_CONCODE_XLIST); + start_list: + if (level >= HCL_TYPE_MAX(int)) + { + /* the nesting level has become too deep */ + hcl_setsynerr (hcl, HCL_SYNERR_NESTING, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + + /* push some data to simulate recursion into + * a list literal or an array literal */ + if (enter_list(hcl, flagv) == HCL_NULL) return -1; + level++; + if (LIST_FLAG_GET_CONCODE(flagv) == HCL_CONCODE_ARRAY) array_level++; + + /* read the next token */ + GET_TOKEN (hcl); + goto redo; + + case HCL_IOTOK_DOT: + if (level <= 0 || !can_dot_list(hcl)) + { + /* cannot have a period: + * 1. at the top level - not inside () + * 2. at the beginning of a list + * 3. inside an #(), #[], #{}, () */ + hcl_setsynerr (hcl, HCL_SYNERR_DOTBANNED, TOKEN_LOC(hcl), HCL_NULL); + return -1; + } + + GET_TOKEN (hcl); + goto redo; + + case HCL_IOTOK_COLON: + if (level <= 0 || !can_colon_list(hcl)) + { + hcl_setsynerr (hcl, HCL_SYNERR_COLONBANNED, TOKEN_LOC(hcl), HCL_NULL); + return -1; + } + + GET_TOKEN (hcl); + goto redo; + + case HCL_IOTOK_COMMA: + if (level <= 0 || !can_comma_list(hcl)) + { + hcl_setsynerr (hcl, HCL_SYNERR_COMMABANNED, TOKEN_LOC(hcl), HCL_NULL); + return -1; + } + + GET_TOKEN (hcl); + goto redo; + + case HCL_IOTOK_RPAREN: /* xlist (), qlist #() */ + case HCL_IOTOK_RBRACK: /* bytearray #[], array[] */ + case HCL_IOTOK_RBRACE: /* dictionary {} */ + { + static struct + { + int closer; + hcl_synerrnum_t synerr; + } req[] = + { + { HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN }, /* XLIST () */ + { HCL_IOTOK_RBRACK, HCL_SYNERR_RBRACK }, /* ARRAY [] */ + { HCL_IOTOK_RBRACK, HCL_SYNERR_RBRACK }, /* BYTEARRAY #[] */ + { HCL_IOTOK_RBRACE, HCL_SYNERR_RBRACE }, /* DIC {} */ + { HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN } /* QLIST #() */ + }; + + int oldflagv; + int concode; + + if (level <= 0) + { + hcl_setsynerr (hcl, HCL_SYNERR_UNBALPBB, TOKEN_LOC(hcl), HCL_NULL); + return -1; + } + + concode = LIST_FLAG_GET_CONCODE(flagv); + + if (req[concode].closer != TOKEN_TYPE(hcl)) + { + hcl_setsynerr (hcl, req[concode].synerr, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + +#if 0 + if ((flagv & QUOTED) || level <= 0) + { + /* the right parenthesis can never appear while + * 'quoted' is true. 'quoted' is set to false when + * entering a normal list. 'quoted' is set to true + * when entering a quoted list. a quoted list does + * not have an explicit right parenthesis. + * so the right parenthesis can only pair up with + * the left parenthesis for the normal list. + * + * For example, '(1 2 3 ') 5 6) + * + * this condition is triggerred when the first ) is + * met after the second quote. + * + * also it is illegal to have the right parenthesis + * with no opening(left) parenthesis, which is + * indicated by level<=0. + */ + hcl_setsynerr (hcl, HCL_SYNERR_LPAREN, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } +#endif + obj = leave_list(hcl, &flagv, &oldflagv); + + level--; + if (LIST_FLAG_GET_CONCODE(oldflagv) == HCL_CONCODE_ARRAY) array_level--; + break; + } + + case HCL_IOTOK_VBAR: +/* TODO: think wheter to allow | | inside a quoted list... */ +/* TODO: revise this part ... */ + if (array_level > 0) + { + hcl_setsynerr (hcl, HCL_SYNERR_VBARBANNED, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + if (get_symbol_array_literal(hcl, &obj) <= -1) return -1; + break; + + case HCL_IOTOK_NIL: + cnode = hcl_makecnodenil(hcl, TOKEN_LOC(hcl)); + if (HCL_UNLIKELY(!cnode)) return -1; + break; + + case HCL_IOTOK_TRUE: + cnode = hcl_makecnodetrue(hcl, TOKEN_LOC(hcl)); + if (HCL_UNLIKELY(!cnode)) return -1; + break; + + case HCL_IOTOK_FALSE: + cnode = hcl_makecnodefalse(hcl, TOKEN_LOC(hcl)); + if (HCL_UNLIKELY(!cnode)) return -1; + break; + + case HCL_IOTOK_SMPTRLIT: + { + hcl_oow_t i; + hcl_oow_t v = 0; + + HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 3); + for (i = 2; i < TOKEN_NAME_LEN(hcl); i++) + { + HCL_ASSERT (hcl, is_xdigitchar(TOKEN_NAME_CHAR(hcl, i))); + v = v * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16); + } + + if (!HCL_IN_SMPTR_RANGE(v)) + { + hcl_setsynerr (hcl, HCL_SYNERR_SMPTRLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + + obj = HCL_SMPTR_TO_OOP(v); + break; + } + + case HCL_IOTOK_ERRLIT: + { + hcl_oow_t i; + hcl_ooi_t v = 0; + + HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 3); + for (i = 2; i < TOKEN_NAME_LEN(hcl); i++) + { + HCL_ASSERT (hcl, is_digitchar(TOKEN_NAME_CHAR(hcl, i))); + v = v * 10 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 10); + + if (v > HCL_ERROR_MAX) + { + hcl_setsynerr (hcl, HCL_SYNERR_ERRLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + } + + cnode = hcl_makecnodeerrlit(hcl, TOKEN_LOC(hcl), v); + break; + } + + case HCL_IOTOK_CHARLIT: + cnode = hcl_makecnodecharlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME_CHAR(hcl, 0)); + break; + + case HCL_IOTOK_NUMLIT: + //obj = string_to_num(hcl, TOKEN_NAME(hcl), TOKEN_TYPE(hcl) == HCL_IOTOK_RADNUMLIT); + cnode = hcl_makecnodenumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + break; + + case HCL_IOTOK_RADNUMLIT: + //obj = string_to_num(hcl, TOKEN_NAME(hcl), TOKEN_TYPE(hcl) == HCL_IOTOK_RADNUMLIT); + cnode = hcl_makecnoderadnumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + break; + + case HCL_IOTOK_FPDECLIT: + //obj = string_to_fpdec(hcl, TOKEN_NAME(hcl), TOKEN_LOC(hcl)); + cnode = hcl_makecnodefpdeclit(hcl, TOKEN_LOC(hcl), TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + break; + + /* + case HCL_IOTOK_REAL: + obj = hcl_makerealnum(hcl, HCL_IOTOK_RVAL(hcl)); + break; + */ + + case HCL_IOTOK_STRLIT: + cnode = hcl_makecnodestrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + break; + + case HCL_IOTOK_IDENT: + obj = hcl_makesymbol(hcl, TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + break; + + case HCL_IOTOK_IDENT_DOTTED: + obj = hcl_makesymbol(hcl, TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl)); + if (obj && !hcl_getatsysdic(hcl, obj)) + { + /* query the module for information if it is the first time + * when the dotted symbol is seen */ + + hcl_pfbase_t* pfbase; + hcl_mod_t* mod; + hcl_oop_t val; + unsigned int kernel_bits; + + pfbase = hcl_querymod(hcl, TOKEN_NAME_PTR(hcl), TOKEN_NAME_LEN(hcl), &mod); + if (!pfbase) + { + /* TODO switch to syntax error */ + return -1; + } + + hcl_pushvolat (hcl, &obj); + switch (pfbase->type) + { + case HCL_PFBASE_FUNC: + kernel_bits = 2; + val = hcl_makeprim(hcl, pfbase->handler, pfbase->minargs, pfbase->maxargs, mod); + break; + + case HCL_PFBASE_VAR: + kernel_bits = 1; + val = hcl->_nil; + break; + + case HCL_PFBASE_CONST: + /* TODO: create a value from the pfbase information. it needs to get extended first + * can i make use of pfbase->handler type-cast to a differnt type? */ + kernel_bits = 2; + val = hcl->_nil; + break; + + default: + hcl_popvolat (hcl); + hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid pfbase type - %d\n", pfbase->type); + return -1; + } + + if (!val || !hcl_putatsysdic(hcl, obj, val)) + { + hcl_popvolat (hcl); + return -1; + } + hcl_popvolat (hcl); + + /* make this dotted symbol special that it can't get changed + * to a different value */ + HCL_OBJ_SET_FLAGS_KERNEL (obj, kernel_bits); + } + break; + } + + if (!obj) return -1; + +#if 0 + /* check if the element is read for a quoted list */ + while (flagv & QUOTED) + { + int oldflagv; + + HCL_ASSERT (hcl, level > 0); + + /* if so, append the element read into the quote list */ + if (chain_to_list(hcl, obj) == HCL_NULL) return -1; + + /* exit out of the quoted list. the quoted list can have + * one element only. */ + obj = leave_list(hcl, &flagv, &oldflagv); + + /* one level up toward the top */ + level--; + + if (LIST_FLAG_GET_CONCODE(oldflagv) == HCL_CONCODE_ARRAY) array_level--; + } +#endif + + /* check if we are at the top level */ + if (level <= 0) break; /* yes */ + + /* if not, append the element read into the current list. + * if we are not at the top level, we must be in a list */ + if (chain_to_list(hcl, obj) == HCL_NULL) return -1; + clear_comma_colon_flag (hcl); + + /* read the next token */ + GET_TOKEN (hcl); + } + + /* upon exit, we must be at the top level */ + HCL_ASSERT (hcl, level == 0); + HCL_ASSERT (hcl, array_level == 0); + + hcl->c->r.e = obj; + return 0; +} + +static HCL_INLINE int __read (hcl_t* hcl) +{ + if (get_token(hcl) <= -1) return -1; + if (TOKEN_TYPE(hcl) == HCL_IOTOK_EOF) + { + hcl_seterrnum (hcl, HCL_EFINIS); + return -1; + } + return read_object(hcl); +} + +hcl_oop_t hcl_read (hcl_t* hcl) +{ + HCL_ASSERT (hcl, hcl->c && hcl->c->reader); + if (__read(hcl) <= -1) return HCL_NULL; + return hcl->c->r.e; +} + +/* ========================================================================= */ + +/* TODO: rename compiler to something else that can include reader, printer, and compiler + * move compiler intialization/finalization here to more common place */ + +static void gc_compiler (hcl_t* hcl) +{ + hcl_ooi_t i; + + hcl->c->r.s = hcl_moveoop (hcl, hcl->c->r.s); + hcl->c->r.e = hcl_moveoop (hcl, hcl->c->r.e); + + + for (i = 0; i <= hcl->c->cfs.top; i++) + { + hcl->c->cfs.ptr[i].operand = hcl_moveoop(hcl, hcl->c->cfs.ptr[i].operand); + } + + for (i = 0; i < hcl->c->tv.size; i++) + { + hcl->c->tv.ptr[i] = hcl_moveoop (hcl, hcl->c->tv.ptr[i]); + } + + for (i = 0; i < hcl->c->r.salit.size; i++) + { + hcl->c->r.salit.ptr[i] = hcl_moveoop (hcl, hcl->c->r.salit.ptr[i]); + } +} + +static void fini_compiler (hcl_t* hcl) +{ + /* called before the hcl object is closed */ + if (hcl->c) + { + if (hcl->c->r.balit.ptr) + { + hcl_freemem (hcl, hcl->c->r.balit.ptr); + hcl->c->r.balit.ptr = HCL_NULL; + hcl->c->r.balit.size = 0; + hcl->c->r.balit.capa = 0; + } + + if (hcl->c->r.salit.ptr) + { + hcl_freemem (hcl, hcl->c->r.salit.ptr); + hcl->c->r.salit.ptr = HCL_NULL; + hcl->c->r.salit.size = 0; + hcl->c->r.salit.capa = 0; + } + + if (hcl->c->cfs.ptr) + { + hcl_freemem (hcl, hcl->c->cfs.ptr); + hcl->c->cfs.ptr = HCL_NULL; + hcl->c->cfs.top = -1; + hcl->c->cfs.capa = 0; + } + + if (hcl->c->tv.ptr) + { + hcl_freemem (hcl, hcl->c->tv.ptr); + hcl->c->tv.ptr = HCL_NULL; + hcl->c->tv.size = 0; + hcl->c->tv.capa = 0; + } + + if (hcl->c->blk.info) + { + hcl_freemem (hcl, hcl->c->blk.info); + hcl->c->blk.info = HCL_NULL; + hcl->c->blk.info_capa = 0; + hcl->c->blk.depth = -1; + } + + clear_io_names (hcl); + if (hcl->c->tok.name.ptr) hcl_freemem (hcl, hcl->c->tok.name.ptr); + + hcl_detachio (hcl); + + hcl_freemem (hcl, hcl->c); + hcl->c = HCL_NULL; + } +} + +int hcl_attachio (hcl_t* hcl, hcl_ioimpl_t reader, hcl_ioimpl_t printer) +{ + int n; + hcl_cb_t* cbp = HCL_NULL; + + if (!reader || !printer) + { + hcl_seterrbfmt (hcl, HCL_EINVAL, "reader and/or printer not supplied"); + return -1; + } + + if (!hcl->c) + { + hcl_cb_t cb; + + HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb)); + cb.gc = gc_compiler; + cb.fini = fini_compiler; + cbp = hcl_regcb(hcl, &cb); + if (!cbp) return -1; + + hcl->c = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c)); + if (!hcl->c) + { + hcl_deregcb (hcl, cbp); + return -1; + } + + hcl->c->ilchr_ucs.ptr = &hcl->c->ilchr; + hcl->c->ilchr_ucs.len = 1; + + hcl->c->r.s = hcl->_nil; + hcl->c->r.e = hcl->_nil; + + hcl->c->cfs.top = -1; + hcl->c->blk.depth = -1; + } + else if (hcl->c->reader || hcl->c->printer) + { + hcl_seterrnum (hcl, HCL_EPERM); /* TODO: change this error code */ + return -1; + } + + /* Some IO names could have been stored in earlier calls to this function. + * I clear such names before i begin this function. i don't clear it + * at the end of this function because i may be referenced as an error + * location */ + clear_io_names (hcl); + + /* initialize some key fields */ + hcl->c->printer = printer; + hcl->c->reader = reader; + hcl->c->nungots = 0; + + /* The name field and the includer field are HCL_NULL + * for the main stream */ + HCL_MEMSET (&hcl->c->inarg, 0, HCL_SIZEOF(hcl->c->inarg)); + hcl->c->inarg.line = 1; + hcl->c->inarg.colm = 1; + + /* open the top-level stream */ + n = hcl->c->reader(hcl, HCL_IO_OPEN, &hcl->c->inarg); + if (n <= -1) goto oops; + + HCL_MEMSET (&hcl->c->outarg, 0, HCL_SIZEOF(hcl->c->outarg)); + n = hcl->c->printer(hcl, HCL_IO_OPEN, &hcl->c->outarg); + if (n <= -1) + { + hcl->c->reader (hcl, HCL_IO_CLOSE, &hcl->c->inarg); + goto oops; + } + + /* the stream is open. set it as the current input stream */ + hcl->c->curinp = &hcl->c->inarg; + return 0; + +oops: + if (cbp) + { + hcl_deregcb (hcl, cbp); + hcl_freemem (hcl, hcl->c); + hcl->c = HCL_NULL; + } + else + { + hcl->c->printer = HCL_NULL; + hcl->c->reader = HCL_NULL; + } + return -1; +} + +void hcl_flushio (hcl_t* hcl) +{ + if (hcl->c) + { + if (hcl->c->printer) hcl->c->printer (hcl, HCL_IO_FLUSH, &hcl->c->outarg); + } +} + +void hcl_detachio (hcl_t* hcl) +{ + /* an error occurred and control has reached here + * probably, some included files might not have been + * closed. close them */ + + if (hcl->c) + { + if (hcl->c->reader) + { + while (hcl->c->curinp != &hcl->c->inarg) + { + hcl_ioinarg_t* prev; + + /* nothing much to do about a close error */ + hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp); + + prev = hcl->c->curinp->includer; + HCL_ASSERT (hcl, hcl->c->curinp->name != HCL_NULL); + hcl_freemem (hcl, hcl->c->curinp); + hcl->c->curinp = prev; + } + + hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp); + hcl->c->reader = HCL_NULL; /* ready for another attachment */ + } + + if (hcl->c->printer) + { + hcl->c->printer (hcl, HCL_IO_CLOSE, &hcl->c->outarg); + hcl->c->printer = HCL_NULL; /* ready for another attachment */ + } + } +} + +hcl_iolxc_t* hcl_readchar (hcl_t* hcl) +{ + int n = get_char(hcl); + if (n <= -1) return HCL_NULL; + return &hcl->c->lxc; +} + +int hcl_unreadchar (hcl_t* hcl, const hcl_iolxc_t* c) +{ + if (hcl->c->nungots >= HCL_COUNTOF(hcl->c->ungot)) + { + hcl_seterrbfmt (hcl, HCL_EBUFFULL, "character unread buffer full"); + return -1; + } + + unget_char (hcl, c); + return 0; +}