2021-01-09 16:53:47 +00:00
/*
Copyright ( c ) 2016 - 2018 Chung , Hyung - Hwan . All rights reserved .
Redistribution and use in source and binary forms , with or without
modification , are permitted provided that the following conditions
are met :
1. Redistributions of source code must retain the above copyright
notice , this list of conditions and the following disclaimer .
2. Redistributions in binary form must reproduce the above copyright
notice , this list of conditions and the following disclaimer in the
documentation and / or other materials provided with the distribution .
THIS SOFTWARE IS PROVIDED BY THE AUTHOR " AS IS " AND ANY EXPRESS OR
2021-01-12 00:21:43 +00:00
IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE IMPLIED WARRANTIES
2021-01-09 16:53:47 +00:00
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED .
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT , INDIRECT ,
INCIDENTAL , SPECIAL , EXEMPLARY , OR CONSEQUENTIAL DAMAGES ( INCLUDING , BUT
NOT LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE ,
DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY , OR TORT
( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
*/
# include "hcl-prv.h"
/* Alignment units for growable buffers.
 * BUFFER_ALIGN is what copy_string_to() passes to HCL_ALIGN() when it
 * computes a new buffer capacity.
 * NOTE(review): the BALIT/SALIT/ARLIT variants are not referenced in this
 * part of the file - presumably they serve the byte-array/string/array
 * literal buffers; confirm against their users. */
# define BUFFER_ALIGN 128
# define BALIT_BUFFER_ALIGN 128
# define SALIT_BUFFER_ALIGN 128
# define ARLIT_BUFFER_ALIGN 128
/* Fixed vocabulary used by the reader.
 *
 * Each entry stores the number of meaningful characters (len) followed by
 * the characters themselves. The text is NOT null-terminated when all 11
 * slots are used (see #\backspace), so always pair str with len.
 *
 * The entries must stay in the same order as the members of enum voca_id_t
 * because the table is indexed with those enumerators. */
static struct voca_t
{
	hcl_oow_t len;
	hcl_ooch_t str[11];
} vocas[] =
{
	/* directives */
	{  8, { '#','i','n','c','l','u','d','e' } },
	{  7, { '#','p','r','a','g','m','a' } },

	/* named character literals */
	{ 11, { '#','\\','b','a','c','k','s','p','a','c','e' } },
	{ 10, { '#','\\','l','i','n','e','f','e','e','d' } },
	{  9, { '#','\\','n','e','w','l','i','n','e' } },
	{  5, { '#','\\','n','u','l' } },
	{  6, { '#','\\','p','a','g','e' } },
	{  8, { '#','\\','r','e','t','u','r','n' } },
	{  8, { '#','\\','r','u','b','o','u','t' } },
	{  7, { '#','\\','s','p','a','c','e' } },
	{  5, { '#','\\','t','a','b' } },
	{  6, { '#','\\','v','t','a','b' } },

	/* keywords */
	{  4, { 'n','u','l','l' } },
	{  4, { 't','r','u','e' } },
	{  5, { 'f','a','l','s','e' } },
	{  4, { 's','e','l','f' } },
	{  5, { 's','u','p','e','r' } },

	/* symbols forged by the reader for assignment lists */
	{  3, { 's','e','t' } },
	{  5, { 's','e','t','-','r' } },

	/* printable forms of the container kinds */
	{  3, { '(',' ',')'          /* XLIST */ } },
	{  4, { '(',':',' ',')'      /* MLIST */ } },
	/* the length used to be 3, which cut off the closing parenthesis */
	{  4, { '(',':','=',')'      /* ALIST */ } },
	{  3, { '{',' ','}'          /* BLOCK */ } },
	{  4, { '#','[',' ',']'      /* ARRAY */ } },
	{  5, { '#','b','[',' ',']'  /* BYTE ARRAY */ } },
	{  5, { '#','c','[',' ',']'  /* CHAR ARRAY */ } },
	{  4, { '#','{',' ','}'      /* DIC */ } },
	{  4, { '#','(',' ',')'      /* QLIST */ } },
	{  3, { '|',' ','|'          /* VLIST */ } },

	/* pseudo tokens */
	{  5, { '<','E','O','L','>' } },
	{  5, { '<','E','O','F','>' } }
};
/* Index of each entry in the vocas[] table.
 * The enumerators must be listed in exactly the same order as the table. */
typedef enum voca_id_t
{
	/* directives */
	VOCA_INCLUDE = 0,
	VOCA_PRAGMA,

	/* named character literals */
	VOCA_CHAR_BACKSPACE,
	VOCA_CHAR_LINEFEED,
	VOCA_CHAR_NEWLINE,
	VOCA_CHAR_NUL,
	VOCA_CHAR_PAGE,
	VOCA_CHAR_RETURN,
	VOCA_CHAR_RUBOUT,
	VOCA_CHAR_SPACE,
	VOCA_CHAR_TAB,
	VOCA_CHAR_VTAB,

	/* keywords */
	VOCA_KW_NULL,
	VOCA_KW_TRUE,
	VOCA_KW_FALSE,
	VOCA_KW_SELF,
	VOCA_KW_SUPER,

	/* forged symbols */
	VOCA_SYM_SET,
	VOCA_SYM_SET_R,

	/* container kinds */
	VOCA_XLIST,
	VOCA_MLIST,
	VOCA_ALIST, /* assignment list */
	VOCA_BLOCK,
	VOCA_ARRAY,
	VOCA_BYTEARRAY,
	VOCA_CHARARRAY,
	VOCA_DIC,
	VOCA_QLIST,
	VOCA_VLIST,

	/* pseudo tokens */
	VOCA_EOL,
	VOCA_EOF
} voca_id_t;
/* State bits kept in the flagv field of a reader stack node. */
enum list_flag_t
{
	QUOTED       = 0x0001,
	DOTTED       = 0x0002,
	COMMAED      = 0x0004,
	COLONED      = 0x0008,
	COLONEQED    = 0x0010,
	CLOSED       = 0x0020,
	JSON         = 0x0040,
	DATA_LIST    = 0x0080,
	AUTO_FORGED  = 0x0100, /* automatically added list. only applicable to XLIST */
	AT_BEGINNING = 0x0200

	/* A TOTAL of 12 items is allowed here because LIST_FLAG_GET_CONCODE()
	 * and LIST_FLAG_SET_CONCODE() reserve the lower 12 bits as flag bits. */
};
2023-11-10 09:06:49 +00:00
/* The container code shares an integer with the list flags: the flags use
 * the lower 12 bits and the code sits in the 12 bits above them.
 * LIST_FLAG_GET_CONCODE(x) extracts the code from x.
 * LIST_FLAG_SET_CONCODE(x,type) replaces the code stored in the lvalue x
 * while leaving the flag bits alone. The clear mask covers the same 12-bit
 * field the getter reads, so no stale bit of an older code can survive. */
#define LIST_FLAG_GET_CONCODE(x) (((x) >> 12) & 0x0FFF)
#define LIST_FLAG_SET_CONCODE(x,type) ((x) = ((x) & ~0xFFF000) | ((type) << 12))
2021-01-09 16:53:47 +00:00
2023-11-12 12:54:17 +00:00
static struct
{
int closer ;
hcl_synerrnum_t synerr ;
int voca_id ;
} cons_info [ ] =
{
/*[HCL_CONCODE_XLIST] =*/ { HCL_TOK_RPAREN , HCL_SYNERR_RPAREN , VOCA_XLIST } , /* XLIST ( ) */
2024-01-06 04:15:15 +00:00
/*[HCL_CONCODE_MLIST] =*/ { HCL_TOK_RPAREN , HCL_SYNERR_RPAREN , VOCA_MLIST } , /* MLIST (obj:message) */
/*[HCL_CONCODE_ALIST] =*/ { HCL_TOK_RPAREN , HCL_SYNERR_RPAREN , VOCA_ALIST } , /* ALIST (var:=value) */
2023-11-12 12:54:17 +00:00
/*[HCL_CONCODE_BLOCK] =*/ { HCL_TOK_RBRACE , HCL_SYNERR_RBRACE , VOCA_BLOCK } , /* BLOCK { } */
2024-02-02 05:57:46 +00:00
/*[HCL_CONCODE_ARRAY] =*/ { HCL_TOK_RBRACK , HCL_SYNERR_RBRACK , VOCA_ARRAY } , /* ARRAY #[ ] */
/*[HCL_CONCODE_BYTEARRAY] =*/ { HCL_TOK_RBRACK , HCL_SYNERR_RBRACK , VOCA_BYTEARRAY } , /* BYTEARRAY #b[ ] */
/*[HCL_CONCODE_CHARARRAY] =*/ { HCL_TOK_RBRACK , HCL_SYNERR_RBRACK , VOCA_CHARARRAY } , /* CHARARRAY #c[ ] */
2023-11-12 12:54:17 +00:00
/*[HCL_CONCODE_DIC] =*/ { HCL_TOK_RBRACE , HCL_SYNERR_RBRACE , VOCA_DIC } , /* DIC #{ } */
/*[HCL_CONCODE_QLIST] =*/ { HCL_TOK_RPAREN , HCL_SYNERR_RPAREN , VOCA_QLIST } , /* QLIST #( ) */
/* VLIST's closer and synerr are not used. there is dedicated logic in feed_process_token(). only voca_id is used */
/*[HCL_CONCODE_VLIST] =*/ { HCL_TOK_VBAR , HCL_SYNERR_VBAR , VOCA_VLIST } /* VLIST | | */
} ;
2023-05-18 01:24:01 +00:00
/* forward declaration. the definition is not part of this section of the file. */
static int init_compiler ( hcl_t * hcl ) ;
2021-01-09 16:53:47 +00:00
/* Returns 1 if c is a blank, form feed, line feed, carriage return,
 * horizontal tab or vertical tab, and 0 otherwise. */
static HCL_INLINE int is_spacechar (hcl_ooci_t c)
{
	/* TODO: handle other space unicode characters */
	return c == ' '  ||
	       c == '\f' || /* formfeed */
	       c == '\n' || /* linefeed */
	       c == '\r' || /* carriage return */
	       c == '\t' || /* horizontal tab */
	       c == '\v';   /* vertical tab */
}
2022-05-25 14:23:43 +00:00
/* Returns 1 if c ends a line, 0 otherwise. Only the line feed counts. */
static HCL_INLINE int is_linebreak (hcl_ooci_t c)
{
	/* TODO: different line end conventions? */
	/* keep this character among the ones accepted by is_spacechar() */
	if (c == '\n') return 1;
	return 0;
}
2021-01-09 16:53:47 +00:00
/* Returns 1 if c is one of the decimal digits '0' to '9', 0 otherwise. */
static HCL_INLINE int is_digitchar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	if (c < '0') return 0;
	return c <= '9';
}
/* Returns 1 if c is a hexadecimal digit (0-9, A-F, a-f), 0 otherwise. */
static HCL_INLINE int is_xdigitchar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	if (is_digitchar(c)) return 1;
	if (c >= 'A' && c <= 'F') return 1;
	return c >= 'a' && c <= 'f';
}
2024-02-04 12:35:31 +00:00
#if 0
/* currently compiled out */

/* Returns 1 if c is an ASCII letter, 0 otherwise. */
static HCL_INLINE int is_alphachar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	if (c >= 'a' && c <= 'z') return 1;
	return c >= 'A' && c <= 'Z';
}

/* Returns 1 if c is an ASCII letter or a decimal digit, 0 otherwise. */
static HCL_INLINE int is_alnumchar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	return is_alphachar(c) || is_digitchar(c);
}
#endif
2021-01-09 16:53:47 +00:00
2022-05-25 14:23:43 +00:00
/* Returns 1 if c delimits a token, 0 otherwise. Delimiters are the
 * punctuation characters listed below, any character accepted by
 * is_spacechar(), and HCL_OOCI_EOF. */
static HCL_INLINE int is_delimchar (hcl_ooci_t c)
{
	switch (c)
	{
		case '(': case ')':
		case '[': case ']':
		case '{': case '}':
		case '|': case ',': case '.': case ':': case ';':
		/* the first characters of tokens in delim_token_tab up to this point */
		case '#':
		case '"':
		case '\'':
		case '\\':
			return 1;

		default:
			return is_spacechar(c) || c == HCL_OOCI_EOF;
	}
}
2024-02-04 12:35:31 +00:00
/* Returns 1 if c is one of the characters & * + - / % < > = @ | ~
 * and 0 otherwise. */
static HCL_INLINE int is_binopchar (hcl_ooci_t c)
{
	switch (c)
	{
		case '&': case '*': case '+': case '-': case '/': case '%':
		case '<': case '>': case '=': case '@': case '|': case '~':
			return 1;

		default:
			return 0;
	}
}
2024-01-06 09:11:39 +00:00
/* TODO: remove this and use the one in comp.c */
2021-01-09 16:53:47 +00:00
/* Copies or appends the characters of src into the growable string dst.
 *
 *  src       - characters to store
 *  dst       - destination string. dst->ptr is reallocated when too small
 *  dst_capa  - in/out: capacity of dst->ptr counted in characters
 *  append    - non-zero to add after the current contents of dst,
 *              zero to overwrite dst from the beginning
 *  add_delim - when appending, a character to insert before the new
 *              characters. '\0' means no delimiter
 *
 * Returns 0 on success. Returns -1 if the buffer can't be grown, in which
 * case dst is left untouched and the error is set in hcl. */
static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, hcl_oow_t* dst_capa, int append, hcl_ooch_t add_delim)
{
	hcl_oow_t write_at = 0;      /* where the new characters begin */
	hcl_oow_t total = src->len;  /* length of dst after the operation */

	if (append)
	{
		write_at = dst->len;
		total += dst->len;
		if (add_delim != '\0') total++; /* room for the delimiter */
	}

	if (total > *dst_capa)
	{
		hcl_oow_t new_capa;
		hcl_ooch_t* grown;

		/* the new capacity is derived from BUFFER_ALIGN */
		new_capa = HCL_ALIGN(total, BUFFER_ALIGN);
		grown = (hcl_ooch_t*)hcl_reallocmem(hcl, dst->ptr, HCL_SIZEOF(*grown) * new_capa);
		if (HCL_UNLIKELY(!grown))
		{
			const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
			hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "failed to grow token buffer - %js", orgmsg);
			return -1;
		}

		*dst_capa = new_capa;
		dst->ptr = grown;
	}

	if (append && add_delim)
	{
		dst->ptr[write_at] = add_delim;
		write_at++;
	}

	hcl_copy_oochars (&dst->ptr[write_at], src->ptr, src->len);
	dst->len = total;
	return 0;
}
/* Convenience macros for the lexer.
 *
 * GET_CHAR, GET_CHAR_TO, ADD_TOKEN_STR and ADD_TOKEN_CHAR execute
 * 'return -1' in the CALLING function when the wrapped call fails, so they
 * may only be used inside functions that return an int error code.
 *
 * The second parameter of GET_CHAR_TO is deliberately not named 'c':
 * a macro parameter with that name would also be substituted into the
 * member accesses of '(hcl)->c->lxc.c' and the macro would then work only
 * when the caller's variable happened to be called 'c'. */
#define GET_CHAR(hcl) \
	do { if (get_char(hcl) <= -1) return -1; } while (0)

/* reads the next character and stores it into the lvalue 'var' */
#define GET_CHAR_TO(hcl,var) \
	do { \
		if (get_char(hcl) <= -1) return -1; \
		var = (hcl)->c->lxc.c; \
	} while (0)

#define ADD_TOKEN_STR(hcl,s,l) \
	do { if (add_token_str(hcl, s, l) <= -1) return -1; } while (0)

#define ADD_TOKEN_CHAR(hcl,ch) \
	do { if (add_token_char(hcl, ch) <= -1) return -1; } while (0)

/* accessors for the token being built (hcl->c->tok) */
#define CLEAR_TOKEN_NAME(hcl) ((hcl)->c->tok.name.len = 0)
#define SET_TOKEN_TYPE(hcl,tv) ((hcl)->c->tok.type = (tv))
#define SET_TOKEN_LOC(hcl,locv) ((hcl)->c->tok.loc = *(locv))

#define TOKEN_TYPE(hcl) ((hcl)->c->tok.type)
#define TOKEN_NAME(hcl) (&(hcl)->c->tok.name)
#define TOKEN_NAME_CAPA(hcl) ((hcl)->c->tok.name_capa)
#define TOKEN_NAME_LEN(hcl) ((hcl)->c->tok.name.len)
#define TOKEN_NAME_PTR(hcl) ((hcl)->c->tok.name.ptr)
#define TOKEN_NAME_CHAR(hcl,index) ((hcl)->c->tok.name.ptr[index])
#define TOKEN_LOC(hcl) (&(hcl)->c->tok.loc)

/* location of the character read most recently (hcl->c->lxc) */
#define LEXER_LOC(hcl) (&(hcl)->c->lxc.l)
/* Appends len characters starting at ptr to the name of the current token.
 * Returns whatever copy_string_to() returns. */
static HCL_INLINE int add_token_str (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len)
{
	hcl_oocs_t piece;

	piece.len = len;
	piece.ptr = (hcl_ooch_t*)ptr; /* only read from, never written to */

	/* append mode without a delimiter */
	return copy_string_to(hcl, &piece, TOKEN_NAME(hcl), &TOKEN_NAME_CAPA(hcl), 1, '\0');
}
/* Returns 1 if the name of the current token has the same length and the
 * same characters as the vocabulary entry 'id', 0 otherwise. */
static HCL_INLINE int does_token_name_match (hcl_t* hcl, voca_id_t id)
{
	struct voca_t* word = &vocas[id];

	if (TOKEN_NAME_LEN(hcl) != word->len) return 0;
	return hcl_equal_oochars(TOKEN_NAME_PTR(hcl), word->str, word->len)? 1: 0;
}
/* Appends the single character c to the name of the current token.
 * Returns whatever copy_string_to() returns. */
static HCL_INLINE int add_token_char (hcl_t* hcl, hcl_ooch_t c)
{
	/* a character is simply a string of length 1 */
	return add_token_str(hcl, &c, 1);
}
2023-11-04 13:58:31 +00:00
/* Pushes a copy of *c onto the unget stack so that get_char() hands it
 * out again before reading from the input. */
static HCL_INLINE void unget_char (hcl_t* hcl, const hcl_lxc_t* c)
{
	/* the unget buffer must still have a free slot */
	HCL_ASSERT (hcl, hcl->c->nungots < HCL_COUNTOF(hcl->c->ungot));

	hcl->c->ungot[hcl->c->nungots] = *c;
	hcl->c->nungots++;
}
2023-11-04 13:58:31 +00:00
/* Checks whether the name of the current token is a directive.
 * Stores HCL_TOK_INCLUDE or HCL_TOK_PRAGMA into *tok_type and returns 0 if
 * it is. Returns -1 without touching *tok_type if it is not. */
static int get_directive_token_type (hcl_t* hcl, hcl_tok_type_t* tok_type)
{
	if (does_token_name_match(hcl, VOCA_INCLUDE)) *tok_type = HCL_TOK_INCLUDE;
	else if (does_token_name_match(hcl, VOCA_PRAGMA)) *tok_type = HCL_TOK_PRAGMA;
	else return -1;

	return 0;
}
2023-11-05 13:31:33 +00:00
/* Reads the next character from the input stream 'inp' into inp->lxc
 * together with its line, column and file name.
 *
 * Returns  1 if a normal character has been read,
 *          0 if the end of the input has been reached
 *            (inp->lxc.c is HCL_OOCI_EOF then),
 *         -1 if the reader callback has failed. */
static int _get_char (hcl_t* hcl, hcl_io_cciarg_t* inp)
{
	hcl_ooci_t ch;

	if (inp->b.pos >= inp->b.len)
	{
		/* the buffer is exhausted. ask the reader for more */
		if (hcl->c->cci_rdr(hcl, HCL_IO_READ, inp) <= -1) return -1;

		if (inp->xlen <= 0)
		{
			/* nothing more. report EOF at the current position
			 * without advancing the column */
			inp->lxc.c = HCL_OOCI_EOF;
			inp->lxc.l.file = inp->name;
			inp->lxc.l.line = inp->line;
			inp->lxc.l.colm = inp->colm;
			return 0;
		}

		inp->b.len = inp->xlen;
		inp->b.pos = 0;
	}

	/* inp->lxc.c still holds the PREVIOUS character at this point. the new
	 * character is still in the buffer. inp->colm has already been
	 * incremented when the previous character was read. */
	if (inp->lxc.c == '\n' || inp->lxc.c == '\r')
	{
		/* a line terminator at column 1 that differs from the terminator
		 * recorded in inp->nl is most likely the second character of
		 * a '\r\n' or '\n\r' sequence. it must not start another line */
		int second_of_pair = (inp->line > 1 && inp->colm == 2 && inp->nl != inp->lxc.c);

		if (!second_of_pair)
		{
			/* the previous character was a newline. move to the next line
			 * and go back to column 1. the line number is bumped here
			 * rather than when the newline itself is read so that TOK_EOF
			 * following it is reported on the same line as the newline */
			inp->line++;
			inp->colm = 1;
			inp->nl = inp->lxc.c;
		}
	}

	ch = inp->buf[inp->b.pos++];

	inp->lxc.c = ch;
	inp->lxc.l.file = inp->name;
	inp->lxc.l.line = inp->line;
	inp->lxc.l.colm = inp->colm++;
	return 1;
}
2022-08-02 13:41:13 +00:00
/* Loads the next character into hcl->c->lxc. Characters pushed back with
 * unget_char() are handed out first, most recent one first.
 *
 * Returns 0 when the character comes from the unget buffer. Otherwise it
 * returns the result of _get_char() on the current input: 1 for a normal
 * character, 0 for EOF, -1 for failure. */
static int get_char (hcl_t* hcl)
{
	if (hcl->c->nungots <= 0)
	{
		/* nothing pushed back. read from the current input stream */
		int n = _get_char(hcl, hcl->c->curinp);
		if (n >= 0) hcl->c->lxc = hcl->c->curinp->lxc;
		return n;
	}

	/* pop the most recently ungotten character */
	hcl->c->nungots--;
	hcl->c->lxc = hcl->c->ungot[hcl->c->nungots];
	return 0;
}
2023-11-04 13:58:31 +00:00
/* Returns the token type for the identifier-like text v: the keyword
 * token type if v is one of null, true, false, self, super, and
 * HCL_TOK_IDENT for anything else. */
static hcl_tok_type_t classify_ident_token (hcl_t* hcl, const hcl_oocs_t* v)
{
	static struct
	{
		int voca_id;
		hcl_tok_type_t type;
	} tab[] =
	{
		{ VOCA_KW_NULL,  HCL_TOK_NIL   },
		{ VOCA_KW_TRUE,  HCL_TOK_TRUE  },
		{ VOCA_KW_FALSE, HCL_TOK_FALSE },
		{ VOCA_KW_SELF,  HCL_TOK_SELF  },
		{ VOCA_KW_SUPER, HCL_TOK_SUPER }
	};
	hcl_oow_t i = 0;

	while (i < HCL_COUNTOF(tab))
	{
		struct voca_t* kw = &vocas[tab[i].voca_id];
		if (hcl_comp_oochars(v->ptr, v->len, kw->str, kw->len) == 0) return tab[i].type;
		i++;
	}

	return HCL_TOK_IDENT;
}
2023-05-19 03:55:08 +00:00
/* Returns 1 if the pointer sr_name is still referenced by the recorded
 * syntax error location or by the location of the current input stream
 * or any of its includers, 0 otherwise. Pointers are compared, not the
 * text they point to. */
static int is_sr_name_in_use (hcl_t* hcl, const hcl_ooch_t* sr_name)
{
	/* [NOTE]
	 *  this is very error prone. if the places referencing this sr_name
	 *  change in the source code, this function must be modified too. */
	hcl_io_cciarg_t* inp;

	if (hcl->c->synerr.loc.file == sr_name) return 1;

	/* walk up the chain of including streams */
	for (inp = hcl->c->curinp; inp; inp = inp->includer)
	{
		if (inp->lxc.l.file == sr_name) return 1;
	}

	return 0;
}
/* Frees every node of the source name list and leaves the list empty. */
static void clear_sr_names (hcl_t* hcl)
{
	HCL_ASSERT (hcl, hcl->c != HCL_NULL);

	for (;;)
	{
		hcl_link_t* node = hcl->c->sr_names;
		if (!node) break;

		/* unlink the head before releasing it */
		hcl->c->sr_names = node->link;
		hcl_freemem (hcl, node);
	}
}
2023-05-19 03:55:08 +00:00
/* Interns a source name.
 *
 * Returns the null-terminated copy already stored in the source name list
 * if one equal to 'name' exists. Otherwise it allocates a new list node
 * with the copy placed right after the link header, pushes the node to the
 * front of the list and returns the copy.
 *
 * Returns HCL_NULL with the error set in hcl if the allocation fails. */
static const hcl_ooch_t* add_sr_name (hcl_t* hcl, const hcl_oocs_t* name)
{
	hcl_link_t* link;
	hcl_ooch_t* nptr;

	/* TODO: make search faster */
	for (link = hcl->c->sr_names; link; link = link->link)
	{
		nptr = (hcl_ooch_t*)(link + 1); /* the text follows the link header */
		if (hcl_comp_oochars_oocstr(name->ptr, name->len, nptr) == 0) return nptr;
	}

	/* not found. one extra character for the terminating null */
	link = (hcl_link_t*)hcl_callocmem(hcl, HCL_SIZEOF(*link) + HCL_SIZEOF(hcl_ooch_t) * (name->len + 1));
	if (HCL_UNLIKELY(!link))
	{
		const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
		/* the message used to read 'failed to source name', lacking the verb */
		hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "failed to add source name [%.*js] - %js", name->len, name->ptr, orgmsg);
		return HCL_NULL;
	}

	nptr = (hcl_ooch_t*)(link + 1);
	hcl_copy_oochars (nptr, name->ptr, name->len);
	nptr[name->len] = '\0';

	/* push to the front of the list */
	link->link = hcl->c->sr_names;
	hcl->c->sr_names = link;

	return nptr;
}
/* -------------------------------------------------------------------------- */
2023-11-04 13:58:31 +00:00
/* Pushes a new node onto the reader stack for a list that begins at *loc
 * with the initial flag/concode value flagv.
 * Returns 0 on success, -1 with the error set in hcl if the node can't
 * be allocated. */
static HCL_INLINE int enter_list (hcl_t* hcl, const hcl_loc_t* loc, int flagv)
{
	hcl_rstl_t* node;

	/* zero-filled allocation. the fields not set below start as 0 */
	node = (hcl_rstl_t*)hcl_callocmem(hcl, HCL_SIZEOF(*node));
	if (HCL_UNLIKELY(!node))
	{
		const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
		hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "failed to allocate reader stack node - %js", orgmsg);
		return -1;
	}

	node->flagv = flagv;
	node->loc = *loc;

	/* push */
	node->prev = hcl->c->r.st;
	hcl->c->r.st = node;
	return 0;
}
2023-11-12 12:54:17 +00:00
static HCL_INLINE hcl_cnode_t * leave_list ( hcl_t * hcl , hcl_loc_t * list_loc , int * flagv , int * oldflagv )
2021-01-09 16:53:47 +00:00
{
2021-01-13 09:54:44 +00:00
hcl_rstl_t * rstl ;
2022-07-31 04:18:09 +00:00
hcl_cnode_t * head ;
2023-11-04 13:58:31 +00:00
hcl_loc_t loc ;
2021-01-09 16:53:47 +00:00
int fv , concode ;
/* the stack must not be empty - cannot leave a list without entering it */
2021-01-13 09:54:44 +00:00
HCL_ASSERT ( hcl , hcl - > c - > r . st ! = HCL_NULL ) ;
rstl = hcl - > c - > r . st ; /* get the stack top */
2021-01-09 16:53:47 +00:00
2021-01-13 09:54:44 +00:00
head = rstl - > head ;
fv = rstl - > flagv ;
loc = rstl - > loc ;
2021-01-09 16:53:47 +00:00
concode = LIST_FLAG_GET_CONCODE ( fv ) ;
2021-01-13 09:54:44 +00:00
hcl - > c - > r . st = rstl - > prev ; /* pop off */
hcl_freemem ( hcl , rstl ) ; /* dispose of the stack node */
2021-01-09 16:53:47 +00:00
2024-01-06 04:15:15 +00:00
if ( fv & ( COMMAED | COLONED | COLONEQED ) )
2021-01-09 16:53:47 +00:00
{
2024-01-03 12:11:23 +00:00
if ( concode = = HCL_CONCODE_MLIST )
{
2024-01-06 04:15:15 +00:00
hcl_setsynerrbfmt ( hcl , HCL_SYNERR_CALLABLE , TOKEN_LOC ( hcl ) , HCL_NULL , " missing message after : " ) ;
}
else if ( concode = = HCL_CONCODE_ALIST )
{
hcl_setsynerrbfmt ( hcl , HCL_SYNERR_CALLABLE , TOKEN_LOC ( hcl ) , HCL_NULL , " missing value after := " ) ;
2024-01-03 12:11:23 +00:00
}
else
{
2024-01-07 01:38:37 +00:00
hcl_synerrnum_t err ;
err = ( fv & COMMAED ) ? HCL_SYNERR_COMMANOVALUE : HCL_SYNERR_COLONNOVALUE ;
hcl_setsynerr ( hcl , err , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
2024-01-03 12:11:23 +00:00
}
2021-01-15 09:12:28 +00:00
if ( head ) hcl_freecnode ( hcl , head ) ;
2021-01-09 16:53:47 +00:00
return HCL_NULL ;
}
2023-11-12 12:54:17 +00:00
* list_loc = loc ;
2021-01-12 09:06:25 +00:00
* oldflagv = fv ;
if ( ! hcl - > c - > r . st )
{
/* the stack is empty after popping.
* it is back to the top level .
* the top level can never be quoted . */
* flagv = 0 ;
}
else
{
/* restore the flag for the outer returning level */
2021-01-13 09:54:44 +00:00
* flagv = hcl - > c - > r . st - > flagv ;
2021-01-12 09:06:25 +00:00
}
2023-05-18 01:24:01 +00:00
if ( head )
2021-01-19 14:07:42 +00:00
{
HCL_ASSERT ( hcl , HCL_CNODE_IS_CONS ( head ) ) ;
2024-01-07 01:38:37 +00:00
/* HACK */
if ( concode = = HCL_CONCODE_ALIST )
{
2024-01-07 15:38:42 +00:00
/* tranform (var := val) to (set var val) */
2024-01-13 16:28:00 +00:00
hcl_cnode_t * sym , * newhead , * lval ;
2024-01-07 01:38:37 +00:00
hcl_oocs_t fake_tok , * fake_tok_ptr = HCL_NULL ;
2024-01-13 16:28:00 +00:00
lval = HCL_CNODE_CONS_CAR ( head ) ;
if ( lval & & HCL_CNODE_IS_ELIST ( lval ) )
2024-01-13 05:37:59 +00:00
{
2024-01-13 16:28:00 +00:00
/* invalid lvalue */
hcl_setsynerr ( hcl , HCL_SYNERR_LVALUE , HCL_CNODE_GET_LOC ( lval ) , HCL_CNODE_GET_TOK ( lval ) ) ;
if ( head ) hcl_freecnode ( hcl , head ) ;
return HCL_NULL ;
}
else if ( lval & & HCL_CNODE_IS_CONS ( lval ) & & HCL_CNODE_CONS_CONCODE ( lval ) = = HCL_CONCODE_ARRAY )
{
hcl_cnode_t * tmp , * rval ;
2024-01-13 05:37:59 +00:00
fake_tok . ptr = vocas [ VOCA_SYM_SET_R ] . str ;
fake_tok . len = vocas [ VOCA_SYM_SET_R ] . len ;
fake_tok_ptr = & fake_tok ;
2024-01-13 16:28:00 +00:00
/* move the array item up to the main list and join the original lval to the end of it */
rval = HCL_CNODE_CONS_CDR ( head ) ;
hcl_freesinglecnode ( hcl , head ) ;
head = lval ;
/* TODO: check in advance if array items are all symbols... */
for ( tmp = lval ; tmp & & HCL_CNODE_IS_CONS ( tmp ) ; tmp = HCL_CNODE_CONS_CDR ( tmp ) )
2024-01-13 05:37:59 +00:00
{
if ( ! HCL_CNODE_CONS_CDR ( tmp ) )
{
2024-01-13 16:28:00 +00:00
HCL_CNODE_CONS_CDR ( tmp ) = rval ;
2024-01-13 05:37:59 +00:00
break ;
}
}
}
else
{
fake_tok . ptr = vocas [ VOCA_SYM_SET ] . str ;
fake_tok . len = vocas [ VOCA_SYM_SET ] . len ;
fake_tok_ptr = & fake_tok ;
}
2024-01-07 01:38:37 +00:00
2024-01-25 14:48:06 +00:00
/* TODO: check the number of arguments in advance??? */
2024-01-07 15:38:42 +00:00
sym = hcl_makecnodesymbol ( hcl , 0 , & loc , fake_tok_ptr ) ;
2024-01-07 01:38:37 +00:00
if ( HCL_UNLIKELY ( ! sym ) )
{
2024-01-07 15:38:42 +00:00
const hcl_ooch_t * orgmsg = hcl_backuperrmsg ( hcl ) ;
2024-02-03 04:36:05 +00:00
hcl_seterrbfmt ( hcl , HCL_ERRNUM ( hcl ) , " failed to create symbol cnode for := - %js " , orgmsg ) ;
2024-01-07 01:38:37 +00:00
if ( head ) hcl_freecnode ( hcl , head ) ;
return HCL_NULL ;
}
2024-01-13 05:37:59 +00:00
2024-01-13 16:28:00 +00:00
/* create a new head joined with set or set-r */
2024-01-07 01:38:37 +00:00
newhead = hcl_makecnodecons ( hcl , 0 , & loc , fake_tok_ptr , sym , head ) ;
if ( HCL_UNLIKELY ( ! newhead ) )
{
2024-01-07 15:38:42 +00:00
const hcl_ooch_t * orgmsg = hcl_backuperrmsg ( hcl ) ;
2024-02-03 04:36:05 +00:00
hcl_seterrbfmt ( hcl , HCL_ERRNUM ( hcl ) , " failed to create cons cnode for := - %js " , orgmsg ) ;
2024-01-07 01:38:37 +00:00
hcl_freecnode ( hcl , sym ) ;
if ( head ) hcl_freecnode ( hcl , head ) ;
return HCL_NULL ;
}
2024-01-07 15:38:42 +00:00
2024-01-07 01:38:37 +00:00
head = newhead ;
2024-01-07 15:38:42 +00:00
concode = HCL_CONCODE_XLIST ; /* switch back to XLIST */
2024-01-07 01:38:37 +00:00
}
/* END HACK */
2021-01-19 14:07:42 +00:00
HCL_CNODE_CONS_CONCODE ( head ) = concode ;
2023-11-27 16:44:13 +00:00
if ( fv & AUTO_FORGED ) HCL_CNODE_GET_FLAGS ( head ) | = HCL_CNODE_AUTO_FORGED ;
2024-01-07 15:38:42 +00:00
}
else
{
/* the list is empty */
head = hcl_makecnodeelist ( hcl , ( ( fv & AUTO_FORGED ) ? HCL_CNODE_AUTO_FORGED : 0 ) , & loc , concode ) ;
if ( HCL_UNLIKELY ( ! head ) )
{
const hcl_ooch_t * orgmsg = hcl_backuperrmsg ( hcl ) ;
2024-02-03 04:36:05 +00:00
hcl_seterrbfmt ( hcl , HCL_ERRNUM ( hcl ) , " failed to create empty list - %js " , orgmsg ) ;
2024-01-07 15:38:42 +00:00
}
2021-01-19 14:07:42 +00:00
}
2024-01-07 15:38:42 +00:00
return head ;
2021-01-09 16:53:47 +00:00
}
/* Decides whether a dot is acceptable at the current position of the list
 * on top of the reader stack. It is accepted only in a QLIST that already
 * holds at least one item. Sets DOTTED and returns 1 if accepted,
 * returns 0 otherwise. */
static HCL_INLINE int can_dot_list (hcl_t* hcl)
{
	hcl_rstl_t* top;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	if (top->count > 0 && LIST_FLAG_GET_CONCODE(top->flagv) == HCL_CONCODE_QLIST)
	{
		/* mark the state that a dot has appeared in the list */
		top->flagv |= DOTTED;
		return 1;
	}

	return 0;
}
/* Decides whether a comma is acceptable at the current position of the
 * list on top of the reader stack. Sets COMMAED and returns 1 if accepted,
 * returns 0 otherwise.
 *
 * A comma is rejected before the first item, right after another
 * delimiter, and after a later item if the first item was not followed
 * by a delimiter (JSON not set). In an XLIST, the list is switched to an
 * ALIST. In a DIC, it is accepted after an even number of items only.
 * ARRAY and BYTEARRAY accept it. All other containers reject it. */
static HCL_INLINE int can_comma_list (hcl_t* hcl)
{
	hcl_rstl_t* top;
	hcl_concode_t cc;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	if (top->count <= 0) return 0;

	if (top->count == 1) top->flagv |= JSON;
	else if (!(top->flagv & JSON)) return 0;

	if (top->flagv & (COMMAED | COLONED | COLONEQED)) return 0;

	cc = LIST_FLAG_GET_CONCODE(top->flagv);
	switch (cc)
	{
		case HCL_CONCODE_XLIST:
			LIST_FLAG_SET_CONCODE (top->flagv, HCL_CONCODE_ALIST);
			break;

		case HCL_CONCODE_DIC:
			if (top->count & 1) return 0; /* a key is waiting for its value */
			break;

		case HCL_CONCODE_ARRAY:
		case HCL_CONCODE_BYTEARRAY:
			break;

		default:
			return 0;
	}

	top->flagv |= COMMAED;
	return 1;
}
/* Decides whether a single colon is acceptable at the current position of
 * the list on top of the reader stack. Sets COLONED and returns 1 if
 * accepted, returns 0 otherwise. An XLIST holding exactly one item is
 * switched to an MLIST by the colon. */
static HCL_INLINE int can_colon_list (hcl_t* hcl)
{
	hcl_rstl_t* top;
	hcl_concode_t cc;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	/* not allowed at the list beginning */
	if (top->count <= 0) return 0;

	if (top->count == 1)
	{
		/* mark that the first key is colon-delimited */
		top->flagv |= JSON;
	}
	else if (!(top->flagv & JSON))
	{
		/* the first key is not colon-delimited.
		 * so other keys can't be colon-delimited either */
		return 0;
	}

	/* multiple delimiters in a row - e.g. #{ "abc": : 20 } */
	if (top->flagv & (COMMAED | COLONED | COLONEQED)) return 0;

	cc = LIST_FLAG_GET_CONCODE(top->flagv);
	if (cc == HCL_CONCODE_XLIST)
	{
		if (top->count > 1) return 0;

		/* ugly dual use of a colon sign. switch to MLIST if the first
		 * element is delimited by a colon. e.g. (obj:new 10 20 30) */
		LIST_FLAG_SET_CONCODE (top->flagv, HCL_CONCODE_MLIST);
		top->flagv &= ~JSON;
	}
	else if (cc != HCL_CONCODE_DIC)
	{
		/* not allowed outside a dictionary */
		return 0;
	}

	/* not allowed after the value in a dictionary */
	if ((top->count & 1) == 0) return 0;

	/* mark that it's coloned. clear_comma_colon_flag() clears this */
	top->flagv |= COLONED;
	return 1;
}
2024-01-06 04:15:15 +00:00
/* Decides whether := is acceptable at the current position of the list on
 * top of the reader stack. It is accepted only right after the first item
 * of an XLIST. If accepted, the list is switched to an ALIST, COLONEQED is
 * set and 1 is returned. Returns 0 otherwise. */
static HCL_INLINE int can_coloneq_list (hcl_t* hcl)
{
	hcl_rstl_t* top;
	hcl_concode_t cc;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	/* allowed after the first item only */
	if (top->count != 1) return 0;

	/* repeated delimiters - e.g (a := := ...)  (a : := ... ) */
	if (top->flagv & (COMMAED | COLONED | COLONEQED)) return 0;

	/* assignment only in XLIST */
	cc = LIST_FLAG_GET_CONCODE(top->flagv);
	if (cc != HCL_CONCODE_XLIST) return 0;

	LIST_FLAG_SET_CONCODE (top->flagv, HCL_CONCODE_ALIST);
	top->flagv |= COLONEQED;
	return 1;
}
2021-01-09 16:53:47 +00:00
/* Clears the pending-delimiter flags (COMMAED, COLONED, COLONEQED) of the
 * list on top of the reader stack. */
static HCL_INLINE void clear_comma_colon_flag (hcl_t* hcl)
{
	hcl_rstl_t* top;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	top->flagv &= ~COMMAED;
	top->flagv &= ~COLONED;
	top->flagv &= ~COLONEQED;
}
2023-11-12 12:54:17 +00:00
/*
 * Append 'obj' to the list on top of the reader stack.
 *
 * hcl - the interpreter whose reader stack (hcl->c->r.st) must not be empty.
 * obj - the node to add.
 * loc - the location to record for the new cons cell. It is given when 'obj'
 *       is a cons cell or an empty list produced for another list and is
 *       HCL_NULL otherwise, in which case the location of 'obj' is used.
 *
 * Returns 0 on success. Returns -1 on failure, in which case 'obj' has not
 * been attached to the list.
 */
static int chain_to_list (hcl_t* hcl, hcl_cnode_t* obj, hcl_loc_t* loc)
{
	hcl_rstl_t* top;
	hcl_cnode_t* cell;
	hcl_oocs_t opener;
	hcl_oocs_t* opener_ptr;
	const hcl_loc_t* cell_loc;
	int list_flags;
	int code;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;
	list_flags = top->flagv;

	if (list_flags & CLOSED)
	{
		/* the list is closed already and accepts no more items.
		 * see this faulty expression for instance: #(1 2 . 3 4).
		 * only a single item may follow the period. as this can only
		 * happen in a wrong qlist where a period is allowed, the error
		 * code is hard-coded to HCL_SYNERR_RPAREN */
		hcl_setsynerr (hcl, HCL_SYNERR_RPAREN, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
		return -1;
	}

	if (list_flags & DOTTED)
	{
		hcl_cnode_t* last;
		hcl_cnode_t* cdr_item;

		/* a list can't have become dotted while being empty */
		HCL_ASSERT (hcl, top->head != HCL_NULL);
		HCL_ASSERT (hcl, top->tail != HCL_NULL);
		HCL_ASSERT (hcl, top->count > 0);

		/* the object goes to the 'cdr' slot of the last cell */
		last = top->tail;
		HCL_ASSERT (hcl, last != HCL_NULL);
		HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(last));

		cdr_item = obj;
		if (HCL_CNODE_IS_CONS(obj) && HCL_CNODE_CONS_CONCODE(obj) != HCL_CONCODE_QLIST)
		{
			/* the last element is another list that is not a qlist,
			 * for example, #(1 2 . [3 4 5]).
			 * wrap the head of that list in a shell node for the compiler */
			cdr_item = hcl_makecnodeshell(hcl, 0, HCL_CNODE_GET_LOC(obj), obj);
			if (HCL_UNLIKELY(!cdr_item)) return -1;
		}
		last->u.cons.cdr = cdr_item;

		/* mark the list CLOSED so that no more than one item can
		 * follow the dot */
		top->flagv = list_flags | CLOSED;

		/* TODO: check overflow on count??? */
		top->count++;
		return 0;
	}

	if ((list_flags & JSON) && top->count > 0 && !(list_flags & (COMMAED | COLONED)))
	{
		/* array/dictionary elements are not separated.
		 * for instance, [1 2] {10 20} */
		hcl_setsynerr (hcl, HCL_SYNERR_NOSEP, TOKEN_LOC(hcl), HCL_NULL);
		return -1;
	}

	/* `loc` may be passed in if the added `obj` is a cons cell for another list */
	HCL_ASSERT (hcl, (loc && (HCL_CNODE_IS_CONS(obj) || HCL_CNODE_IS_ELIST(obj))) || (!loc && !HCL_CNODE_IS_CONS(obj)));

	/* find the concode of the added list, if it is a list at all */
	if (HCL_CNODE_IS_CONS(obj)) code = HCL_CNODE_CONS_CONCODE(obj);
	else if (HCL_CNODE_IS_ELIST(obj)) code = HCL_CNODE_ELIST_CONCODE(obj);
	else code = -1;

	opener_ptr = HCL_NULL;
	if (code >= 0)
	{
		/* compose a fake token out of the vocabulary entry associated
		 * with the concode and hand it over to the new cons cell */
		struct voca_t* voca = &vocas[cons_info[code].voca_id];
		opener.ptr = voca->str;
		opener.len = voca->len;
		opener_ptr = &opener;
	}

	cell_loc = loc;
	if (!cell_loc) cell_loc = HCL_CNODE_GET_LOC(obj);

	cell = hcl_makecnodecons(hcl, 0, cell_loc, opener_ptr, obj, HCL_NULL);
	if (HCL_UNLIKELY(!cell)) return -1;

	if (top->count <= 0)
	{
		/* this is the first element. the new cell becomes the head.
		 * it becomes the tail as well below */
		HCL_ASSERT (hcl, top->tail == HCL_NULL);
		HCL_ASSERT (hcl, top->head == HCL_NULL);
		top->head = cell;
	}
	else
	{
		/* not the first element. link it after the current tail */
		HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(top->tail));
		top->tail->u.cons.cdr = cell;
	}
	top->tail = cell;

	/* TODO: check overflow on count??? */
	top->count++;
	return 0;
}
2021-01-30 16:13:27 +00:00
/* ------------------------------------------------------------------------ */
2021-01-17 17:45:39 +00:00
/* TODO:
hcl_cnodetoobj (hcl_t* hcl, hcl_cnode_t* x)
{
 * drop the location information and compose an object??
 * is it doable? can a dotted symbol be converted to a proper value?
}
*/
2021-01-30 16:13:27 +00:00
2022-05-25 14:23:43 +00:00
/* ---------------------------------------------------------------------- */
2022-07-29 11:29:47 +00:00
/*
 * Default handler for a cnode composed via feeding. It is installed by
 * init_feed() as hcl->c->feed.on_cnode and simply compiles the node,
 * returning whatever hcl_compile() returns.
 */
static int on_fed_cnode (hcl_t* hcl, hcl_cnode_t* obj)
{
	int rc;

	rc = hcl_compile(hcl, obj, 0);
	return rc;
}
/* ---------------------------------------------------------------------- */
2022-05-25 14:23:43 +00:00
/*
 * Bring the feed state (hcl->c->feed) to its initial condition:
 * everything zeroed, the lexer in the HCL_FLX_START state, positioned at
 * line 1 column 1 with no file name, and on_fed_cnode() installed as the
 * cnode handler.
 */
static void init_feed (hcl_t* hcl)
{
	/* wipe out everything first */
	HCL_MEMSET (&hcl->c->feed, 0, HCL_SIZEOF(hcl->c->feed));

	/* initial lexer location */
	hcl->c->feed.lx.loc.file = HCL_NULL;
	hcl->c->feed.lx.loc.line = 1;
	hcl->c->feed.lx.loc.colm = 1;

	/* initial lexer state and the default cnode handler */
	hcl->c->feed.lx.state = HCL_FLX_START;
	hcl->c->feed.on_cnode = on_fed_cnode;
}
2022-07-26 15:06:53 +00:00
/* ------------------------------------------------------------------------ */
2022-07-28 14:07:18 +00:00
/*
 * Start reading from the file named by the current token.
 *
 * A new input structure is allocated, opened through hcl->c->cci_rdr with
 * HCL_IO_OPEN and made the current input (hcl->c->curinp). The feed location
 * of the includer is saved so that feed_end_include() can restore it.
 *
 * Returns 0 on success. Returns -1 on failure with the error information
 * set in 'hcl'; the input structure allocated here is released in that case.
 */
static int feed_begin_include (hcl_t* hcl)
{
	const hcl_ooch_t* incl_name;
	hcl_io_cciarg_t* incl;

	incl_name = add_sr_name(hcl, TOKEN_NAME(hcl));
	if (HCL_UNLIKELY(!incl_name)) return -1;

	incl = (hcl_io_cciarg_t*)hcl_callocmem(hcl, HCL_SIZEOF(*incl));
	if (HCL_UNLIKELY(!incl))
	{
		const hcl_ooch_t* saved_msg = hcl_backuperrmsg(hcl);
		hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), " failed to allocate source input structure - %js ", saved_msg);
		return -1; /* nothing has been allocated. nothing to release */
	}

	incl->name = incl_name;
	incl->line = 1;
	incl->colm = 1;
	/*incl->nl = '\0';*/
	incl->includer = hcl->c->curinp;

	if (hcl->c->cci_rdr(hcl, HCL_IO_OPEN, incl) <= -1)
	{
		const hcl_ooch_t* saved_msg = hcl_backuperrmsg(hcl);
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), " unable to include %js - %js ", incl_name, saved_msg);
		hcl_freemem (hcl, incl);
		return -1;
	}

	if (incl->includer != &hcl->c->cci_arg)
	{
		/* nested include - store the current feed location in the includer */
		incl->includer->name = hcl->c->feed.lx.loc.file;
		incl->includer->line = hcl->c->feed.lx.loc.line;
		incl->includer->colm = hcl->c->feed.lx.loc.colm;
	}
	else
	{
		/* top-level include */
		/* TODO: remove hcl_readbasesrchar() and clean up this part.
		 * hcl_readbasesrchar(), if called in the middle of feeds,
		 * updates hcl->c->cci_arg's line and colm. so use a separate
		 * field to store the current feed location for now */
		hcl->c->feed.lx._oloc = hcl->c->feed.lx.loc;
	}

	/* the feed location now refers to the beginning of the includee */
	hcl->c->feed.lx.loc.file = incl->name;
	hcl->c->feed.lx.loc.line = incl->line;
	hcl->c->feed.lx.loc.colm = incl->colm;

	/* switch to the includee's stream */
	hcl->c->curinp = incl;
	/* hcl->c->depth.incl++; */

	return 0;
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/*
 * Finish the current included input and switch back to its includer.
 *
 * Returns 0 if the current input is the main input (hcl->c->cci_arg), which
 * means that no include is in progress. Returns 1 if an included input has
 * been closed successfully. Returns -1 if closing it has failed; the input
 * structure is destroyed and the includer is made current even then.
 */
static int feed_end_include (hcl_t* hcl)
{
	hcl_io_cciarg_t* ended;
	int close_rc;

	if (hcl->c->curinp == &hcl->c->cci_arg) return 0; /* no include */

	/* it is an included file. close it and get back to the outer file */
	close_rc = hcl->c->cci_rdr(hcl, HCL_IO_CLOSE, hcl->c->curinp);

	/* even if closing has failed, go on destroying the input structure
	 * as normal and report the failure afterwards. this way, the caller
	 * doesn't call HCL_IO_CLOSE on hcl->c->curinp again. */
	ended = hcl->c->curinp;
	hcl->c->curinp = ended->includer;

	if (hcl->c->curinp != &hcl->c->cci_arg)
	{
		/* back in another included file - take the location saved in it */
		hcl->c->feed.lx.loc.file = hcl->c->curinp->name;
		hcl->c->feed.lx.loc.line = hcl->c->curinp->line;
		hcl->c->feed.lx.loc.colm = hcl->c->curinp->colm;
	}
	else
	{
		/* back in the main input - take the separately saved location */
		hcl->c->feed.lx.loc = hcl->c->feed.lx._oloc;
	}

	HCL_ASSERT (hcl, ended->name != HCL_NULL);
	hcl_freemem (hcl, ended);
	/* hcl->parse.depth.incl--; */

	/* the close failure put off above is reported here */
	if (close_rc != 0) return -1;

	hcl->c->lxc = hcl->c->curinp->lxc;
	return 1; /* ended the included file successfully */
}
2022-07-26 15:06:53 +00:00
2023-05-19 03:55:08 +00:00
/*
 * Pop and destroy every entry of the reader stack (hcl->c->r.st), freeing
 * the partially composed list held by each entry, if any.
 */
static void feed_clean_up_reader_stack (hcl_t* hcl)
{
	hcl_rstl_t* entry;

	for (entry = hcl->c->r.st; entry; entry = hcl->c->r.st)
	{
		/* unlink the top entry before destroying it */
		hcl->c->r.st = entry->prev;
		if (entry->head) hcl_freecnode (hcl, entry->head);
		hcl_freemem (hcl, entry);
	}
}
2023-11-10 09:06:49 +00:00
/*
 * Tell whether the reader is positioned where a new expression of a block
 * begins. Returns 1 if the reader stack is empty, or if the list on top of
 * the stack is a block and the AT_BEGINNING bit is set in the feed reader
 * flags. Returns 0 otherwise.
 */
static HCL_INLINE int is_at_block_beginning (hcl_t* hcl)
{
	hcl_rstl_t* top = hcl->c->r.st;

	if (!top) return 1; /* not inside any list */
	if (LIST_FLAG_GET_CONCODE(top->flagv) != HCL_CONCODE_BLOCK) return 0;
	return (hcl->c->feed.rd.flagv & AT_BEGINNING) != 0;
}
2024-01-14 15:47:01 +00:00
/*
 * Open an implicit XLIST flagged AUTO_FORGED if the reader is at the
 * beginning of a block as determined by is_at_block_beginning().
 *
 * Returns 0 if no list had to be forged or if the list has been forged
 * successfully. In the latter case frd->level is incremented and the
 * AT_BEGINNING bit is cleared in frd->flagv. Returns -1 if the nesting
 * level is too deep or enter_list() has failed.
 */
static int auto_forge_xlist_if_at_block_beginning (hcl_t* hcl, hcl_frd_t* frd)
{
	int forged_flagv;

	if (!is_at_block_beginning(hcl)) return 0; /* nothing to forge */

	/* both MLIST and ALIST begin as XLIST and get converted to MLIST
	 * or ALIST after more tokens are processed. so handling of MLIST
	 * or ALIST is needed at this phase */
	forged_flagv = AUTO_FORGED;
	LIST_FLAG_SET_CONCODE (forged_flagv, HCL_CONCODE_XLIST);

	/* the rest is similar to the code below the start_list label */
	if (frd->level >= HCL_TYPE_MAX(int))
	{
		/* the nesting level too deep */
		hcl_setsynerr (hcl, HCL_SYNERR_NESTING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
		return -1;
	}

	/* since the actual list opener doesn't exist, the location of the
	 * first element will be the location of the list */
	if (enter_list(hcl, TOKEN_LOC(hcl), forged_flagv) <= -1) return -1;
	frd->level++; /* level after the forged list has been added */

	/* a new list has been created automatically. unlike normal list
	 * creation by an explicit symbol such as a left parenthesis, a left
	 * brace, etc, the first element opens up this new list. so the
	 * AT_BEGINNING bit is turned off here */
	frd->flagv &= ~AT_BEGINNING;
	return 0;
}
2024-01-19 04:25:23 +00:00
2022-07-28 14:07:18 +00:00
static int feed_process_token ( hcl_t * hcl )
{
hcl_frd_t * frd = & hcl - > c - > feed . rd ;
2023-11-12 12:54:17 +00:00
hcl_loc_t * list_loc = HCL_NULL ;
2023-11-29 08:12:21 +00:00
int rbrace_again = 0 ;
2023-11-12 12:54:17 +00:00
/* TODO: frd->obj and frd->list_loc can become local variables in this function.. */
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* this function composes an s-expression non-recursively
* by manipulating its own stack . */
2022-07-26 15:06:53 +00:00
2022-07-30 03:02:57 +00:00
/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "TOKEN => [%.*js] type=%d LOC=%d.%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl), TOKEN_LOC(hcl)->line, TOKEN_LOC(hcl)->colm);*/
2022-07-28 14:07:18 +00:00
if ( frd - > expect_include_file )
{
2022-07-30 03:02:57 +00:00
/* the #include directive is an exception to the general expression rule.
* use this exceptional code block to divert the major token processing */
2023-11-07 10:19:06 +00:00
if ( TOKEN_TYPE ( hcl ) ! = HCL_TOK_STRLIT )
2022-07-28 14:07:18 +00:00
{
hcl_setsynerr ( hcl , HCL_SYNERR_STRING , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
frd - > expect_include_file = 0 ;
2022-07-28 14:20:28 +00:00
/* indicate that the file inclusion should be performed soon.
2023-05-18 01:24:01 +00:00
* don ' t perform actual inclusion here so that the return value of
2022-07-28 14:20:28 +00:00
* feed_char ( ) advances the input pointers properly . */
2023-05-18 01:24:01 +00:00
frd - > do_include_file = 1 ;
2022-07-28 14:20:28 +00:00
2022-07-28 14:07:18 +00:00
goto ok ;
}
2023-11-07 10:19:06 +00:00
if ( frd - > expect_vlist_item & & TOKEN_TYPE ( hcl ) ! = HCL_TOK_IDENT & & TOKEN_TYPE ( hcl ) ! = HCL_TOK_VBAR )
2022-07-30 03:02:57 +00:00
{
2024-01-19 16:17:32 +00:00
if ( TOKEN_TYPE ( hcl ) = = HCL_TOK_EOL ) goto ok ; /* ignore EOL inside vlist */
2022-07-30 03:02:57 +00:00
/* vlist also has special requirement that it can only contain variable names. */
hcl_setsynerr ( hcl , HCL_SYNERR_VARNAME , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
2022-07-28 14:07:18 +00:00
switch ( TOKEN_TYPE ( hcl ) )
{
default :
hcl_setsynerr ( hcl , HCL_SYNERR_ILTOK , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_EOF :
2022-07-28 14:07:18 +00:00
hcl_setsynerr ( hcl , HCL_SYNERR_EOF , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_INCLUDE :
2022-07-28 14:07:18 +00:00
/* TODO: should i limit where #include can be specified?
* disallow it inside a list literal or an array literal ? */
frd - > expect_include_file = 1 ;
goto ok ;
2023-11-07 15:11:43 +00:00
case HCL_TOK_PRAGMA :
/* TODO: implement this */
hcl_setsynerr ( hcl , HCL_SYNERR_ILTOK , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_VBAR :
2022-07-30 03:02:57 +00:00
if ( frd - > expect_vlist_item )
{
/* closer */
int oldflagv ;
frd - > expect_vlist_item = 0 ;
2023-11-12 12:54:17 +00:00
frd - > obj = leave_list ( hcl , & frd - > list_loc , & frd - > flagv , & oldflagv ) ;
2022-07-30 03:02:57 +00:00
frd - > level - - ;
2023-11-10 09:06:49 +00:00
frd - > flagv | = AT_BEGINNING ;
2023-11-12 12:54:17 +00:00
list_loc = & frd - > list_loc ;
2022-07-30 03:02:57 +00:00
break ;
}
else
{
/* opener */
2023-05-18 01:24:01 +00:00
2022-07-30 03:02:57 +00:00
/* the vlist is different from other lists in that
* it uses the same opener and the closer
* it allows only variable names .
* it prohibits nesting of other lists
*/
2022-07-30 15:21:44 +00:00
if ( hcl - > c - > r . st & & ( hcl - > c - > r . st - > flagv & DATA_LIST ) )
2022-07-30 03:02:57 +00:00
{
2022-07-30 15:21:44 +00:00
/* if the outer list is a data list */
2022-07-30 03:02:57 +00:00
hcl_setsynerr ( hcl , HCL_SYNERR_VBARBANNED , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
/* neither a data list nor an executable list. handle this specially using
* a dedicated frd - > expect_vlist_item variable */
2023-05-18 01:24:01 +00:00
frd - > flagv = 0 ;
2022-07-30 03:02:57 +00:00
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_VLIST ) ;
frd - > expect_vlist_item = 1 ;
goto start_list ;
}
2023-11-07 10:19:06 +00:00
case HCL_TOK_LBRACK : /* [ */
2024-01-27 15:03:40 +00:00
case HCL_TOK_APAREN : /* #[ */
2024-01-14 00:48:57 +00:00
/* [] is a data list. so let's treat it like other literal
* expressions ( e . g . 1 , " abc " ) . when it ' s placed at the block beginning ,
* create the outer XLIST . */
2024-01-14 15:47:01 +00:00
if ( auto_forge_xlist_if_at_block_beginning ( hcl , frd ) < = - 1 ) goto oops ;
2022-07-30 03:02:57 +00:00
frd - > flagv = DATA_LIST ;
2022-07-28 14:07:18 +00:00
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_ARRAY ) ;
goto start_list ;
2024-01-27 15:03:40 +00:00
case HCL_TOK_BAPAREN : /* #b[ */
2024-01-14 15:47:01 +00:00
if ( auto_forge_xlist_if_at_block_beginning ( hcl , frd ) < = - 1 ) goto oops ;
2022-07-30 03:02:57 +00:00
frd - > flagv = DATA_LIST ;
2022-07-28 14:07:18 +00:00
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_BYTEARRAY ) ;
goto start_list ;
2024-02-02 05:57:46 +00:00
case HCL_TOK_CAPAREN : /* #c[ */
if ( auto_forge_xlist_if_at_block_beginning ( hcl , frd ) < = - 1 ) goto oops ;
frd - > flagv = DATA_LIST ;
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_CHARARRAY ) ;
goto start_list ;
2023-11-07 10:47:37 +00:00
case HCL_TOK_LBRACE : /* { */
2024-01-14 00:48:57 +00:00
/* this is a block opener itself. auto xlist forge at the block beginning */
2023-11-08 10:05:24 +00:00
frd - > flagv = 0 ;
2023-11-07 10:47:37 +00:00
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_BLOCK ) ;
goto start_list ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_DLPAREN : /* #{ */
2024-01-14 15:47:01 +00:00
if ( auto_forge_xlist_if_at_block_beginning ( hcl , frd ) < = - 1 ) goto oops ;
2022-07-30 03:02:57 +00:00
frd - > flagv = DATA_LIST ;
2022-07-28 14:07:18 +00:00
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_DIC ) ;
goto start_list ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_QLPAREN : /* #( */
2024-01-14 15:47:01 +00:00
if ( auto_forge_xlist_if_at_block_beginning ( hcl , frd ) < = - 1 ) goto oops ;
2022-07-30 03:02:57 +00:00
frd - > flagv = DATA_LIST ;
2022-07-28 14:07:18 +00:00
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_QLIST ) ;
goto start_list ;
2024-01-03 12:11:23 +00:00
# if defined(HCL_TOK_LPARCOLON)
2023-11-07 10:19:06 +00:00
case HCL_TOK_LPARCOLON : /* (: */
2022-07-28 14:07:18 +00:00
frd - > flagv = 0 ;
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_MLIST ) ;
goto start_list ;
2024-01-03 12:11:23 +00:00
# endif
2023-05-18 01:24:01 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_LPAREN : /* ( */
2022-07-28 14:07:18 +00:00
frd - > flagv = 0 ;
LIST_FLAG_SET_CONCODE ( frd - > flagv , HCL_CONCODE_XLIST ) ;
start_list :
if ( frd - > level > = HCL_TYPE_MAX ( int ) )
2022-07-26 15:06:53 +00:00
{
2022-07-30 03:02:57 +00:00
/* the nesting level has become too deep */
2022-07-28 14:07:18 +00:00
hcl_setsynerr ( hcl , HCL_SYNERR_NESTING , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* push some data to simulate recursion into
* a list literal or an array literal */
if ( enter_list ( hcl , TOKEN_LOC ( hcl ) , frd - > flagv ) < = - 1 ) goto oops ;
frd - > level + + ;
2023-11-12 12:54:17 +00:00
frd - > flagv | = AT_BEGINNING ; /* the reader is now at the beginning of a list */
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* read the next token */
goto ok ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_DOT :
2022-07-28 14:07:18 +00:00
if ( frd - > level < = 0 | | ! can_dot_list ( hcl ) )
{
/* cannot have a period:
* 1. at the top frd - > level - not inside ( )
* 2. at the beginning of a list
* 3. inside an array , byte - array , dictionary , xlist */
hcl_setsynerr ( hcl , HCL_SYNERR_DOTBANNED , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
goto oops ;
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
goto ok ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_COLON :
2022-07-28 14:07:18 +00:00
if ( frd - > level < = 0 | | ! can_colon_list ( hcl ) )
{
hcl_setsynerr ( hcl , HCL_SYNERR_COLONBANNED , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
goto oops ;
}
goto ok ;
2024-01-06 04:15:15 +00:00
case HCL_TOK_COLONEQ :
if ( frd - > level < = 0 | | ! can_coloneq_list ( hcl ) )
{
hcl_setsynerr ( hcl , HCL_SYNERR_COLONEQBANNED , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
goto oops ;
}
goto ok ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_COMMA :
2022-07-28 14:07:18 +00:00
if ( frd - > level < = 0 | | ! can_comma_list ( hcl ) )
{
hcl_setsynerr ( hcl , HCL_SYNERR_COMMABANNED , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
goto oops ;
}
goto ok ;
2024-01-19 04:25:23 +00:00
case HCL_TOK_EOL : /* EOL returned only under a certain condition */
2023-11-07 12:23:00 +00:00
case HCL_TOK_SEMICOLON :
2023-11-08 10:05:24 +00:00
{
int oldflagv ;
int concode ;
2023-11-10 09:06:49 +00:00
hcl_rstl_t * rstl ;
2023-11-08 10:05:24 +00:00
2023-11-29 08:12:21 +00:00
semicolon :
2024-01-20 17:23:19 +00:00
/* the parent list(rstl) must be inspected instead of the current
* feed / read status pointed to by frd . */
2023-11-10 09:06:49 +00:00
rstl = hcl - > c - > r . st ;
2024-01-20 17:23:19 +00:00
if ( ! rstl ) goto ok ; /* redundant eol/semicolon */
concode = LIST_FLAG_GET_CONCODE ( rstl - > flagv ) ;
if ( ! ( rstl - > flagv & AUTO_FORGED ) )
2023-11-08 10:05:24 +00:00
{
2024-01-20 17:23:19 +00:00
if ( TOKEN_TYPE ( hcl ) = = HCL_TOK_EOL ) goto ok ;
if ( concode = = HCL_CONCODE_BLOCK ) goto ok ;
hcl_setsynerr ( hcl , HCL_SYNERR_SEMICOLON , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
2023-11-08 10:05:24 +00:00
goto oops ;
}
2024-01-20 17:23:19 +00:00
/* if auto-forged */
#if 0
/* TODO: remove this part if the assertion is confirmed true in the #else part... */
2024-01-06 04:15:15 +00:00
if ( concode ! = HCL_CONCODE_XLIST & & concode ! = HCL_CONCODE_MLIST & & concode ! = HCL_CONCODE_ALIST )
2023-11-08 10:05:24 +00:00
{
hcl_setsynerr ( hcl , HCL_SYNERR_UNBALPBB , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
goto oops ;
}
2024-01-20 17:23:19 +00:00
# else
HCL_ASSERT ( hcl , concode = = HCL_CONCODE_XLIST | | concode = = HCL_CONCODE_MLIST | | concode = = HCL_CONCODE_ALIST ) ;
# endif
2023-11-08 10:05:24 +00:00
2023-11-12 12:54:17 +00:00
frd - > obj = leave_list ( hcl , & frd - > list_loc , & frd - > flagv , & oldflagv ) ;
2023-11-08 10:05:24 +00:00
frd - > level - - ;
2023-11-10 09:06:49 +00:00
frd - > flagv | = AT_BEGINNING ; /* the current one is over. move on the beginning for the next expression */
2023-11-12 12:54:17 +00:00
list_loc = & frd - > list_loc ;
2023-11-08 10:05:24 +00:00
break ;
}
2023-11-07 12:23:00 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_RPAREN : /* xlist (), qlist #() */
2023-11-07 12:23:00 +00:00
case HCL_TOK_RBRACK : /* bytearray #[], array [] */
case HCL_TOK_RBRACE : /* dictionary #{}, block {} */
2022-07-28 14:07:18 +00:00
{
int oldflagv ;
int concode ;
2023-11-29 06:54:37 +00:00
hcl_rstl_t * rstl ;
2022-07-28 14:07:18 +00:00
if ( frd - > level < = 0 )
{
hcl_setsynerr ( hcl , HCL_SYNERR_UNBALPBB , TOKEN_LOC ( hcl ) , HCL_NULL ) ;
goto oops ;
}
2023-11-29 06:54:37 +00:00
if ( TOKEN_TYPE ( hcl ) = = HCL_TOK_RBRACE )
{
rstl = hcl - > c - > r . st ; /* check the parent, not the current */
if ( rstl & & ( rstl - > flagv & AUTO_FORGED ) )
{
2023-11-29 08:12:21 +00:00
#if 0
2023-11-29 06:54:37 +00:00
/* the auto-forged list has not been terminated. it must be terminated closed first */
hcl_setsynerrbfmt ( hcl , HCL_SYNERR_SEMICOLON , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , " semicolon expected " ) ;
goto oops ;
2023-11-29 08:12:21 +00:00
# else
/* if the expression inside {} is an auto-forged xlist expression and there is no semiclon provided,
* treat it as if the semiclon is placed before } . e . g . { printf " hello \n " } */
rbrace_again = 1 ;
goto semicolon ;
# endif
2023-11-29 06:54:37 +00:00
}
}
2023-11-29 08:12:21 +00:00
rbrace_ok :
2023-11-29 06:54:37 +00:00
2022-07-28 14:07:18 +00:00
concode = LIST_FLAG_GET_CONCODE ( frd - > flagv ) ;
2023-11-08 10:05:24 +00:00
if ( concode = = HCL_CONCODE_XLIST & & ( frd - > flagv & AUTO_FORGED ) )
{
/* the auto-created xlist can't be terminated with the regular closing symbol
* it must end with the semicolon */
hcl_setsynerr ( hcl , HCL_SYNERR_UNBALPBB , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
2023-11-12 12:54:17 +00:00
if ( cons_info [ concode ] . closer ! = TOKEN_TYPE ( hcl ) )
2022-07-28 14:07:18 +00:00
{
2023-11-12 12:54:17 +00:00
hcl_setsynerr ( hcl , cons_info [ concode ] . synerr , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2022-07-28 14:07:18 +00:00
goto oops ;
}
2022-07-26 15:06:53 +00:00
#if 0
2022-07-28 14:07:18 +00:00
if ( ( flagv & QUOTED ) | | frd - > level < = 0 )
{
/* the right parenthesis can never appear while
* ' quoted ' is true . ' quoted ' is set to false when
* entering a normal list . ' quoted ' is set to true
* when entering a quoted list . a quoted list does
* not have an explicit right parenthesis .
* so the right parenthesis can only pair up with
* the left parenthesis for the normal list .
*
* For example , ' ( 1 2 3 ' ) 5 6 )
*
* this condition is triggerred when the first ) is
* met after the second quote .
*
* also it is illegal to have the right parenthesis
* with no opening ( left ) parenthesis , which is
* indicated by frd - > level < = 0.
*/
hcl_setsynerr ( hcl , HCL_SYNERR_LPAREN , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
2022-07-26 15:06:53 +00:00
# endif
2023-11-12 12:54:17 +00:00
frd - > obj = leave_list ( hcl , & frd - > list_loc , & frd - > flagv , & oldflagv ) ;
2022-07-28 14:07:18 +00:00
frd - > level - - ;
2023-11-10 09:06:49 +00:00
frd - > flagv | = AT_BEGINNING ;
2023-11-12 12:54:17 +00:00
list_loc = & frd - > list_loc ;
2022-07-28 14:07:18 +00:00
break ;
}
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_NIL :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodenil ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_TRUE :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodetrue ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_FALSE :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodefalse ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_SELF :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodeself ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_SUPER :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodesuper ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_ELLIPSIS :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodeellipsis ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_TRPCOLONS :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodetrpcolons ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2024-02-03 16:57:53 +00:00
goto auto_xlist ;
case HCL_TOK_DBLCOLONS :
frd - > obj = hcl_makecnodedblcolons ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2024-02-03 09:59:17 +00:00
case HCL_TOK_COLONGT :
frd - > obj = hcl_makecnodecolongt ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto auto_xlist ;
case HCL_TOK_COLONLT :
frd - > obj = hcl_makecnodecolonlt ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2024-02-03 09:50:10 +00:00
goto auto_xlist ;
case HCL_TOK_COLONSTAR :
frd - > obj = hcl_makecnodecolonstar ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_SMPTRLIT :
2022-07-28 14:07:18 +00:00
{
hcl_oow_t i ;
hcl_oow_t v = 0 ;
HCL_ASSERT ( hcl , TOKEN_NAME_LEN ( hcl ) > = 3 ) ;
for ( i = 2 ; i < TOKEN_NAME_LEN ( hcl ) ; i + + )
2022-07-26 15:06:53 +00:00
{
2022-07-28 14:07:18 +00:00
HCL_ASSERT ( hcl , is_xdigitchar ( TOKEN_NAME_CHAR ( hcl , i ) ) ) ;
2023-12-06 02:09:15 +00:00
v = v * 16 + HCL_CHAR_TO_NUM ( TOKEN_NAME_CHAR ( hcl , i ) , 16 ) ;
2022-07-28 14:07:18 +00:00
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
if ( ! HCL_IN_SMPTR_RANGE ( v ) )
{
hcl_setsynerr ( hcl , HCL_SYNERR_SMPTRLIT , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
}
2022-07-26 15:06:53 +00:00
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodesmptrlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , v ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-28 14:07:18 +00:00
}
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_ERRLIT :
2022-07-28 14:07:18 +00:00
{
hcl_oow_t i ;
hcl_ooi_t v = 0 ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
HCL_ASSERT ( hcl , TOKEN_NAME_LEN ( hcl ) > = 3 ) ;
for ( i = 2 ; i < TOKEN_NAME_LEN ( hcl ) ; i + + )
2022-07-26 15:06:53 +00:00
{
2022-07-28 14:07:18 +00:00
HCL_ASSERT ( hcl , is_digitchar ( TOKEN_NAME_CHAR ( hcl , i ) ) ) ;
2023-12-06 02:09:15 +00:00
v = v * 10 + HCL_CHAR_TO_NUM ( TOKEN_NAME_CHAR ( hcl , i ) , 10 ) ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
if ( v > HCL_ERROR_MAX )
2022-07-26 15:06:53 +00:00
{
2022-07-28 14:07:18 +00:00
hcl_setsynerr ( hcl , HCL_SYNERR_ERRLIT , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto oops ;
2022-07-26 15:06:53 +00:00
}
}
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodeerrlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , v ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-28 14:07:18 +00:00
}
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_CHARLIT :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodecharlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , TOKEN_NAME_CHAR ( hcl , 0 ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2024-01-25 14:48:06 +00:00
case HCL_TOK_BCHRLIT :
frd - > obj = hcl_makecnodebchrlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , ( hcl_oob_t ) TOKEN_NAME_CHAR ( hcl , 0 ) ) ;
goto auto_xlist ;
2023-11-07 10:19:06 +00:00
case HCL_TOK_NUMLIT :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodenumlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_RADNUMLIT :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnoderadnumlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_FPDECLIT :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodefpdeclit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/*
2023-11-07 10:19:06 +00:00
case HCL_TOK_REAL :
2023-12-06 02:09:15 +00:00
frd - > obj = hcl_makecnoderealnum ( hcl , HCL_TOK_RVAL ( hcl ) ) ;
2022-07-28 14:07:18 +00:00
break ;
*/
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_STRLIT :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodestrlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2024-01-25 14:48:06 +00:00
case HCL_TOK_BSTRLIT :
frd - > obj = hcl_makecnodebstrlit ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
goto auto_xlist ;
2024-02-04 12:35:31 +00:00
case HCL_TOK_BINOP : /* TODO: handle this specially as a binary operator */
2023-11-07 10:19:06 +00:00
case HCL_TOK_IDENT :
2023-11-27 16:44:13 +00:00
frd - > obj = hcl_makecnodesymbol ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
2022-07-26 15:06:53 +00:00
2023-11-07 10:19:06 +00:00
case HCL_TOK_IDENT_DOTTED :
2023-12-01 18:35:59 +00:00
frd - > obj = hcl_makecnodedsymbol ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , 0 ) ;
goto auto_xlist ;
case HCL_TOK_IDENT_DOTTED_CLA :
frd - > obj = hcl_makecnodedsymbol ( hcl , 0 , TOKEN_LOC ( hcl ) , TOKEN_NAME ( hcl ) , 1 ) ;
2023-11-08 10:05:24 +00:00
goto auto_xlist ;
auto_xlist :
2024-01-14 15:47:01 +00:00
if ( auto_forge_xlist_if_at_block_beginning ( hcl , frd ) < = - 1 ) goto oops ;
2022-07-28 14:07:18 +00:00
break ;
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
if ( ! frd - > obj ) goto oops ;
2022-07-26 15:06:53 +00:00
#if 0
2022-07-28 14:07:18 +00:00
/* check if the element is read for a quoted list */
while ( flagv & QUOTED )
{
int oldflagv ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
HCL_ASSERT ( hcl , frd - > level > 0 ) ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* if so, append the element read into the quote list */
if ( chain_to_list ( hcl , obj ) < = - 1 ) goto oops ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* exit out of the quoted list. the quoted list can have one element only. */
obj = leave_list ( hcl , & flagv , & oldflagv ) ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* one frd->level up toward the top */
frd - > level - - ;
}
2022-07-26 15:06:53 +00:00
# endif
2022-07-28 14:07:18 +00:00
/* check if we are at the top frd->level */
2023-05-18 01:24:01 +00:00
if ( frd - > level < = 0 )
2022-07-28 14:07:18 +00:00
{
2022-07-29 11:29:47 +00:00
int n ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/* upon exit, we must be at the top level */
HCL_ASSERT ( hcl , frd - > level = = 0 ) ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
HCL_ASSERT ( hcl , hcl - > c - > r . st = = HCL_NULL ) ;
HCL_ASSERT ( hcl , frd - > obj ! = HCL_NULL ) ;
2022-07-26 15:06:53 +00:00
2022-07-29 11:29:47 +00:00
n = hcl - > c - > feed . on_cnode ( hcl , frd - > obj ) ;
hcl_freecnode ( hcl , frd - > obj ) ; /* not needed any more */
frd - > obj = HCL_NULL ;
if ( n < = - 1 ) goto oops ;
2022-07-28 14:07:18 +00:00
}
else
{
/* if not, append the element read into the current list.
* if we are not at the top frd - > level , we must be in a list */
2023-11-12 12:54:17 +00:00
if ( chain_to_list ( hcl , frd - > obj , list_loc ) < = - 1 ) goto oops ;
2022-07-26 15:06:53 +00:00
2023-05-18 01:24:01 +00:00
/* because it has been chained to the list, it belongs to the current stack top.
2022-07-28 14:07:18 +00:00
* mark that obj is not stand - alone by nullifying it . without this , if a jump
* is made to oops, the memory block pointed to by obj may get freed twice. */
2023-05-18 01:24:01 +00:00
frd - > obj = HCL_NULL ;
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
clear_comma_colon_flag ( hcl ) ;
2022-07-26 15:06:53 +00:00
}
2022-07-28 14:07:18 +00:00
ok :
2023-11-29 08:12:21 +00:00
if ( rbrace_again )
{
rbrace_again = 0 ;
list_loc = HCL_NULL ;
goto rbrace_ok ;
}
2022-07-26 15:06:53 +00:00
return 0 ;
oops :
2023-05-18 01:24:01 +00:00
if ( frd - > obj )
2022-07-26 15:06:53 +00:00
{
hcl_freecnode ( hcl , frd - > obj ) ;
frd - > obj = HCL_NULL ;
}
/* clean up the reader stack for a list */
2023-05-19 03:55:08 +00:00
feed_clean_up_reader_stack ( hcl ) ;
2022-07-26 15:06:53 +00:00
return - 1 ;
}
/* ------------------------------------------------------------------------ */
2022-05-25 14:23:43 +00:00
struct delim_token_t
{
const char * t_value ;
hcl_oow_t t_len ;
2023-11-04 13:58:31 +00:00
hcl_tok_type_t t_type ;
2022-05-25 14:23:43 +00:00
} ;
typedef struct delim_token_t delim_token_t ;
static delim_token_t delim_token_tab [ ] =
{
2023-05-18 01:24:01 +00:00
/* [NOTE 1]
* if you add a new token , ensure the first character is listed in is_delimchar ( )
*
2022-05-25 14:23:43 +00:00
* [ NOTE 2 ]
* for the implementation limitation in find_delim_token_char ( ) ,
* the entries in this table must be laid out in a certain way .
2023-05-18 01:24:01 +00:00
*
2022-05-25 14:23:43 +00:00
* Group the items with the same prefix together .
* List the shorter before the longer items in the same group .
* The length must not differ by greater than 1 between 2 items in the same group .
2023-05-18 01:24:01 +00:00
*
2022-07-26 00:06:29 +00:00
* [ NOTE 3 ]
2023-11-07 10:47:37 +00:00
* don ' t list # ( , # [ , # { here because of overlapping use of # for various purposes .
2022-07-26 00:06:29 +00:00
* however , # is included in is_delimchar ( ) .
2022-05-25 14:23:43 +00:00
*/
2023-11-07 10:19:06 +00:00
{ " ( " , 1 , HCL_TOK_LPAREN } ,
2024-01-03 12:11:23 +00:00
# if defined(HCL_TOK_LPARCOLON)
2023-11-07 10:19:06 +00:00
{ " (: " , 2 , HCL_TOK_LPARCOLON } ,
2024-01-03 12:11:23 +00:00
# endif
2023-11-07 10:19:06 +00:00
{ " ) " , 1 , HCL_TOK_RPAREN } ,
2022-05-25 14:23:43 +00:00
2023-11-07 10:19:06 +00:00
{ " [ " , 1 , HCL_TOK_LBRACK } ,
{ " ] " , 1 , HCL_TOK_RBRACK } ,
2023-05-18 01:24:01 +00:00
2023-11-07 10:19:06 +00:00
{ " { " , 1 , HCL_TOK_LBRACE } ,
{ " } " , 1 , HCL_TOK_RBRACE } ,
2022-05-25 14:23:43 +00:00
2023-11-07 10:19:06 +00:00
{ " | " , 1 , HCL_TOK_VBAR } ,
{ " , " , 1 , HCL_TOK_COMMA } ,
2022-05-25 14:23:43 +00:00
2023-11-07 10:19:06 +00:00
{ " . " , 1 , HCL_TOK_DOT } ,
{ " .. " , 2 , HCL_TOK_DBLDOTS } ,
2024-01-01 16:41:41 +00:00
{ " ... " , 3 , HCL_TOK_ELLIPSIS } , /* for variable arguments */
2022-05-25 14:23:43 +00:00
2024-01-01 16:41:41 +00:00
{ " : " , 1 , HCL_TOK_COLON } , /* key-value separator in dictionary */
2024-01-06 04:15:15 +00:00
{ " := " , 2 , HCL_TOK_COLONEQ } , /* assignment */
2024-02-03 09:59:17 +00:00
{ " :> " , 2 , HCL_TOK_COLONGT } ,
{ " :< " , 2 , HCL_TOK_COLONLT } ,
2024-02-03 09:50:10 +00:00
{ " :* " , 2 , HCL_TOK_COLONSTAR } , /* class instantiation method */
2023-11-07 10:19:06 +00:00
{ " :: " , 2 , HCL_TOK_DBLCOLONS } ,
2024-01-01 16:41:41 +00:00
{ " ::: " , 3 , HCL_TOK_TRPCOLONS } , /* superclass, class variables, class methods */
2023-11-07 10:47:37 +00:00
{ " ; " , 1 , HCL_TOK_SEMICOLON }
2022-05-25 14:23:43 +00:00
} ;
2022-07-22 08:02:14 +00:00
/* Scan rows row_start..row_end (inclusive) of delim_token_tab for entries
 * whose character at column 'col' equals 'c'.
 *
 * On a match, dt->row_start and dt->row_end receive the first and the last
 * row of the contiguous run of matching rows, dt->col_next is set to
 * col + 1, and 1 is returned. The scan stops at the first non-matching row
 * that follows a matching one. If no row matches, 'dt' is left untouched
 * and 0 is returned. 'hcl' is not used by this function. */
static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_flx_dt_t* dt)
{
	int row;
	int matched = 0;

	for (row = row_start; row <= row_end; row++)
	{
		const delim_token_t* ent = &delim_token_tab[row];

		if (col >= ent->t_len || c != ent->t_value[col])
		{
			/* the run of matching rows has ended */
			if (matched) break;
			continue;
		}

		if (!matched)
		{
			dt->row_start = row;
			matched = 1;
		}
		dt->row_end = row;
	}

	if (!matched) return 0;

	dt->col_next = col + 1;
	return 1;
}
2023-11-04 13:58:31 +00:00
/* Finish the token being collected: stamp it with 'type', hand it to
 * feed_process_token(), and put the lexer back into the HCL_FLX_START
 * state regardless of the outcome. The value returned by
 * feed_process_token() is passed back to the caller. */
static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_tok_type_t type)
{
	int rc;

	SET_TOKEN_TYPE(hcl, type);
	rc = feed_process_token(hcl);

	/* the state is reset even if the token processing has failed */
	hcl->c->feed.lx.state = HCL_FLX_START;
	return rc;
}
2023-11-04 13:58:31 +00:00
/* Append the character 'c' to the token name and then finish the token
 * as 'type' via feed_wrap_up(). */
static int feed_wrap_up_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_tok_type_t type)
{
	ADD_TOKEN_CHAR(hcl, c);
	return feed_wrap_up(hcl, type);
}
2023-11-04 13:58:31 +00:00
/* Append 'len' characters starting at 'str' to the token name and then
 * finish the token as 'type' via feed_wrap_up(). */
static int feed_wrap_up_with_str (hcl_t* hcl, const hcl_ooch_t* str, hcl_oow_t len, hcl_tok_type_t type)
{
	ADD_TOKEN_STR(hcl, str, len);
	return feed_wrap_up(hcl, type);
}
2022-07-22 08:02:14 +00:00
/* Switch the lexer to 'state' without touching the token being collected.
 * Always returns 0. */
static int feed_continue (hcl_t* hcl, hcl_flx_state_t state)
{
	hcl->c->feed.lx.state = state;
	return 0;
}
2022-07-22 08:02:14 +00:00
/* Append the character 'c' to the token name and switch the lexer to
 * 'state'. Returns 0 once the state has been changed. */
static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t state)
{
	ADD_TOKEN_CHAR(hcl, c);
	return feed_continue(hcl, state);
}
/* Statement forms of the feed_xxx() helpers. Each one makes the enclosing
 * function return -1 when the helper returns -1 or less. */
#define FEED_WRAP_UP(hcl, type) \
	do { if (feed_wrap_up(hcl, type) <= -1) return -1; } while (0)
#define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) \
	do { if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; } while (0)
#define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) \
	do { if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; } while (0)
#define FEED_CONTINUE(hcl, state) \
	do { if (feed_continue(hcl, state) <= -1) return -1; } while (0)
#define FEED_CONTINUE_WITH_CHAR(hcl, c, state) \
	do { if (feed_continue_with_char(hcl, c, state) <= -1) return -1; } while (0)
2022-07-26 15:06:53 +00:00
/* ------------------------------------------------------------------------ */
/* short-cuts to the basic lexer data: the current state and location */
#define FLX_STATE(hcl) ((hcl)->c->feed.lx.state)
#define FLX_LOC(hcl)   (&((hcl)->c->feed.lx.loc))

/* short-cuts to the per-state lexer data kept under feed.lx.u.
 * each macro yields a pointer to the member used by one lexer state */
#define FLX_DT(hcl)    (&((hcl)->c->feed.lx.u.dt))
#define FLX_HC(hcl)    (&((hcl)->c->feed.lx.u.hc))
#define FLX_HI(hcl)    (&((hcl)->c->feed.lx.u.hi))
#define FLX_HB(hcl)    (&((hcl)->c->feed.lx.u.hb))
#define FLX_HN(hcl)    (&((hcl)->c->feed.lx.u.hn))
#define FLX_PI(hcl)    (&((hcl)->c->feed.lx.u.pi))
#define FLX_BINOP(hcl) (&((hcl)->c->feed.lx.u.binop))
#define FLX_PN(hcl)    (&((hcl)->c->feed.lx.u.pn))
#define FLX_QT(hcl)    (&((hcl)->c->feed.lx.u.qt))
#define FLX_ST(hcl)    (&((hcl)->c->feed.lx.u.st))
#define FLX_BCP(hcl)   (&((hcl)->c->feed.lx.u.bcp))
2022-05-25 14:23:43 +00:00
2022-07-23 06:57:01 +00:00
static HCL_INLINE void init_flx_hc ( hcl_flx_hc_t * hc )
{
HCL_MEMSET ( hc , 0 , HCL_SIZEOF ( * hc ) ) ;
}
2022-07-23 10:09:36 +00:00
static HCL_INLINE void init_flx_hi ( hcl_flx_hi_t * hi )
{
HCL_MEMSET ( hi , 0 , HCL_SIZEOF ( * hi ) ) ;
}
2022-07-23 06:57:01 +00:00
2024-02-02 05:57:46 +00:00
static HCL_INLINE void init_flx_hb ( hcl_flx_hb_t * hb , hcl_ooch_t start_c )
2024-01-27 10:10:56 +00:00
{
HCL_MEMSET ( hb , 0 , HCL_SIZEOF ( * hb ) ) ;
2024-02-02 05:57:46 +00:00
hb - > start_c = start_c ;
2024-01-27 10:10:56 +00:00
}
2023-11-04 13:58:31 +00:00
static HCL_INLINE void init_flx_hn ( hcl_flx_hn_t * hn , hcl_tok_type_t tok_type , hcl_synerrnum_t synerr_code , int radix )
2022-07-23 10:09:36 +00:00
{
2022-07-26 00:06:29 +00:00
HCL_MEMSET ( hn , 0 , HCL_SIZEOF ( * hn ) ) ;
hn - > tok_type = tok_type ;
hn - > synerr_code = synerr_code ;
hn - > radix = radix ;
2022-07-23 10:09:36 +00:00
}
2022-07-23 06:57:01 +00:00
2024-01-24 09:38:33 +00:00
static HCL_INLINE void init_flx_qt ( hcl_flx_qt_t * qt , hcl_tok_type_t tok_type , hcl_synerrnum_t synerr_code , hcl_ooch_t end_char , hcl_ooch_t esc_char , hcl_oow_t min_len , hcl_oow_t max_len , int is_byte )
2022-07-23 06:57:01 +00:00
{
HCL_MEMSET ( qt , 0 , HCL_SIZEOF ( * qt ) ) ;
qt - > tok_type = tok_type ;
qt - > synerr_code = synerr_code ;
qt - > end_char = end_char ;
qt - > esc_char = esc_char ;
qt - > min_len = min_len ;
qt - > max_len = max_len ;
2024-01-24 09:38:33 +00:00
qt - > is_byte = is_byte ;
2022-07-23 06:57:01 +00:00
}
2022-07-26 00:06:29 +00:00
static HCL_INLINE void init_flx_pi ( hcl_flx_pi_t * pi )
2022-05-25 14:23:43 +00:00
{
2022-07-26 00:06:29 +00:00
HCL_MEMSET ( pi , 0 , HCL_SIZEOF ( * pi ) ) ;
}
2022-07-22 08:02:14 +00:00
2024-02-04 12:35:31 +00:00
static HCL_INLINE void init_flx_binop ( hcl_flx_binop_t * binop )
{
HCL_MEMSET ( binop , 0 , HCL_SIZEOF ( * binop ) ) ;
}
2022-07-26 00:06:29 +00:00
static HCL_INLINE void init_flx_pn ( hcl_flx_pn_t * pn )
{
HCL_MEMSET ( pn , 0 , HCL_SIZEOF ( * pn ) ) ;
}
static HCL_INLINE void init_flx_st ( hcl_flx_st_t * st , hcl_ooch_t sign_c )
{
HCL_MEMSET ( st , 0 , HCL_SIZEOF ( * st ) ) ;
st - > sign_c = sign_c ;
}
2022-05-25 14:23:43 +00:00
2024-02-02 05:57:46 +00:00
static HCL_INLINE void init_flx_bcp ( hcl_flx_bcp_t * bcp , hcl_ooch_t start_c )
2024-01-24 07:26:30 +00:00
{
2024-02-02 05:57:46 +00:00
HCL_MEMSET ( bcp , 0 , HCL_SIZEOF ( * bcp ) ) ;
bcp - > start_c = start_c ;
2024-01-24 07:26:30 +00:00
}
2022-07-26 00:06:29 +00:00
/* Prepare for a new token: set the token type to HCL_TOK_EOF, empty the
 * token name, and set the token location to the current lexer location. */
static void reset_flx_token (hcl_t* hcl)
{
	SET_TOKEN_TYPE(hcl, HCL_TOK_EOF); /* is it correct? */
	CLEAR_TOKEN_NAME(hcl);
	SET_TOKEN_LOC(hcl, FLX_LOC(hcl));
}
/* Handle the character 'c' in the HCL_FLX_START state, that is, at the
 * beginning of a new token.
 *
 * Returns 1 if 'c' has been consumed, 0 if the state has been switched
 * but 'c' has not been consumed, and -1 on failure. */
static int flx_start (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_dt_t* dt;

	HCL_ASSERT(hcl, FLX_STATE(hcl) == HCL_FLX_START);

	if (is_spacechar(c))
	{
		/* a line break becomes an EOL token only when the EOL trait is on.
		 * any other space is skipped */
		if ((hcl->option.trait & HCL_TRAIT_LANG_ENABLE_EOL) && is_linebreak(c))
		{
			reset_flx_token(hcl);
			FEED_WRAP_UP_WITH_CHAR(hcl, c, HCL_TOK_EOL);
		}
		return 1;
	}

	reset_flx_token(hcl);

	dt = FLX_DT(hcl);
	if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, dt))
	{
		/* the character is the first character of a delimiter token such as (, [, :, etc */
		if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
		{
			/* single character delimiter token */
			FEED_WRAP_UP_WITH_CHAR(hcl, c, delim_token_tab[dt->row_start].t_type);
		}
		else
		{
			/* consume c and move to the HCL_FLX_DELIM_TOKEN state */
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_DELIM_TOKEN);
		}
		return 1;
	}

	switch (c)
	{
		case HCL_OOCI_EOF:
			/* only EOF of the top-level stream is supposed to be fed in.
			 * the internal logic discards EOFs of included streams */
			FEED_WRAP_UP_WITH_CHARS(hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len, HCL_TOK_EOF);
			return 1;

		case '\\':
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_BACKSLASHED);
			return 1;

		/* there is no case for ';' here. it is never reached because the
		 * semicolon is part of delim_token_tab.
		 * TODO: revisit once the language spec is finalized on whether ';'
		 *       should start a comment (HCL_FLX_COMMENT) */

		case '#':
			/* no state data to initialize. just change the state */
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_HMARKED_TOKEN);
			return 1;

		case '\"':
			init_flx_qt(FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t), 0);
			/* discard the quote itself. move on to the QUOTED_TOKEN state */
			FEED_CONTINUE(hcl, HCL_FLX_QUOTED_TOKEN);
			return 1;

		case '\'':
			init_flx_qt(FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1, 0);
			/* discard the quote itself. move on to the QUOTED_TOKEN state */
			FEED_CONTINUE(hcl, HCL_FLX_QUOTED_TOKEN);
			return 1;

		case '+':
		case '-':
			init_flx_st(FLX_ST(hcl), c);
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_SIGNED_TOKEN);
			return 1;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* the digit is left unconsumed for the PLAIN_NUMBER state */
			init_flx_pn(FLX_PN(hcl));
			FEED_CONTINUE(hcl, HCL_FLX_PLAIN_NUMBER);
			break;

		case 'B':
		case 'b':
		case 'C':
		case 'c':
			init_flx_bcp(FLX_BCP(hcl), c);
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_BC_PREFIX);
			return 1;

		default:
			/* the character is left unconsumed for the next state */
			if (is_binopchar(c))
			{
				init_flx_binop(FLX_BINOP(hcl));
				FEED_CONTINUE(hcl, HCL_FLX_BINOP);
			}
			else
			{
				init_flx_pi(FLX_PI(hcl));
				FEED_CONTINUE(hcl, HCL_FLX_PLAIN_IDENT);
			}
			break;
	}

	return 0; /* not consumed */
}
2024-01-18 14:55:50 +00:00
/* Handle the character following a backslash seen in the START state.
 * A line break is consumed and the lexer goes back to the START state.
 * Any other character is reported as a stray backslash.
 *
 * Returns 1 if 'c' has been consumed and -1 on failure. */
static int flx_backslashed (hcl_t* hcl, hcl_ooci_t c)
{
	if (!is_linebreak(c))
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_BACKSLASH, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "stray backslash");
		return -1;
	}

	FEED_CONTINUE(hcl, HCL_FLX_START);
	return 1; /* consumed */
}
2022-07-23 06:57:01 +00:00
/* Handle a character inside a comment. Every character is consumed.
 * A line break additionally moves the lexer back to the START state. */
static int flx_comment (hcl_t* hcl, hcl_ooci_t c)
{
	if (is_linebreak(c))
	{
		FEED_CONTINUE(hcl, HCL_FLX_START);
	}
	return 1; /* consumed */
}
2022-07-22 08:02:14 +00:00
/* Handle a character while a multi-character delimiter token is being
 * matched against delim_token_tab.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_delim_token (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_dt_t* dt = FLX_DT(hcl);

	if (!find_delim_token_char(hcl, c, dt->row_start, dt->row_end, dt->col_next, dt))
	{
		/* no further match. what has been collected is the longest match so far */
		FEED_WRAP_UP(hcl, delim_token_tab[dt->row_start].t_type);
		return 0; /* not consumed */
	}

	if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
	{
		/* exactly one candidate is left and it is fully matched.
		 * complete the token and switch to the HCL_FLX_START state */
		FEED_WRAP_UP_WITH_CHAR(hcl, c, delim_token_tab[dt->row_start].t_type);
	}
	else
	{
		/* still ambiguous or incomplete. keep collecting */
		ADD_TOKEN_CHAR(hcl, c);
	}

	return 1; /* consumed */
}
2022-07-23 14:06:46 +00:00
/* Handle the character that follows a hash sign and dispatch to the
 * state handling the rest of the hash-marked token.
 *
 * Returns 1 if 'c' has been consumed, 0 if the state has been switched
 * without consuming 'c', and -1 on failure. */
static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
{
	/*
	 * #xXXXX   hexadecimal
	 * #oOOOO   octal
	 * #bBBBB   binary
	 * #eDDD    error
	 * #pHHH    smptr
	 * #\C      character
	 * #\xHHHH  unicode character
	 * #\UHHHH  unicode character
	 * #\uHHHH  unicode character
	 * #\backspace
	 * #\linefeed
	 * #\newline
	 * #\nul
	 * #\page
	 * #\return
	 * #\rubout
	 * #\space
	 * #\tab
	 * #\vtab
	 * #include
	 * #[ ]     byte array
	 * #( )     qlist
	 * #{ }     dictionary
	 */

	switch (c)
	{
		case '#':
		case '!':
			/* ## comment start
			 * #! also comment start.
			 * ; comment start */
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_COMMENT);
			goto consumed;

		/* --------------------------- */
		case 'x':
			init_flx_hn(FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16);
			goto radixed_number;

		case 'o':
			init_flx_hn(FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8);
			goto radixed_number;

		case 'b':
		case 'c':
			/* if #b or #c is followed by [, it is a starter for an array.
			 * otherwise, flx_hmarked_b() falls back to a binary number */
			init_flx_hb(FLX_HB(hcl), c);
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_HMARKED_B);
			goto consumed;

		case 'e':
			init_flx_hn(FLX_HN(hcl), HCL_TOK_ERRLIT, HCL_SYNERR_ERRLIT, 10);
			goto radixed_number;

		case 'p':
			init_flx_hn(FLX_HN(hcl), HCL_TOK_SMPTRLIT, HCL_SYNERR_SMPTRLIT, 16);
		radixed_number:
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_HMARKED_NUMBER);
			goto consumed;

		/* --------------------------- */
		case '\\':
			/* flx_hmarked_char() relies on the character count starting
			 * from 0 to detect a literal with no character. reset the
			 * state data before entering the state so that data left
			 * over from a previous token is never mistaken for a count */
			init_flx_hc(FLX_HC(hcl));
			FEED_CONTINUE_WITH_CHAR(hcl, c, HCL_FLX_HMARKED_CHAR);
			goto consumed;

		/* --------------------------- */
		case '[': /* #[ */
			FEED_WRAP_UP_WITH_CHAR(hcl, c, HCL_TOK_APAREN);
			goto consumed;

		case '(': /* #( */
			FEED_WRAP_UP_WITH_CHAR(hcl, c, HCL_TOK_QLPAREN);
			goto consumed;

		case '{': /* #{ */
			FEED_WRAP_UP_WITH_CHAR(hcl, c, HCL_TOK_DLPAREN);
			goto consumed;

		/* --------------------------- */
		default:
			/* the characters used as case values above can never be the
			 * first character of a hash-marked identifier */
			init_flx_hi(FLX_HI(hcl));
			FEED_CONTINUE(hcl, HCL_FLX_HMARKED_IDENT);
			goto not_consumed;
	}

consumed:
	return 1;

not_consumed:
	return 0;
}
2022-07-23 14:06:46 +00:00
/* Handle a character of a hash-marked character literal (#\...).
 *
 * Characters are collected until a delimiter character is met. At that
 * point the collected name is converted to a single character, the token
 * name is replaced with that character, and the token is finished as
 * HCL_TOK_CHARLIT.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_hmarked_char (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hc_t* hc = FLX_HC(hcl);

	if (!is_delimchar(c))
	{
		ADD_TOKEN_CHAR(hcl, c);
		hc->char_count++;
		return 1; /* consumed */
	}

	if (hc->char_count == 0)
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"no valid character in character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	if (TOKEN_NAME_LEN(hcl) >= 4)
	{
		/* pick the maximum number of hexadecimal digits allowed.
		 * it stays 0 if the name is not in a hexadecimal notation */
		int max_digit_count = 0;

		if (TOKEN_NAME_CHAR(hcl, 2) == 'x') max_digit_count = 2;
	#if (HCL_SIZEOF_OOCH_T >= 2)
		else if (TOKEN_NAME_CHAR(hcl, 2) == 'u') max_digit_count = 4;
	#endif
	#if (HCL_SIZEOF_OOCH_T >= 4)
		else if (TOKEN_NAME_CHAR(hcl, 2) == 'U') max_digit_count = 8;
	#endif

		if (max_digit_count > 0)
		{
			hcl_oow_t i;

			/* the digits begin at the index 3 */
			if (TOKEN_NAME_LEN(hcl) - 3 > max_digit_count)
			{
				hcl_setsynerrbfmt(hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
					"invalid hexadecimal character character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
				return -1;
			}

			c = 0;
			for (i = 3; i < TOKEN_NAME_LEN(hcl); i++)
			{
				if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, i)))
				{
					hcl_setsynerrbfmt(hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
						"invalid hexadecimal character character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
					return -1;
				}

				c = c * 16 + HCL_CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16);
			}
		}
		else if (does_token_name_match(hcl, VOCA_CHAR_BACKSPACE)) c = '\b';
		else if (does_token_name_match(hcl, VOCA_CHAR_LINEFEED))  c = '\n';
		else if (does_token_name_match(hcl, VOCA_CHAR_NEWLINE))   c = '\n'; /* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */
		else if (does_token_name_match(hcl, VOCA_CHAR_NUL))       c = '\0'; /* null character. not the object null */
		else if (does_token_name_match(hcl, VOCA_CHAR_PAGE))      c = '\f';
		else if (does_token_name_match(hcl, VOCA_CHAR_RETURN))    c = '\r';
		else if (does_token_name_match(hcl, VOCA_CHAR_RUBOUT))    c = '\x7F'; /* DEL */
		else if (does_token_name_match(hcl, VOCA_CHAR_SPACE))     c = ' ';
		else if (does_token_name_match(hcl, VOCA_CHAR_TAB))       c = '\t';
		else if (does_token_name_match(hcl, VOCA_CHAR_VTAB))      c = '\v';
		else
		{
			hcl_setsynerrbfmt(hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
				"invalid character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
			return -1;
		}
	}
	else
	{
		/* a single character following #\ */
		HCL_ASSERT(hcl, TOKEN_NAME_LEN(hcl) == 3);
		c = TOKEN_NAME_CHAR(hcl, 2);
	}

	/* reset the token name to the converted character */
	CLEAR_TOKEN_NAME(hcl);
	ADD_TOKEN_CHAR(hcl, c);
	FEED_WRAP_UP(hcl, HCL_TOK_CHARLIT);
	return 0; /* not consumed */
}
2022-07-23 14:06:46 +00:00
/* Handle a character of a hash-marked identifier.
 *
 * Characters are collected until a delimiter character is met. The
 * collected name is then resolved with get_directive_token_type() and the
 * token is finished with the resulting type.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hi_t* hi = FLX_HI(hcl);
	hcl_tok_type_t tok_type;

	if (!is_delimchar(c))
	{
		ADD_TOKEN_CHAR(hcl, c);
		hi->char_count++;
		return 1; /* consumed */
	}

	if (hi->char_count == 0)
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
			"no valid character after hash sign");
		return -1;
	}

	if (get_directive_token_type(hcl, &tok_type) <= -1)
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	FEED_WRAP_UP(hcl, tok_type);
	return 0; /* not consumed */
}
2024-01-27 05:26:51 +00:00
/* Handle the character that follows #b or #c.
 *
 * An opening bracket finishes the token as an array starter. The token
 * type depends on the letter recorded in start_c. Any other character is
 * left unconsumed and the lexer moves on to the HMARKED_NUMBER state with
 * the radix of 2.
 *
 * Returns 1 if 'c' has been consumed, 0 if it has not been, and -1 on
 * failure. */
static int flx_hmarked_b (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hb_t* hb = FLX_HB(hcl);

	if (c != '[')
	{
		init_flx_hn(FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2);
		FEED_CONTINUE(hcl, HCL_FLX_HMARKED_NUMBER);
		return 0; /* not consumed */
	}
	else
	{
		/* #b[ - byte array starter */
		/* TODO: more types.. #w[ .. #u32[ ... etc */
		hcl_tok_type_t tt;

		if (hb->start_c == 'b' || hb->start_c == 'B') tt = HCL_TOK_BAPAREN;
		else tt = HCL_TOK_CAPAREN;

		FEED_WRAP_UP_WITH_CHAR(hcl, c, tt);
		return 1; /* consumed */
	}
}
2022-07-23 14:06:46 +00:00
/* Handle a character of a hash-marked number such as #xFF, #o17, #e10.
 *
 * A character that is a valid digit in the radix is collected. A
 * non-delimiter character that is not a valid digit is collected as well
 * but counted as invalid. When a delimiter character is met, the token is
 * finished without consuming the delimiter:
 *  - no digit collected: a syntax error is set.
 *  - some invalid digits: the name is tried as a directive via
 *    get_directive_token_type().
 *  - otherwise: the token type recorded by init_flx_hn() is used.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hn_t* rn = FLX_HN(hcl);

	if (HCL_CHAR_TO_NUM(c, rn->radix) >= rn->radix)
	{
		/* not a valid digit in the radix */
		if (is_delimchar(c))
		{
			if (rn->digit_count == 0)
			{
				/* report the error with the code recorded for this kind of
				 * literal by init_flx_hn() rather than a fixed number
				 * literal code, so that #e and #p get their own codes */
				hcl_setsynerrbfmt(hcl, rn->synerr_code, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
					"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
				return -1;
			}
			else if (rn->invalid_digit_count > 0)
			{
				/* invalid as a radixed number, but this could be a hash-marked directive */
				hcl_tok_type_t tok_type;

				if (get_directive_token_type(hcl, &tok_type) <= -1)
				{
					hcl_setsynerrbfmt(hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
						"neither valid radixed number nor valid directive %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
					return -1;
				}

				FEED_WRAP_UP(hcl, tok_type);
				goto not_consumed;
			}

			FEED_WRAP_UP(hcl, rn->tok_type);
			goto not_consumed;
		}
		else
		{
			/* keep the character but remember it is not a valid digit */
			ADD_TOKEN_CHAR(hcl, c);
			rn->digit_count++;
			rn->invalid_digit_count++;
			goto consumed;
		}
	}
	else
	{
		HCL_ASSERT(hcl, !is_delimchar(c));
		ADD_TOKEN_CHAR(hcl, c);
		rn->digit_count++;
		goto consumed;
	}

consumed:
	return 1;

not_consumed:
	return 0;
}
2022-07-26 00:06:29 +00:00
/* Handle a character of a plain identifier which may be composed of
 * multiple segments separated by a period.
 *
 * Non-delimiter characters are collected into the current segment. A
 * delimiter character closes the current segment. If the delimiter is a
 * period, it is collected and the next segment begins. Any other
 * delimiter finishes the token without being consumed.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
{
	hcl_flx_pi_t* pi = FLX_PI(hcl);
	hcl_oow_t seg_start;
	hcl_oocs_t seg;
	hcl_tok_type_t tok_type;

	if (!is_delimchar(c)) /* [NOTE] . is one of the delimiter characters */
	{
		ADD_TOKEN_CHAR(hcl, c);
		pi->char_count++;
		pi->seg_len++;
		return 1; /* consumed */
	}

	if (pi->seg_len == 0)
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "blank segment");
		return -1;
	}

	/* classify the segment that has just been closed */
	seg_start = TOKEN_NAME_LEN(hcl) - pi->seg_len;
	seg.ptr = &TOKEN_NAME_CHAR(hcl, seg_start);
	seg.len = pi->seg_len;
	tok_type = classify_ident_token(hcl, &seg);
	if (tok_type != HCL_TOK_IDENT)
	{
		if (pi->seg_count == 0 && (tok_type == HCL_TOK_SELF || tok_type == HCL_TOK_SUPER))
		{
			/* allowed if it begins with self. or super.
			 * mark that it's prefixed with self or super */
			pi->is_cla = 1;
		}
		else
		{
			pi->non_ident_seg_count++;
			pi->last_non_ident_type = tok_type;
		}
	}

	pi->seg_len = 0; /* the length of the segment to be worked on */
	pi->seg_count++; /* total number of segments completed */

	if (c == '.')
	{
		/* move on to the next segment */
		ADD_TOKEN_CHAR(hcl, c);
		pi->char_count++;
		return 1; /* consumed */
	}

	/* finish */
	if (pi->non_ident_seg_count > 0)
	{
		if (pi->seg_count != 1)
		{
			/* a non-identifier segment is not allowed in a multi-segmented name */
			hcl_setsynerr(hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			return -1;
		}

		FEED_WRAP_UP(hcl, pi->last_non_ident_type);
		return 0; /* not consumed */
	}

	/* if single-segmented, perform classification (call classify_ident_token()) again
	 * because self and super as the first segment have not been marked as a non-identifier above */
	if (pi->seg_count == 1) tok_type = classify_ident_token(hcl, TOKEN_NAME(hcl));
	else if (pi->is_cla) tok_type = HCL_TOK_IDENT_DOTTED_CLA;
	else tok_type = HCL_TOK_IDENT_DOTTED;

	FEED_WRAP_UP(hcl, tok_type);
	return 0; /* not consumed */
}
2024-02-04 12:35:31 +00:00
/* Handle a character of a binary operator token.
 *
 * Binary operator characters are collected. Any other character finishes
 * the token as HCL_TOK_BINOP without being consumed.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_binop (hcl_t* hcl, hcl_ooci_t c) /* binary operator */
{
	/* the state data reachable via FLX_BINOP() is not needed here yet */
	if (is_binopchar(c))
	{
		ADD_TOKEN_CHAR(hcl, c);
		goto consumed;
	}
	else
	{
		FEED_WRAP_UP(hcl, HCL_TOK_BINOP);
		goto not_consumed;
	}

consumed:
	return 1;

not_consumed:
	return 0;
}
2022-07-26 00:06:29 +00:00
/* Handle a character of a plain number with an optional decimal point.
 *
 * Digits are collected and counted separately before and after the
 * decimal point. The first period seen is collected as the decimal point.
 * Any other character finishes the token as HCL_TOK_FPDECLIT if a decimal
 * point has been seen, or as HCL_TOK_NUMLIT otherwise, without being
 * consumed.
 *
 * Returns 1 if 'c' has been consumed, 0 if the token has been finished
 * without consuming 'c', and -1 on failure. */
static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */
{
	hcl_flx_pn_t* pn = FLX_PN(hcl);

	if (is_digitchar(c))
	{
		ADD_TOKEN_CHAR(hcl, c);
		pn->digit_count[pn->fpdec]++;
		return 1; /* consumed */
	}

	if (c == '.' && !pn->fpdec)
	{
		/* the first decimal point */
		pn->fpdec = 1;
		ADD_TOKEN_CHAR(hcl, c);
		return 1; /* consumed */
	}

	if (pn->digit_count[0] == 0)
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit before decimal point");
		return -1;
	}

	if (pn->fpdec && pn->digit_count[1] == 0)
	{
		hcl_setsynerrbfmt(hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point");
		return -1;
	}

	FEED_WRAP_UP(hcl, (pn->fpdec? HCL_TOK_FPDECLIT: HCL_TOK_NUMLIT));
	return 0; /* not consumed */
}
2022-07-23 06:57:01 +00:00
/* lexer state handler for the body of a quoted token (string or character literal).
 *
 * qt points to the state set up for the literal. the fields read here are
 * end_char, esc_char, regex, is_byte, min_len, max_len, tok_type and synerr_code.
 * the fields updated here are escaped, digit_count and c_acc.
 *
 * qt->escaped tracks the escape-sequence progress:
 *   0       - not inside an escape sequence
 *   1       - the escape character has just been seen
 *   3       - collecting up to 3 octal digits
 *   2, 4, 8 - collecting that many hexadecimal digits for x, u and U respectively
 *
 * returns 1 when c has been consumed. every successful path consumes c.
 * returns -1 after setting the syntax error qt->synerr_code when the token is invalid.
 * NOTE(review): ADD_TOKEN_CHAR and FEED_WRAP_UP are macros defined outside this
 * chunk - presumably they return -1 from this function on failure. */
static int flx_quoted_token ( hcl_t * hcl , hcl_ooci_t c ) /* string, character */
2022-05-25 14:23:43 +00:00
{
2022-07-22 08:02:14 +00:00
hcl_flx_qt_t * qt = FLX_QT ( hcl ) ;
2024-01-25 14:48:06 +00:00
/* by default an error is reported at the location of the token */
hcl_loc_t synerr_loc = * TOKEN_LOC ( hcl ) ;
2022-07-22 08:02:14 +00:00
2024-01-24 09:38:33 +00:00
/* the input ended before the closing quote */
if ( c = = HCL_OOCI_EOF ) goto invalid_token ;
2022-05-25 14:23:43 +00:00
2024-01-25 14:48:06 +00:00
/* a byte literal cannot hold a character beyond 0xFF.
 * report the error at the current feed location instead */
if ( qt - > is_byte & & c > 0xFF )
{
synerr_loc = * FLX_LOC ( hcl ) ;
goto invalid_token ;
}
2022-07-22 08:02:14 +00:00
if ( qt - > escaped = = 3 )
{
if ( c > = ' 0 ' & & c < = ' 7 ' )
{
/* more octal digits */
qt - > c_acc = qt - > c_acc * 8 + c - ' 0 ' ;
qt - > digit_count + + ;
if ( qt - > digit_count > = qt - > escaped )
{
/* should i limit the max to 0xFF/0377?
* if ( qt - > c_acc > 0377 ) qt - > c_acc = 0377 ; */
ADD_TOKEN_CHAR ( hcl , qt - > c_acc ) ;
qt - > escaped = 0 ;
}
goto consumed ;
}
else
{
/* a non-octal character ends the sequence early. emit the value
 * accumulated so far and fall through to handle c as ordinary input */
ADD_TOKEN_CHAR ( hcl , qt - > c_acc ) ;
qt - > escaped = 0 ;
}
}
else if ( qt - > escaped = = 2 | | qt - > escaped = = 4 | | qt - > escaped = = 8 )
{
/* collecting hexadecimal digits. qt->escaped is the number of digits wanted */
if ( c > = ' 0 ' & & c < = ' 9 ' )
{
qt - > c_acc = qt - > c_acc * 16 + c - ' 0 ' ;
qt - > digit_count + + ;
if ( qt - > digit_count > = qt - > escaped )
{
ADD_TOKEN_CHAR ( hcl , qt - > c_acc ) ;
qt - > escaped = 0 ;
}
goto consumed ;
}
else if ( c > = ' A ' & & c < = ' F ' )
{
qt - > c_acc = qt - > c_acc * 16 + c - ' A ' + 10 ;
qt - > digit_count + + ;
if ( qt - > digit_count > = qt - > escaped )
{
ADD_TOKEN_CHAR ( hcl , qt - > c_acc ) ;
qt - > escaped = 0 ;
}
goto consumed ;
}
else if ( c > = ' a ' & & c < = ' f ' )
{
qt - > c_acc = qt - > c_acc * 16 + c - ' a ' + 10 ;
qt - > digit_count + + ;
if ( qt - > digit_count > = qt - > escaped )
{
ADD_TOKEN_CHAR ( hcl , qt - > c_acc ) ;
qt - > escaped = 0 ;
}
goto consumed ;
}
else
{
/* a non-hexadecimal character ends the sequence early.
 * with no digit collected, the introducer letter itself is kept.
 * otherwise, the value accumulated so far is emitted.
 * then fall through to handle c as ordinary input */
hcl_ooch_t rc ;
rc = ( qt - > escaped = = 2 ) ? ' x ' :
2024-02-02 05:57:46 +00:00
( qt - > escaped = = 4 ) ? ' u ' : ' U ' ;
2022-07-22 08:02:14 +00:00
if ( qt - > digit_count = = 0 )
ADD_TOKEN_CHAR ( hcl , rc ) ;
else ADD_TOKEN_CHAR ( hcl , qt - > c_acc ) ;
qt - > escaped = 0 ;
}
}
2022-05-25 14:23:43 +00:00
2022-07-22 08:02:14 +00:00
if ( qt - > escaped = = 0 & & c = = qt - > end_char )
{
/* terminating quote */
2024-01-25 14:48:06 +00:00
/* qt->tok_type + qt->is_byte assumes that the token types for
* byte - string and byte - character literals are 1 greater than
* string and character literals . * see the definition of
* hcl_tok_type_t in hcl - prv . h */
FEED_WRAP_UP ( hcl , qt - > tok_type + qt - > is_byte ) ; /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */
2024-01-24 09:38:33 +00:00
/* NOTE(review): the minimum-length check runs after FEED_WRAP_UP. if that macro
 * already hands the token over for processing, a literal that is too short is
 * processed before it is rejected here - confirm this order is intended */
if ( TOKEN_NAME_LEN ( hcl ) < qt - > min_len ) goto invalid_token ;
2022-07-22 08:02:14 +00:00
goto consumed ;
}
2022-05-25 14:23:43 +00:00
2022-07-22 08:02:14 +00:00
if ( qt - > escaped = = 0 & & c = = qt - > esc_char )
{
/* start of an escape sequence. the escape character itself is not stored yet */
qt - > escaped = 1 ;
goto consumed ;
}
2022-05-25 14:23:43 +00:00
2022-07-22 08:02:14 +00:00
if ( qt - > escaped = = 1 )
{
/* c is the character right after the escape character.
 * single-letter escapes replace c with the control character they denote.
 * the replaced c is added to the token at the end of this function */
if ( c = = ' a ' ) c = ' \a ' ;
else if ( c = = ' b ' ) c = ' \b ' ;
else if ( c = = ' f ' ) c = ' \f ' ;
else if ( c = = ' n ' ) c = ' \n ' ;
else if ( c = = ' r ' ) c = ' \r ' ;
else if ( c = = ' t ' ) c = ' \t ' ;
else if ( c = = ' v ' ) c = ' \v ' ;
else if ( c > = ' 0 ' & & c < = ' 7 ' & & ! qt - > regex )
{
/* i don't support the octal notation for a regular expression.
* it conflicts with the backreference notation between \ 1 and \ 7 inclusive . */
qt - > escaped = 3 ;
qt - > digit_count = 1 ;
qt - > c_acc = c - ' 0 ' ;
goto consumed ;
}
else if ( c = = ' x ' )
{
qt - > escaped = 2 ;
qt - > digit_count = 0 ;
qt - > c_acc = 0 ;
goto consumed ;
}
# if (HCL_SIZEOF_OOCH_T >= 2)
2024-01-25 14:48:06 +00:00
/* the 4-digit form is compiled in only for character types of 2 bytes or more
 * and is not recognized inside a byte literal */
else if ( c = = ' u ' & & ! qt - > is_byte )
2022-07-22 08:02:14 +00:00
{
2024-01-25 14:48:06 +00:00
#if 0
if ( qt - > is_byte )
{
synerr_loc = * FLX_LOC ( hcl ) ;
goto invalid_token ;
}
# endif
2022-07-22 08:02:14 +00:00
qt - > escaped = 4 ;
qt - > digit_count = 0 ;
qt - > c_acc = 0 ;
goto consumed ;
}
# endif
# if (HCL_SIZEOF_OOCH_T >= 4)
2024-01-25 14:48:06 +00:00
/* the 8-digit form is compiled in only for character types of 4 bytes or more
 * and is not recognized inside a byte literal */
else if ( c = = ' U ' & & ! qt - > is_byte )
2022-07-22 08:02:14 +00:00
{
2024-01-25 14:48:06 +00:00
#if 0
if ( qt - > is_byte )
{
synerr_loc = * FLX_LOC ( hcl ) ;
goto invalid_token ;
}
# endif
2022-07-22 08:02:14 +00:00
qt - > escaped = 8 ;
qt - > digit_count = 0 ;
qt - > c_acc = 0 ;
goto consumed ;
}
# endif
else if ( qt - > regex )
{
/* if the following character doesn't compose a proper
* escape sequence , keep the escape character .
* an unhandled escape sequence can be handled
* outside this function since the escape character
* is preserved . */
ADD_TOKEN_CHAR ( hcl , qt - > esc_char ) ;
}
qt - > escaped = 0 ;
}
/* store c, which may have been replaced by a single-letter escape above */
ADD_TOKEN_CHAR ( hcl , c ) ;
2022-05-25 14:23:43 +00:00
2022-07-22 08:02:14 +00:00
consumed :
2024-01-24 09:38:33 +00:00
/* the maximum length is enforced each time a character is consumed */
if ( TOKEN_NAME_LEN ( hcl ) > qt - > max_len ) goto invalid_token ;
2022-07-22 08:02:14 +00:00
return 1 ;
2024-01-24 09:38:33 +00:00
invalid_token :
2024-01-25 14:48:06 +00:00
hcl_setsynerr ( hcl , qt - > synerr_code , & synerr_loc , HCL_NULL ) ;
2024-01-24 09:38:33 +00:00
return - 1 ;
2022-07-22 08:02:14 +00:00
}
2022-07-26 00:06:29 +00:00
/* lexer state handler entered right after a leading sign character.
 *
 * the sign is already in the token name buffer when this is called.
 * depending on what follows the sign, the lexer moves on to:
 *   - a radixed number        for a sign followed by #b, #o or #x
 *   - a hash-marked token     for a sign followed by # and anything else.
 *                             the sign alone is wrapped up as an identifier first
 *   - a plain number          for a sign followed by a digit
 *   - a plain identifier      for anything else
 *
 * returns 1 if c has been consumed and 0 if c must be fed again. */
static int flx_signed_token (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_st_t* sign = FLX_ST(hcl);

	if (sign->char_count == 0 && c == '#')
	{
		/* remember the hash mark and wait for the next character */
		ADD_TOKEN_CHAR (hcl, c);
		sign->hmarked = 1;
		sign->char_count++;
		return 1;
	}

	if (sign->hmarked)
	{
		int radix = 0;

		HCL_ASSERT (hcl, sign->char_count == 1);

		switch (c)
		{
			case 'b': radix = 2; break;
			case 'o': radix = 8; break;
			case 'x': radix = 16; break;
		}

		if (radix > 0)
		{
			init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, radix);
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_NUMBER);
			return 1;
		}

		/* at this point, the token name buffer holds +# or -# */
		HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 2);
		TOKEN_NAME_LEN(hcl)--; /* drop the trailing # so that only the sign remains */
		FEED_WRAP_UP (hcl, HCL_TOK_IDENT);

		/* restart the token as if HMARKED_TOKEN had been entered from START */
		reset_flx_token (hcl);
		/* the current character is on the same line as the hash mark,
		 * so the column must be greater than 1 */
		HCL_ASSERT (hcl, FLX_LOC(hcl)->colm > 1);
		FLX_LOC(hcl)->colm--; /* step the location back to the hash mark */
		ADD_TOKEN_CHAR (hcl, '#');
		FEED_CONTINUE (hcl, HCL_FLX_HMARKED_TOKEN);
		return 0;
	}

	HCL_ASSERT (hcl, sign->char_count == 0);

	if (is_digitchar(c))
	{
		/* the sign is not counted in pn->digit_count[0], which stays at 0 here */
		init_flx_pn (FLX_PN(hcl));
		FEED_CONTINUE (hcl, HCL_FLX_PLAIN_NUMBER);
		return 0;
	}

	init_flx_pi (FLX_PI(hcl));
	/* the sign is already in the token name buffer.
	 * account for it in the identifier state data */
	HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 1);
	FLX_PI(hcl)->char_count++;
	FLX_PI(hcl)->seg_len++;
	/* c is fed again at the next iteration */
	FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
	return 0;
}
2024-02-02 05:57:46 +00:00
/* lexer state handler entered after a b, B, c or C prefix character.
 *
 * if the prefix is followed by a double quote, a string literal begins.
 * if it is followed by a single quote, a character literal begins.
 * in both cases the prefix and the quote are discarded, and the literal is a
 * byte literal when the prefix is b or B.
 * any other character means the prefix was just the start of a plain
 * identifier. the prefix stays in the token buffer and c is fed again.
 *
 * returns 1 if c has been consumed and 0 if c must be fed again. */
static int flx_bc_prefix (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_bcp_t* pfx = FLX_BCP(hcl);
	int byte_lit;

	if (c != '\"' && c != '\'')
	{
		/* not followed by a quote. switch to the plain identifier */
		init_flx_pi (FLX_PI(hcl));
		/* the prefix is already in the token buffer. just adjust state data */
		FLX_PI(hcl)->char_count++;
		FLX_PI(hcl)->seg_len++;
		FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
		return 0; /* refeed c */
	}

	/* read the prefix before the quoted-token state data is initialized */
	byte_lit = (pfx->start_c == 'b' || pfx->start_c == 'B');
	reset_flx_token (hcl);

	if (c == '\"')
	{
		/* b" B" c" C" */
		init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t), byte_lit);
	}
	else
	{
		/* b' B' c' C' */
		init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1, byte_lit);
	}

	FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard prefix, quote and move on */
	return 1;
}
2022-07-26 15:06:53 +00:00
/* ------------------------------------------------------------------------ */
2022-07-22 08:02:14 +00:00
/* feeds a single character to the handler of the current lexer state.
 *
 * returns whatever the state handler returns.
 * an unknown state is an internal error: it trips an assertion, sets
 * HCL_EINTERN and returns -1. */
static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{
	int (*handler) (hcl_t* hcl, hcl_ooci_t c);

/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/

	switch (FLX_STATE(hcl))
	{
		case HCL_FLX_START:          handler = flx_start; break;
		case HCL_FLX_BACKSLASHED:    handler = flx_backslashed; break;
		case HCL_FLX_COMMENT:        handler = flx_comment; break;
		case HCL_FLX_DELIM_TOKEN:    handler = flx_delim_token; break;
		case HCL_FLX_HMARKED_TOKEN:  handler = flx_hmarked_token; break;
		case HCL_FLX_HMARKED_B:      handler = flx_hmarked_b; break;
		case HCL_FLX_HMARKED_CHAR:   handler = flx_hmarked_char; break;
		case HCL_FLX_HMARKED_IDENT:  handler = flx_hmarked_ident; break;
		case HCL_FLX_HMARKED_NUMBER: handler = flx_hmarked_number; break;
		case HCL_FLX_PLAIN_IDENT:    handler = flx_plain_ident; break;
		case HCL_FLX_BINOP:          handler = flx_binop; break;
		case HCL_FLX_PLAIN_NUMBER:   handler = flx_plain_number; break;
		case HCL_FLX_QUOTED_TOKEN:   handler = flx_quoted_token; break;
		case HCL_FLX_SIGNED_TOKEN:   handler = flx_signed_token; break;
		case HCL_FLX_BC_PREFIX:      handler = flx_bc_prefix; break;
		default:                     handler = HCL_NULL; break; /* unknown state */
	}

	if (handler) return handler(hcl, c);

	HCL_ASSERT (hcl, !"internal error - this must never happen");
	hcl_seterrbfmt (hcl, HCL_EINTERN, "internal error - unknown flx state - %d", FLX_STATE(hcl));
	return -1;
}
2022-08-02 13:41:13 +00:00
/* advances the lexer location past the consumed character ch.
 * a line break moves to column 1 of the next line.
 * any other character moves one column to the right. */
static void feed_update_lx_loc (hcl_t* hcl, hcl_ooci_t ch)
{
	if (!is_linebreak(ch))
	{
		hcl->c->feed.lx.loc.colm++;
		return;
	}

	hcl->c->feed.lx.loc.line++;
	hcl->c->feed.lx.loc.colm = 1;
}
2022-07-28 14:07:18 +00:00
2022-08-22 04:30:35 +00:00
/* drains the included streams.
 *
 * characters are read from the current input stream through the attached
 * reader and fed to the lexer one by one. when an included stream runs out,
 * the inclusion is ended and feeding goes on with whatever stream becomes
 * current. the loop stops once the current stream is the main stream again.
 *
 * must be called only while an included stream is the current input.
 * returns 0 on success and -1 on failure. */
static int feed_from_includee (hcl_t* hcl)
{
	HCL_ASSERT (hcl, hcl->c->curinp != HCL_NULL && hcl->c->curinp != &hcl->c->cci_arg);

	do
	{
		hcl_io_cciarg_t* in = hcl->c->curinp;
		hcl_ooci_t ch;
		int n;

		if (in->b.pos >= in->b.len)
		{
			/* the buffer is exhausted. read more from the stream */
			if (hcl->c->cci_rdr(hcl, HCL_IO_READ, in) <= -1) return -1;

			if (in->xlen <= 0)
			{
				/* got EOF from an included stream */
				feed_end_include (hcl);
				continue;
			}

			in->b.pos = 0;
			in->b.len = in->xlen;
		}

		ch = in->buf[in->b.pos];
		n = feed_char(hcl, ch);
		if (n <= -1) return -1;

		if (n >= 1)
		{
			/* consumed */
			feed_update_lx_loc (hcl, ch);
			in->b.pos += n;
		}

		if (hcl->c->feed.rd.do_include_file)
		{
			/* feed_process_token(), called for the "filename" token of the #include
			 * directive, only sets hcl->c->feed.rd.do_include_file to 1. the file
			 * is actually included here, after the return value of feed_char() has
			 * been used to advance the position in the including stream. */
			hcl->c->feed.rd.do_include_file = 0; /* clear this regardless of inclusion result */
			if (feed_begin_include(hcl) <= -1) return -1;
		}
	}
	while (hcl->c->curinp != &hcl->c->cci_arg);

	return 0;
}
2023-05-18 01:24:01 +00:00
/* prepares for feeding source text with hcl_feed().
 *
 * the compiler data is created on the first call if it does not exist yet,
 * and the feed state is reinitialized on every call.
 * if on_cnode is not HCL_NULL, it is installed as the cnode handler.
 * if it is HCL_NULL, the handler stays at the default set by init_feed().
 *
 * if the fed data contains an include directive, hcl_attachccio() must be
 * called first.
 *
 * returns 0 on success and -1 on failure. */
int hcl_beginfeed (hcl_t* hcl, hcl_on_cnode_t on_cnode)
{
	if (!hcl->c)
	{
		if (init_compiler(hcl) <= -1) return -1;
	}

	init_feed (hcl);

	if (on_cnode)
	{
		hcl->c->feed.on_cnode = on_cnode;
	}

	return 0;
}
/* signals the end of input to the feeder.
 * calling hcl_feed() with a null data pointer feeds the end-of-input marker.
 * returns what hcl_feed() returns. */
int hcl_endfeed (hcl_t* hcl)
{
	int rc;

	rc = hcl_feed(hcl, HCL_NULL, 0);
	return rc;
}
2022-05-25 14:23:43 +00:00
/* feeds source characters to the lexer.
 *
 * hcl  - the hcl instance. hcl_beginfeed() or hcl_attachccio() must have
 *        created the compiler data already.
 * data - the characters to feed. HCL_NULL feeds the end-of-input marker.
 * len  - the number of characters in data. ignored when data is HCL_NULL.
 *
 * each character is handed to feed_char(). a character that is not consumed
 * is fed again until it is. when a token requests a file inclusion, the
 * inclusion is started and the included streams are drained before the next
 * character of data is fed.
 *
 * when the end of input is fed, an EOF syntax error raised at the top level
 * is converted to success, so the caller can stop feeding after success from
 * hcl_feed(hcl, HCL_NULL, 0). this function does not set HCL_EFINIS.
 *
 * in the unicode build, feeding is refused with HCL_EPERM while
 * hcl_feedbchars() holds an incomplete byte sequence.
 *
 * returns 0 on success. on failure, the reader stack is cleaned up and -1
 * is returned. */
int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{
/* TODO: need to return the number of processed characters?
 *       need to stop after the first complete expression? */
	hcl_oow_t i;
	int x;

	HCL_ASSERT (hcl, hcl->c != HCL_NULL);

#if defined(HCL_OOCH_IS_UCH)
	if (hcl->c->feed.rsd.len > 0 && !hcl->c->feed.rsd.no_check)
	{
		hcl_seterrbfmt (hcl, HCL_EPERM, "feed disallowed for incomplete sequence pending more feeding");
		return -1;
	}
#endif

	if (data)
	{
		for (i = 0; i < len; )
		{
			x = feed_char(hcl, data[i]);
			if (x <= -1) goto oops; /* TODO: return the number of processed characters via an argument? */

			if (x > 0)
			{
				/* consumed */
				feed_update_lx_loc (hcl, data[i]);
				i += x; /* x is supposed to be 1. otherwise, some characters may get skipped. */
			}

			if (hcl->c->feed.rd.do_include_file)
			{
				/* clear the request before attempting the inclusion, the same way
				 * feed_from_includee() does. if the inclusion fails, the request
				 * must not linger and be retried by a later call. */
				hcl->c->feed.rd.do_include_file = 0;
				if (feed_begin_include(hcl) <= -1) goto oops;
			}

			if (hcl->c->curinp && hcl->c->curinp != &hcl->c->cci_arg && feed_from_includee(hcl) <= -1)
			{
				/* TODO: return the number of processed characters via an argument? */
				goto oops;
			}

			/* feed data[i] again if not consumed */
		}
	}
	else
	{
		for (i = 0; i < 1; ) /* weird loop in case feed_char() returns 0 */
		{
			x = feed_char(hcl, HCL_OOCI_EOF);
			if (x <= -1)
			{
				if (hcl->c->feed.rd.level <= 0 && HCL_ERRNUM(hcl) == HCL_ESYNERR && hcl_getsynerrnum(hcl) == HCL_SYNERR_EOF)
				{
					/* convert this EOF error to success as the caller knows EOF in the feed mode.
					 * the caller can safely stop feeding after getting success from hcl_feed(hcl, HCL_NULL, 0);
					 * in the feed mode, this function doesn't set HCL_EFINIS. */
					x = 1;
				}
				else
				{
					goto oops;
				}
			}
			i += x;
		}
	}

	return 0;

oops:
	/* if enter_list() is in feed_process_token(), the stack grows.
	 * leave_list() pops an element off the stack. the stack can be
	 * not empty if an error occurs outside feed_process_token() after
	 * leave_list() in it. for example,
	 *
	 *  (              #aaa
	 *  ^              ^
	 *  leave_list()   error in flx_hmarked_ident() before a full cnode is processed
	 */
	feed_clean_up_reader_stack (hcl);
	return -1;
}
2023-11-03 09:04:46 +00:00
/* feeds source text given as a byte sequence.
 *
 * when HCL_OOCH_IS_UCH is defined, the bytes are converted to hcl_uch_t
 * characters with the character manager returned by hcl_getcmgr() and the
 * converted characters are passed to hcl_feed(). bytes at the end of data
 * that do not form a complete character yet are kept in
 * hcl->c->feed.rsd.buf and are combined with the beginning of data given
 * to the next call.
 * otherwise, data is passed to hcl_feed() as it is.
 *
 * returns 0 on success and -1 on failure. */
int hcl_feedbchars ( hcl_t * hcl , const hcl_bch_t * data , hcl_oow_t len )
{
# if defined(HCL_OOCH_IS_UCH)
hcl_uch_t outbuf [ 128 ] ;
hcl_oow_t inlen , outlen , inpos , brwlen ;
int n ;
HCL_ASSERT ( hcl , hcl - > c ! = HCL_NULL ) ;
inpos = 0 ;
if ( hcl - > c - > feed . rsd . len > 0 )
{
hcl_oow_t rsdlen ;
/* handle the residue bytes from the previous feeding */
rsdlen = hcl - > c - > feed . rsd . len ; /* original residue length*/
/* borrow as many bytes from data as fit in the residue buffer */
brwlen = HCL_COUNTOF ( hcl - > c - > feed . rsd . buf ) - rsdlen ;
if ( len < brwlen ) brwlen = len ;
HCL_MEMCPY ( & hcl - > c - > feed . rsd . buf [ rsdlen ] , data , brwlen ) ;
hcl - > c - > feed . rsd . len + = brwlen ;
inlen = hcl - > c - > feed . rsd . len ;
outlen = 1 ; /* ensure that it can only convert 1 character */
n = hcl_conv_bchars_to_uchars_with_cmgr ( hcl - > c - > feed . rsd . buf , & inlen , outbuf , & outlen , hcl_getcmgr ( hcl ) , 0 ) ;
if ( outlen > 0 )
{
int x ;
/* rsd.no_check lets hcl_feed() accept input while rsd.len is still positive */
hcl - > c - > feed . rsd . no_check = 1 ;
x = hcl_feed ( hcl , outbuf , outlen ) ;
hcl - > c - > feed . rsd . no_check = 0 ;
if ( x < = - 1 ) return - 1 ;
}
if ( n < = - 1 )
{
if ( n = = - 3 | | ( n = = - 2 & & outlen > 0 ) )
{
/* n == -3. invalid sequence. more feeding is required */
/* n == -2. there were extra bytes for the second character in the input */
HCL_ASSERT ( hcl , ( n = = - 3 & & inlen = = 0 & & outlen = = 0 ) | | ( n = = - 2 & & inlen > 0 ) ) ;
/* nothing to do. carry on */
}
else
{
hcl_seterrnum ( hcl , ( n = = - 2 ) ? HCL_EBUFFULL : HCL_EECERR ) ;
return - 1 ;
}
}
/*
* | rsdlen | brwlen |
* | inlen |
*/
if ( inlen < rsdlen )
{
/* nothing was converted. all of data has been appended to the residue */
HCL_ASSERT ( hcl , inlen = = 0 ) ;
HCL_ASSERT ( hcl , brwlen = = len ) ;
/* brwlen needs no change */
/* hcl->c->feed.rsd.len needs no change */
}
else
{
/* one character was converted. the residue is used up. only the bytes of
 * data that took part in the conversion count as consumed */
HCL_ASSERT ( hcl , inlen > rsdlen ) ;
brwlen = inlen - rsdlen ; /* actual bytes borrowed and converted */
hcl - > c - > feed . rsd . len = 0 ;
}
inpos + = brwlen ;
len - = brwlen ;
}
/* convert and feed the rest of data in chunks of up to HCL_COUNTOF(outbuf) characters */
while ( len > 0 )
{
inlen = len ;
outlen = HCL_COUNTOF ( outbuf ) ;
/* hcl_convbtouchars() does not differentiate between illegal character and incomplete sequence.
* use a lower - level function that hcl_convbtouchars ( ) uses */
n = hcl_conv_bchars_to_uchars_with_cmgr ( & data [ inpos ] , & inlen , outbuf , & outlen , hcl_getcmgr ( hcl ) , 0 ) ;
if ( outlen > 0 & & hcl_feed ( hcl , outbuf , outlen ) < = - 1 ) return - 1 ;
if ( n < = - 1 )
{
2023-11-09 17:59:41 +00:00
/* the output buffer got full after some conversion. go on with the next chunk */
if ( n = = - 2 & & outlen > 0 ) goto ok ;
2023-11-03 09:04:46 +00:00
if ( n = = - 2 | | n = = - 3 )
{
/* the bytes left unconverted are saved as the residue for the next call.
 * NOTE(review): the size of the residue is guarded by an assertion only */
hcl_oow_t rsdlen ;
HCL_ASSERT ( hcl , len > inlen ) ;
rsdlen = len - inlen ;
HCL_ASSERT ( hcl , rsdlen < = HCL_COUNTOF ( hcl - > c - > feed . rsd . buf ) ) ;
HCL_MEMCPY ( hcl - > c - > feed . rsd . buf , & data [ inpos + inlen ] , rsdlen ) ;
hcl - > c - > feed . rsd . len = len - inlen ;
break ;
}
hcl_seterrnum ( hcl , HCL_EECERR ) ;
return - 1 ;
}
2023-11-09 17:59:41 +00:00
ok :
2023-11-03 09:04:46 +00:00
inpos + = inlen ;
len - = inlen ;
}
return 0 ;
# else
return hcl_feed ( hcl , data , len ) ;
# endif
}
/* feeds source text given as a sequence of hcl_uch_t characters.
 *
 * when HCL_OOCH_IS_UCH is defined, data is passed to hcl_feed() as it is.
 * otherwise, the characters are converted to bytes with hcl_convutobchars()
 * chunk by chunk and each converted chunk is passed to hcl_feed().
 *
 * hcl  - the hcl instance
 * data - the characters to feed
 * len  - the number of characters in data
 *
 * returns 0 on success and -1 on failure. */
int hcl_feeduchars (hcl_t* hcl, const hcl_uch_t* data, hcl_oow_t len)
{
#if defined(HCL_OOCH_IS_UCH)
	return hcl_feed(hcl, data, len);
#else
	hcl_bch_t outbuf[HCL_BCSIZE_MAX * 128];
	hcl_oow_t inlen, outlen, inpos;
	int n;

	inpos = 0;
	while (len > 0)
	{
		/* never give the converter more characters than outbuf can hold in the
		 * worst case of HCL_BCSIZE_MAX bytes per character. the conversion can
		 * then never fail just because the output buffer is too small, and a
		 * negative result always indicates a real conversion error. */
		inlen = HCL_COUNTOF(outbuf) / HCL_BCSIZE_MAX;
		if (inlen > len) inlen = len;
		outlen = HCL_COUNTOF(outbuf);

		n = hcl_convutobchars(hcl, &data[inpos], &inlen, outbuf, &outlen);
		/* feed what has been converted even if the conversion stopped on an error */
		if (outlen > 0 && hcl_feed(hcl, outbuf, outlen) <= -1) return -1;

		inpos += inlen;
		len -= inlen;
		if (n <= -1) return -1;
	}

	return 0;
#endif
}
2022-05-25 14:23:43 +00:00
/*
hcl_setopt ( ON_EXPRESSION CALLBACK ? ? ? ) ;
hcl_feed ( hcl , " (hello) (10) " , 12 ) ;
> on_token
> on_expression
> on_eof
default callback for on_expression ?
compile
execute ? ? / if in the interactive mode ? ( say it ' s used as a network protocol . execute each expression when received . . . . )
default callback for on_eof ?
execute or terminate ?
*/
2022-07-22 08:02:14 +00:00
/* ------------------------------------------------------------------------ */
2023-11-04 14:23:20 +00:00
/* TODO: rename compiler to something else that can include reader, udo_wrtr, and compiler
2022-07-22 08:02:14 +00:00
* move compiler initialization / finalization here to more common place */
2023-05-18 01:24:01 +00:00
/* garbage collection callback of the compiler.
 * the two object pointers held in the compiler data, r.s and r.e, are
 * replaced with what hcl_moveoop() returns for them.
 * nothing is done if the compiler data does not exist. */
static void gc_compiler_cb (hcl_t* hcl)
{
	hcl_compiler_t* cc = hcl->c;

	if (!cc) return;

	cc->r.s = hcl_moveoop(hcl, cc->r.s);
	cc->r.e = hcl_moveoop(hcl, cc->r.e);
}
2023-05-18 01:24:01 +00:00
/* finalization callback of the compiler. called before the hcl object is closed.
 *
 * releases every buffer owned by the compiler data, clears the source stream
 * names, detaches the source input stream and finally frees the compiler
 * data itself, leaving hcl->c set to HCL_NULL.
 * nothing is done if the compiler data does not exist. */
static void fini_compiler_cb (hcl_t* hcl)
{
	hcl_compiler_t* cc = hcl->c;

	if (!cc) return;

	/* control frame stack */
	if (cc->cfs.ptr)
	{
		hcl_freemem (hcl, cc->cfs.ptr);
		cc->cfs.ptr = HCL_NULL;
		cc->cfs.top = -1;
		cc->cfs.capa = 0;
	}

	/* temporary variable names */
	if (cc->tv.s.ptr)
	{
		hcl_freemem (hcl, cc->tv.s.ptr);
		cc->tv.s.ptr = HCL_NULL;
		cc->tv.s.len = 0;
		cc->tv.capa = 0;
		cc->tv.wcount = 0;
	}
	HCL_ASSERT (hcl, cc->tv.capa == 0);
	HCL_ASSERT (hcl, cc->tv.wcount == 0);

	/* block information arrays */
	if (cc->cblk.info)
	{
		hcl_freemem (hcl, cc->cblk.info);
		cc->cblk.info = HCL_NULL;
		cc->cblk.info_capa = 0;
		cc->cblk.depth = -1;
	}

	if (cc->clsblk.info)
	{
		hcl_freemem (hcl, cc->clsblk.info);
		cc->clsblk.info = HCL_NULL;
		cc->clsblk.info_capa = 0;
		cc->clsblk.depth = -1;
	}

	if (cc->fnblk.info)
	{
		hcl_freemem (hcl, cc->fnblk.info);
		cc->fnblk.info = HCL_NULL;
		cc->fnblk.info_capa = 0;
		cc->fnblk.depth = -1;
	}

	clear_sr_names (hcl);

	if (cc->tok.name.ptr) hcl_freemem (hcl, cc->tok.name.ptr);

	/* the stream must be detached while the compiler data still exists */
	hcl_detachccio (hcl);

	hcl_freemem (hcl, cc);
	hcl->c = HCL_NULL;
}
2023-05-18 01:24:01 +00:00
/* tears down the compiler data right away.
 * unlike fini_compiler_cb(), this is meant for error handling between a
 * successful init_compiler() and the failure of a subsequent operation.
 * the registered callback is deregistered before the data is finalized. */
static void fini_compiler (hcl_t* hcl)
{
	if (!hcl->c) return;

	hcl_deregcb (hcl, hcl->c->cbp);
	fini_compiler_cb (hcl);
}
2022-07-22 08:02:14 +00:00
2023-05-18 01:24:01 +00:00
/* creates and initializes the compiler data, hcl->c.
 *
 * the gc and fini callbacks of the compiler are registered first. if the
 * allocation of the compiler data fails afterwards, the callbacks are
 * deregistered again and the error message is prefixed with a note.
 *
 * hcl->c must be HCL_NULL when this is called.
 * returns 0 on success and -1 on failure. */
static int init_compiler (hcl_t* hcl)
{
	hcl_cb_t cb;
	hcl_cb_t* cbp = HCL_NULL;
	hcl_compiler_t* cc;
	struct voca_t* kw_null;

	HCL_ASSERT (hcl, hcl->c == HCL_NULL);

	HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb));
	cb.gc = gc_compiler_cb;
	cb.fini = fini_compiler_cb;
	cbp = hcl_regcb(hcl, &cb);
	if (HCL_UNLIKELY(!cbp)) return -1;

	hcl->c = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c));
	if (HCL_UNLIKELY(!hcl->c))
	{
		const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
		hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "failed to allocate compiler - %js", orgmsg);
		hcl_deregcb (hcl, cbp);
		return -1;
	}
	cc = hcl->c;

	cc->ilchr_ucs.ptr = &cc->ilchr;
	cc->ilchr_ucs.len = 1;

	cc->r.s = hcl->_nil;
	cc->r.e = hcl->_nil;

	/* the stack and the block depths start at -1 */
	cc->cfs.top = -1;
	cc->cblk.depth = -1;
	cc->clsblk.depth = -1;
	cc->fnblk.depth = -1;

	init_feed (hcl);
	cc->cbp = cbp;

	/* initialize the internal cons to represent a cell pointing to `null` in the `car` part */
	kw_null = &vocas[VOCA_KW_NULL];
	cc->fake_cnode.nil.cn_type = HCL_CNODE_NIL;
	cc->fake_cnode.nil.cn_tok.ptr = kw_null->str;
	cc->fake_cnode.nil.cn_tok.len = kw_null->len;
	cc->fake_cnode.cons_to_nil.cn_type = HCL_CNODE_CONS;
	cc->fake_cnode.cons_to_nil.u.cons.car = &cc->fake_cnode.nil;
	cc->fake_cnode.cons_to_nil.u.cons.cdr = HCL_NULL;

	return 0;
}
2022-07-22 08:02:14 +00:00
2023-11-05 13:31:33 +00:00
/* attaches a source input reader.
 *
 * the compiler data is created if it does not exist yet.
 * if cci_rdr is not HCL_NULL, it is asked to open the top-level source input
 * stream. once that succeeds, the stream of the previously attached reader,
 * if any, is closed and the new stream becomes the current input.
 * if cci_rdr is HCL_NULL, nothing else is done.
 *
 * returns 0 on success. if the stream cannot be opened, -1 is returned and
 * the compiler data is destroyed again if it was created by this call. */
int hcl_attachccio (hcl_t* hcl, hcl_io_impl_t cci_rdr)
{
	hcl_io_cciarg_t fresh_arg;
	int created_here = 0;

	if (!hcl->c)
	{
		if (init_compiler(hcl) <= -1) return -1;
		created_here = 1;
	}

	if (!cci_rdr) return 0;

	/* The name field and the includer field are HCL_NULL
	 * for the main stream */
	HCL_MEMSET (&fresh_arg, 0, HCL_SIZEOF(fresh_arg));
	fresh_arg.line = 1;
	fresh_arg.colm = 1;

	/* open the top-level source input stream */
	if (cci_rdr(hcl, HCL_IO_OPEN, &fresh_arg) <= -1)
	{
		if (created_here) fini_compiler (hcl);
		return -1;
	}

	if (hcl->c->cci_rdr)
	{
		/* close the old source input stream */
		hcl->c->cci_rdr (hcl, HCL_IO_CLOSE, &hcl->c->cci_arg);
	}

	hcl->c->cci_rdr = cci_rdr;
	hcl->c->cci_arg = fresh_arg;

	/* clear unneeded source stream names */
	/*clear_sr_names (hcl); <---- TODO: tricky to clean up here */

	/* initialize some other key fields */
	hcl->c->nungots = 0;

	/* the source stream is open. set it as the current input stream */
	hcl->c->curinp = &hcl->c->cci_arg;
	return 0;
}
2023-11-05 13:31:33 +00:00
/* detaches the source input reader.
 *
 * control may reach here after an error, in which case some included files
 * might not have been closed. every included stream still open is closed and
 * freed first, then the main stream is closed.
 * nothing is done if there is no compiler data or no reader attached. */
void hcl_detachccio (hcl_t* hcl)
{
	hcl_io_cciarg_t* cur;

	if (!hcl->c || !hcl->c->cci_rdr) return;

	cur = hcl->c->curinp;
	while (cur != &hcl->c->cci_arg)
	{
		hcl_io_cciarg_t* outer;

		/* nothing much to do about a close error */
		hcl->c->cci_rdr (hcl, HCL_IO_CLOSE, cur);

		outer = cur->includer;
		HCL_ASSERT (hcl, cur->name != HCL_NULL);
		hcl_freemem (hcl, cur);

		hcl->c->curinp = outer;
		cur = outer;
	}

	hcl->c->cci_rdr (hcl, HCL_IO_CLOSE, hcl->c->curinp);
	hcl->c->cci_rdr = HCL_NULL; /* ready for another attachment */
}
/* attaches a user data input reader and/or a user data output writer.
 *
 * each handler that is not HCL_NULL is asked to open its stream. only after
 * all requested streams have been opened are the previously attached
 * streams closed and replaced. a handler given as HCL_NULL leaves the
 * corresponding attachment untouched.
 *
 * returns 0 on success and -1 if a stream cannot be opened. if the output
 * stream fails to open, the input stream opened by this call is closed
 * again. */
int hcl_attachudio (hcl_t* hcl, hcl_io_impl_t udi_rdr, hcl_io_impl_t udo_wrtr)
{
	hcl_io_udiarg_t in_arg;
	hcl_io_udoarg_t out_arg;

	if (udi_rdr)
	{
		/* open the new input stream */
		HCL_MEMSET (&in_arg, 0, HCL_SIZEOF(in_arg));
		if (udi_rdr(hcl, HCL_IO_OPEN, &in_arg) <= -1) return -1;
	}

	if (udo_wrtr)
	{
		/* open the new output stream */
		HCL_MEMSET (&out_arg, 0, HCL_SIZEOF(out_arg));
		if (udo_wrtr(hcl, HCL_IO_OPEN, &out_arg) <= -1)
		{
			if (udi_rdr) udi_rdr (hcl, HCL_IO_CLOSE, &in_arg);
			return -1;
		}
	}

	if (udi_rdr)
	{
		/* close the old input stream before switching over */
		if (hcl->io.udi_rdr) hcl->io.udi_rdr (hcl, HCL_IO_CLOSE, &hcl->io.udi_arg);
		hcl->io.udi_rdr = udi_rdr;
		hcl->io.udi_arg = in_arg;
	}

	if (udo_wrtr)
	{
		/* close the old output stream before switching over */
		if (hcl->io.udo_wrtr) hcl->io.udo_wrtr (hcl, HCL_IO_CLOSE, &hcl->io.udo_arg);
		hcl->io.udo_wrtr = udo_wrtr;
		hcl->io.udo_arg = out_arg;
	}

	return 0;
}
2023-11-05 07:58:45 +00:00
/* detaches the user data input reader and output writer.
 * each attached handler is asked to close its stream and is then cleared,
 * so that another handler can be attached later. */
void hcl_detachudio (hcl_t* hcl)
{
	hcl_io_impl_t rdr = hcl->io.udi_rdr;
	hcl_io_impl_t wrtr;

	if (rdr)
	{
		rdr (hcl, HCL_IO_CLOSE, &hcl->io.udi_arg);
		hcl->io.udi_rdr = HCL_NULL; /* ready for another attachment */
	}

	wrtr = hcl->io.udo_wrtr;
	if (wrtr)
	{
		wrtr (hcl, HCL_IO_CLOSE, &hcl->io.udo_arg);
		hcl->io.udo_wrtr = HCL_NULL; /* ready for another attachment */
	}
}
2022-07-29 14:41:00 +00:00
2023-11-05 07:58:45 +00:00
/* asks the attached user data output writer to flush.
 * nothing is done if no writer is attached. */
void hcl_flushudio (hcl_t* hcl)
{
	if (!hcl->io.udo_wrtr) return;

	hcl->io.udo_wrtr (hcl, HCL_IO_FLUSH, &hcl->io.udo_arg);
}
2023-11-07 10:19:06 +00:00
/* TODO: discard the following three functions - hcl_setbasesrloc, hcl_readbasesrchar, hcl_readbasesrraw */
2023-05-19 03:55:08 +00:00
/* overrides the line and column numbers recorded for the main source input
 * stream. the compiler data must exist already. */
void hcl_setbasesrloc (hcl_t* hcl, hcl_oow_t line, hcl_oow_t colm)
{
	hcl_io_cciarg_t* base = &hcl->c->cci_arg;

	base->line = line;
	base->colm = colm;
}
2022-08-02 13:41:13 +00:00
2023-11-04 13:58:31 +00:00
/* reads a character using the base input stream.
 * this function reads the main stream regardless of the inclusion status
 * and ignores the ungot characters, so the caller must take extra care
 * when using it.
 *
 * returns the pointer to the character information stored in the main
 * stream data, or HCL_NULL on failure. */
hcl_lxc_t* hcl_readbasesrchar (hcl_t* hcl)
{
	hcl_io_cciarg_t* base = &hcl->c->cci_arg;

	if (_get_char(hcl, base) <= -1) return HCL_NULL;
	return &base->lxc;
}
2023-05-18 01:24:01 +00:00
2023-05-19 03:55:08 +00:00
/* provides the raw input interface to the attached source input handler.
 * it doesn't increment the line/column number, nor does it care about ungot
 * characters. it must be used with extra care.
 *
 * a source input reader must have been attached with hcl_attachccio() first.
 *
 * xlen - set to the number of characters read
 *
 * returns the pointer to the buffer of the main stream holding the
 * characters read, or HCL_NULL on failure. */
hcl_ooch_t* hcl_readbasesrraw (hcl_t* hcl, hcl_oow_t* xlen)
{
	hcl_io_cciarg_t* base;

	HCL_ASSERT (hcl, hcl->c != HCL_NULL);

	base = &hcl->c->cci_arg;
	if (hcl->c->cci_rdr(hcl, HCL_IO_READ, base) <= -1) return HCL_NULL;

	*xlen = base->xlen;
	return base->buf;
}