2021-01-09 16:53:47 +00:00
/*
* $ Id $
*
Copyright ( c ) 2016 - 2018 Chung , Hyung - Hwan . All rights reserved .
Redistribution and use in source and binary forms , with or without
modification , are permitted provided that the following conditions
are met :
1. Redistributions of source code must retain the above copyright
notice , this list of conditions and the following disclaimer .
2. Redistributions in binary form must reproduce the above copyright
notice , this list of conditions and the following disclaimer in the
documentation and / or other materials provided with the distribution .
THIS SOFTWARE IS PROVIDED BY THE AUTHOR " AS IS " AND ANY EXPRESS OR
2021-01-12 00:21:43 +00:00
IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE IMPLIED WARRANTIES
2021-01-09 16:53:47 +00:00
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED .
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT , INDIRECT ,
INCIDENTAL , SPECIAL , EXEMPLARY , OR CONSEQUENTIAL DAMAGES ( INCLUDING , BUT
NOT LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE ,
DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY , OR TORT
( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
*/
# include "hcl-prv.h"
/* unit to which copy_string_to() rounds up the capacity of a buffer it grows */
# define BUFFER_ALIGN 128
/* alignment units for other buffers. their users are not in this part of the
 * file - presumably byte-array, string and array literal buffers; confirm
 * against the code that references them. */
# define BALIT_BUFFER_ALIGN 128
# define SALIT_BUFFER_ALIGN 128
# define ARLIT_BUFFER_ALIGN 128
/*
 * convert the digit character 'c' to its numeric value in 'base'.
 * '0'-'9' map to 0-9 and letters of either case map to 10-35.
 * if 'c' is not a valid digit in 'base', the result is 'base' itself, so
 * a caller checks validity with CHAR_TO_NUM(c,base) < base.
 *
 * every use of an argument is parenthesized so that expression arguments
 * such as a conditional expression expand correctly. the arguments are
 * still evaluated more than once. don't pass expressions with side effects.
 */
#define CHAR_TO_NUM(c,base) \
	(((c) >= '0' && (c) <= '9')? ((((c) - '0') < (base))? ((c) - '0'): (base)): \
	 ((c) >= 'A' && (c) <= 'Z')? ((((c) - 'A' + 10) < (base))? ((c) - 'A' + 10): (base)): \
	 ((c) >= 'a' && (c) <= 'z')? ((((c) - 'a' + 10) < (base))? ((c) - 'a' + 10): (base)): (base))
/*
 * fixed words recognized by the reader. the table is indexed with
 * voca_id_t values, so the entries must stay in the same order as the
 * enumerators of voca_id_t.
 * 'str' is not necessarily null-terminated (the longest word fills all
 * 11 slots). always use 'len'.
 */
static struct voca_t
{
	hcl_oow_t len;
	hcl_ooch_t str[11];
} vocas[] =
{
	{  8, { '#','i','n','c','l','u','d','e' } },                   /* VOCA_INCLUDE */
	{  7, { '#','p','r','a','g','m','a' } },                       /* VOCA_PRAGMA */

	{ 11, { '#','\\','b','a','c','k','s','p','a','c','e' } },      /* VOCA_BACKSPACE */
	{ 10, { '#','\\','l','i','n','e','f','e','e','d' } },          /* VOCA_LINEFEED */
	{  9, { '#','\\','n','e','w','l','i','n','e' } },              /* VOCA_NEWLINE */
	{  5, { '#','\\','n','u','l' } },                              /* VOCA_NUL */
	{  6, { '#','\\','p','a','g','e' } },                          /* VOCA_PAGE */
	{  8, { '#','\\','r','e','t','u','r','n' } },                  /* VOCA_RETURN */
	{  8, { '#','\\','r','u','b','o','u','t' } },                  /* VOCA_RUBOUT */
	{  7, { '#','\\','s','p','a','c','e' } },                      /* VOCA_SPACE */
	{  5, { '#','\\','t','a','b' } },                              /* VOCA_TAB */
	{  6, { '#','\\','v','t','a','b' } },                          /* VOCA_VTAB */

	{  5, { '<','E','O','L','>' } },                               /* VOCA_EOL */
	{  5, { '<','E','O','F','>' } }                                /* VOCA_EOF */
};
/* indices into vocas[]. the enumerators must be listed in the same order
 * as the entries of that table. */
typedef enum voca_id_t
{
	VOCA_INCLUDE = 0,
	VOCA_PRAGMA,

	VOCA_BACKSPACE,
	VOCA_LINEFEED,
	VOCA_NEWLINE,
	VOCA_NUL,
	VOCA_PAGE,
	VOCA_RETURN,
	VOCA_RUBOUT,
	VOCA_SPACE,
	VOCA_TAB,
	VOCA_VTAB,

	VOCA_EOL,
	VOCA_EOF
} voca_id_t;
/* state bits kept in the low byte of the flag word of a list being read.
 * the LIST_FLAG_xxx_CONCODE macros use bits 8 to 15 of the same word. */
enum list_flag_t
{
	QUOTED    = 0x01, /* bit 0 */
	DOTTED    = 0x02, /* bit 1 - set by can_dot_list() */
	COMMAED   = 0x04, /* bit 2 - set by can_comma_list() */
	COLONED   = 0x08, /* bit 3 - set by can_colon_list() */
	CLOSED    = 0x10, /* bit 4 - set by chain_to_list() once the item after a dot is chained */
	JSON      = 0x20, /* bit 5 - set when a comma or colon follows the first item */
	DATA_LIST = 0x40  /* bit 6 */
};
/* the container code (concode) of a list is kept in bits 8 to 15 of its
 * flag word. the low byte holds the list_flag_t bits. */

/* extract the concode from the flag word x */
#define LIST_FLAG_GET_CONCODE(x) (((x) >> 8) & 0xFF)

/* store 'type' as the concode in the lvalue x, leaving all other bits intact.
 * 'type' is masked to 8 bits so that an out-of-range value cannot spill
 * into the bits above the field that LIST_FLAG_GET_CONCODE() reads. */
#define LIST_FLAG_SET_CONCODE(x,type) ((x) = ((x) & ~0xFF00) | (((type) & 0xFF) << 8))
2023-05-18 01:24:01 +00:00
static int init_compiler ( hcl_t * hcl ) ;
2021-01-09 16:53:47 +00:00
/*
 * convert the numeric literal in 'str' to a hcl_ooi_t value stored in '*num'.
 *
 * the literal is an optional sign followed by digits. if 'radixed' is
 * non-zero, the digits are preceded by '#' and a radix letter: 'x' (16),
 * 'o' (8) or 'b' (2). otherwise the digits are decimal.
 *
 * it is not a generic conversion function. it assumes a certain
 * pre-sanity check on the string done by the lexical analyzer.
 *
 * returns 0 on success. returns -1 after setting the error with
 * hcl_seterrbfmt() if the radix prefix is malformed (HCL_EINVAL), if
 * non-digit characters follow the digits (HCL_EINVAL), or if the value
 * doesn't fit in hcl_ooi_t (HCL_ERANGE).
 */
static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, hcl_ooi_t* num)
{
	int v, negsign, base;
	const hcl_ooch_t* ptr, * end;
	hcl_oow_t value, limit;

	negsign = 0;
	ptr = str->ptr;
	end = str->ptr + str->len;

	HCL_ASSERT (hcl, ptr < end);

	if (*ptr == '+' || *ptr == '-')
	{
		negsign = (*ptr == '-');
		ptr++;
	}

	if (radixed)
	{
		HCL_ASSERT (hcl, ptr < end);

		if (ptr >= end || *ptr != '#')
		{
			hcl_seterrbfmt (hcl, HCL_EINVAL, "radixed number not starting with # - %.*js", str->len, str->ptr);
			return -1;
		}
		ptr++; /* skip '#' */

		if (ptr >= end)
		{
			/* don't read the radix letter past the end of the string */
			hcl_seterrbfmt (hcl, HCL_EINVAL, "no radix specifier - %.*js", str->len, str->ptr);
			return -1;
		}

		if (*ptr == 'x') base = 16;
		else if (*ptr == 'o') base = 8;
		else if (*ptr == 'b') base = 2;
		else
		{
			hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid radix specifier - %c", *ptr);
			return -1;
		}
		ptr++;
	}
	else base = 10;

	HCL_ASSERT (hcl, ptr < end);

	value = 0;
	while (ptr < end && (v = CHAR_TO_NUM(*ptr, base)) < base)
	{
		/* check before accumulating. comparing the wrapped result with
		 * the previous value doesn't catch every overflow of a multiplication */
		if (value > (~(hcl_oow_t)0 - (hcl_oow_t)v) / (hcl_oow_t)base)
		{
			hcl_seterrbfmt (hcl, HCL_ERANGE, "number too big - %.*js", str->len, str->ptr);
			return -1;
		}
		value = value * (hcl_oow_t)base + (hcl_oow_t)v;
		ptr++;
	}

	if (ptr < end)
	{
		/* trailing garbage? */
		hcl_seterrbfmt (hcl, HCL_EINVAL, "trailing garbage after numeric literal - %.*js", str->len, str->ptr);
		return -1;
	}

	/* assume 2's complement - the magnitude of the minimum is one greater
	 * than the maximum. compute the limit in the unsigned type to avoid
	 * signed overflow */
	limit = (hcl_oow_t)HCL_TYPE_MAX(hcl_ooi_t);
	if (negsign) limit++;
	if (value > limit)
	{
		hcl_seterrbfmt (hcl, HCL_ERANGE, "number too big - %.*js", str->len, str->ptr);
		return -1;
	}

	if (negsign && value > 0)
	{
		/* value - 1 always fits in hcl_ooi_t. the minimum value is
		 * produced without negating an unrepresentable magnitude */
		*num = -(hcl_ooi_t)(value - 1) - 1;
	}
	else
	{
		*num = (hcl_ooi_t)value;
	}
	return 0;
}
/*
 * convert the integer literal in 'str' to a number object by handing the
 * digits to hcl_strtoint().
 *
 * an optional leading sign is consumed here. if 'radixed' is non-zero, the
 * digits must be preceded by '#' and a radix letter: 'x' (16), 'o' (8) or
 * 'b' (2). otherwise the radix is 10. a negative literal is indicated to
 * hcl_strtoint() by passing the negated radix.
 *
 * returns what hcl_strtoint() returns, or HCL_NULL with HCL_EINVAL set if
 * the radix prefix is malformed.
 */
static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, int radixed)
{
	const hcl_ooch_t* cur = str->ptr;
	const hcl_ooch_t* limit = str->ptr + str->len;
	int minus = 0;
	int radix = 10;

	HCL_ASSERT (hcl, cur < limit);

	if (*cur == '+' || *cur == '-')
	{
		minus = *cur - '+'; /* non-zero for '-' only */
		cur++;
	}

	if (radixed)
	{
		HCL_ASSERT (hcl, cur < limit);

		if (*cur != '#')
		{
			hcl_seterrbfmt (hcl, HCL_EINVAL, "radixed number not starting with # - %.*js", str->len, str->ptr);
			return HCL_NULL;
		}
		cur++; /* skip '#' */

		switch (*cur)
		{
			case 'x':
				radix = 16;
				break;
			case 'o':
				radix = 8;
				break;
			case 'b':
				radix = 2;
				break;
			default:
				hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid radix specifier - %c", *cur);
				return HCL_NULL;
		}
		cur++; /* skip the radix letter */
	}

	/* TODO: handle floating point numbers ... etc */
	return hcl_strtoint(hcl, cur, limit - cur, (minus? -radix: radix));
}
/*
 * convert a literal containing a decimal point to a fixed-point decimal.
 *
 * the last '.' in 'str' is removed in place by shifting the digits after
 * it one position to the left. the number of those digits becomes the
 * scale. the remaining str->len - 1 characters are converted with
 * hcl_strtoint() and combined with the scale by hcl_makefpdec().
 * note that the buffer of 'str' is modified while str->len is not.
 *
 * you must not call this function if the literal has no decimal point or
 * no digit after the point.
 *
 * returns HCL_NULL on failure. a syntax error is set at 'loc' if there
 * are too many digits after the point.
 */
static hcl_oop_t string_to_fpdec (hcl_t* hcl, hcl_oocs_t* str, const hcl_ioloc_t* loc)
{
	hcl_oow_t i;
	hcl_oow_t ndigits = 0;
	hcl_oop_t intval;

	/* scan backward for the decimal point */
	for (i = str->len; i > 0; )
	{
		i--;
		if (str->ptr[i] != '.') continue;

		ndigits = str->len - i - 1;
		if (ndigits > HCL_SMOOI_MAX)
		{
			hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMRANGE, loc, str, "too many digits after decimal point");
			return HCL_NULL;
		}

		HCL_ASSERT (hcl, ndigits > 0);
		/* remove the decimal point */
		HCL_MEMMOVE (&str->ptr[i], &str->ptr[i + 1], ndigits * HCL_SIZEOF(str->ptr[0]));
		break;
	}

	HCL_ASSERT (hcl, ndigits > 0);

	intval = hcl_strtoint(hcl, str->ptr, str->len - 1, 10);
	if (!intval) return HCL_NULL;

	return hcl_makefpdec(hcl, intval, ndigits);
}
/* return 1 if c is a space, form feed, line feed, carriage return,
 * horizontal tab or vertical tab, and 0 otherwise */
static HCL_INLINE int is_spacechar (hcl_ooci_t c)
{
	/* TODO: handle other space unicode characters */
	return c == ' ' || c == '\f' || c == '\n' ||
	       c == '\r' || c == '\t' || c == '\v';
}
2022-05-25 14:23:43 +00:00
/* return 1 if c is the line feed character, and 0 otherwise */
static HCL_INLINE int is_linebreak (hcl_ooci_t c)
{
	/* TODO: different line end conventions? */
	return (c == '\n')? 1: 0;
}
2021-01-09 16:53:47 +00:00
/* return 1 if c is a letter in a-z or A-Z, and 0 otherwise */
static HCL_INLINE int is_alphachar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	if (c >= 'a' && c <= 'z') return 1;
	return (c >= 'A' && c <= 'Z');
}
/* return 1 if c is a decimal digit in 0-9, and 0 otherwise */
static HCL_INLINE int is_digitchar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	return !(c < '0' || c > '9');
}
/* return 1 if c is a hexadecimal digit in 0-9, A-F or a-f, and 0 otherwise */
static HCL_INLINE int is_xdigitchar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	if (c >= '0' && c <= '9') return 1;
	if (c >= 'A' && c <= 'F') return 1;
	return (c >= 'a' && c <= 'f');
}
/* return 1 if c is a letter in a-z or A-Z or a digit in 0-9, and 0 otherwise */
static HCL_INLINE int is_alnumchar (hcl_ooci_t c)
{
	/* TODO: support full unicode */
	if (c >= 'a' && c <= 'z') return 1;
	if (c >= 'A' && c <= 'Z') return 1;
	return (c >= '0' && c <= '9');
}
2022-05-25 14:23:43 +00:00
/*
 * return 1 if c ends the token being collected, and 0 otherwise.
 * a delimiter is a bracket, one of ; | , . : # " ', a character for which
 * is_spacechar() is true, or the end of input.
 *
 * the end of input is compared against HCL_OOCI_EOF, the sentinel that
 * _get_char() stores in a hcl_ooci_t, rather than the HCL_UCI_EOF
 * constant.
 */
static HCL_INLINE int is_delimchar (hcl_ooci_t c)
{
	return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' ||
	       c == ';' || c == '|' || c == ',' || c == '.' || c == ':' ||
	       /* the first characters of tokens in delim_token_tab up to this point */
	       c == '#' || c == '\"' || c == '\'' || is_spacechar(c) || c == HCL_OOCI_EOF;
}
/*
 * copy or append the string 'src' to the growable string 'dst'.
 *
 * if 'append' is zero, 'dst' is overwritten from the beginning. otherwise
 * 'src' is added after the current content of 'dst', preceded by
 * 'add_delim' unless it is '\0'.
 * '*dst_capa' is the current capacity of dst->ptr in characters. when it
 * is too small, the buffer is reallocated to the required length rounded
 * up with HCL_ALIGN() and BUFFER_ALIGN, and '*dst_capa' is updated.
 *
 * returns 0 on success, -1 if the reallocation fails, in which case 'dst'
 * is left untouched.
 */
static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, hcl_oow_t* dst_capa, int append, hcl_ooch_t add_delim)
{
	hcl_oow_t offset = 0;        /* where the copy begins in dst */
	hcl_oow_t newlen = src->len; /* length of dst after the copy */

	if (append)
	{
		offset = dst->len;
		newlen += dst->len;
		if (add_delim != '\0') newlen++;
	}

	if (newlen > *dst_capa)
	{
		hcl_oow_t newcapa;
		hcl_ooch_t* newptr;

		newcapa = HCL_ALIGN(newlen, BUFFER_ALIGN);
		newptr = (hcl_ooch_t*)hcl_reallocmem(hcl, dst->ptr, HCL_SIZEOF(*newptr) * newcapa);
		if (HCL_UNLIKELY(!newptr)) return -1;

		dst->ptr = newptr;
		*dst_capa = newcapa;
	}

	if (append && add_delim)
	{
		dst->ptr[offset] = add_delim;
		offset++;
	}
	hcl_copy_oochars (&dst->ptr[offset], src->ptr, src->len);
	dst->len = newlen;
	return 0;
}
/* the following four macros make the enclosing function return -1
 * when the wrapped call fails. use them only in functions returning int. */

/* read the next character into hcl->c->lxc */
#define GET_CHAR(hcl) \
	do { if (get_char(hcl) <= -1) return -1; } while (0)

/* read the next character and store it in the lvalue 'ch'.
 * the parameter must not be named 'c'. such a name would also replace
 * the member names in (hcl)->c->lxc.c when the macro is expanded with
 * an argument other than 'c'. */
#define GET_CHAR_TO(hcl,ch) \
	do { \
		if (get_char(hcl) <= -1) return -1; \
		(ch) = (hcl)->c->lxc.c; \
	} while (0)

/* append 'l' characters at 's' to the name of the current token */
#define ADD_TOKEN_STR(hcl,s,l) \
	do { if (add_token_str(hcl, s, l) <= -1) return -1; } while (0)

/* append the character 'ch' to the name of the current token */
#define ADD_TOKEN_CHAR(hcl,ch) \
	do { if (add_token_char(hcl, ch) <= -1) return -1; } while (0)

/* modifiers of the current token kept in hcl->c->tok */
#define CLEAR_TOKEN_NAME(hcl) ((hcl)->c->tok.name.len = 0)
#define SET_TOKEN_TYPE(hcl,tv) ((hcl)->c->tok.type = (tv))
#define SET_TOKEN_LOC(hcl,locv) ((hcl)->c->tok.loc = *(locv))

/* accessors of the current token kept in hcl->c->tok */
#define TOKEN_TYPE(hcl) ((hcl)->c->tok.type)
#define TOKEN_NAME(hcl) (&(hcl)->c->tok.name)
#define TOKEN_NAME_CAPA(hcl) ((hcl)->c->tok.name_capa)
#define TOKEN_NAME_LEN(hcl) ((hcl)->c->tok.name.len)
#define TOKEN_NAME_PTR(hcl) ((hcl)->c->tok.name.ptr)
#define TOKEN_NAME_CHAR(hcl,index) ((hcl)->c->tok.name.ptr[index])
#define TOKEN_LOC(hcl) (&(hcl)->c->tok.loc)

/* the location of the character read most recently */
#define LEXER_LOC(hcl) (&(hcl)->c->lxc.l)
/* append 'len' characters at 'ptr' to the name of the current token.
 * returns what copy_string_to() returns - 0 on success, -1 on failure. */
static HCL_INLINE int add_token_str (hcl_t* hcl, const hcl_ooch_t* ptr, hcl_oow_t len)
{
	hcl_oocs_t piece;

	piece.len = len;
	piece.ptr = (hcl_ooch_t*)ptr; /* copy_string_to() only reads it */

	return copy_string_to(hcl, &piece, TOKEN_NAME(hcl), &TOKEN_NAME_CAPA(hcl), 1, '\0');
}
/* return 1 if the name of the current token is exactly the word that
 * vocas[id] holds, and 0 otherwise */
static HCL_INLINE int does_token_name_match (hcl_t* hcl, voca_id_t id)
{
	struct voca_t* voca = &vocas[id];

	if (hcl->c->tok.name.len != voca->len) return 0;
	return hcl_equal_oochars(hcl->c->tok.name.ptr, voca->str, voca->len)? 1: 0;
}
/* append the single character 'c' to the name of the current token.
 * returns what copy_string_to() returns - 0 on success, -1 on failure. */
static HCL_INLINE int add_token_char (hcl_t* hcl, hcl_ooch_t c)
{
	hcl_oocs_t one;

	one.len = 1;
	one.ptr = &c;

	return copy_string_to(hcl, &one, TOKEN_NAME(hcl), &TOKEN_NAME_CAPA(hcl), 1, '\0');
}
/* push the character '*c' back so that the next get_char() returns it.
 * the caller must not push more characters than hcl->c->ungot can hold. */
static HCL_INLINE void unget_char (hcl_t* hcl, const hcl_iolxc_t* c)
{
	/* Make sure that the unget buffer is large enough */
	HCL_ASSERT (hcl, hcl->c->nungots < HCL_COUNTOF(hcl->c->ungot));

	hcl->c->ungot[hcl->c->nungots] = *c;
	hcl->c->nungots++;
}
2022-07-24 00:49:03 +00:00
/* map the name of the current token to a directive token type.
 * stores HCL_IOTOK_INCLUDE or HCL_IOTOK_PRAGMA in '*tok_type' and returns 0
 * if the name is #include or #pragma. returns -1 without touching
 * '*tok_type' otherwise. */
static int get_directive_token_type (hcl_t* hcl, hcl_iotok_type_t* tok_type)
{
	if (does_token_name_match(hcl, VOCA_INCLUDE)) *tok_type = HCL_IOTOK_INCLUDE;
	else if (does_token_name_match(hcl, VOCA_PRAGMA)) *tok_type = HCL_IOTOK_PRAGMA;
	else return -1;

	return 0;
}
2023-05-19 03:55:08 +00:00
/*
 * read the next character from the input stream 'inp' into inp->lxc
 * together with its line, column and file name.
 * the buffer of 'inp' is refilled through the reader callback when it is
 * exhausted.
 *
 * returns 1 if a normal character has been read, 0 if the end of the
 * stream has been reached (inp->lxc.c is set to HCL_OOCI_EOF), and -1 if
 * the reader callback has failed.
 */
static int _get_char (hcl_t* hcl, hcl_iosrarg_t* inp)
{
	if (inp->b.pos >= inp->b.len)
	{
		/* nothing left in the buffer. ask the reader for more */
		if (hcl->c->reader(hcl, HCL_IO_READ, inp) <= -1) return -1;

		if (inp->xlen <= 0)
		{
			/* indicate that EOF has been read. lxc.c is also set to EOF. */
			inp->lxc.c = HCL_OOCI_EOF;
			inp->lxc.l.line = inp->line;
			inp->lxc.l.colm = inp->colm;
			inp->lxc.l.file = inp->name;
			return 0;
		}

		inp->b.pos = 0;
		inp->b.len = inp->xlen;
	}

	if (inp->lxc.c == '\n' || inp->lxc.c == '\r')
	{
		/* inp->lxc.c is still the previous character at this point.
		 * the new character to be read is still in the buffer (inp->buf).
		 * inp->colm has been incremented when the previous character
		 * has been read.
		 *
		 * if the previous character differs from the newline character
		 * seen last and sits at the beginning of a line, it is most
		 * likely the second character in a '\r\n' or '\n\r' sequence.
		 * the line and column numbers are not updated in that case. */
		int second_of_pair = (inp->line > 1 && inp->colm == 2 && inp->nl != inp->lxc.c);

		if (!second_of_pair)
		{
			/* the previous character was a newline. increment the
			 * line counter and reset the column to 1. doing it here
			 * rather than when the newline itself is read causes
			 * the line number for TOK_EOF to be the same line as
			 * the lxc newline. */
			inp->line++;
			inp->colm = 1;
			inp->nl = inp->lxc.c;
		}
	}

	inp->lxc.c = inp->buf[inp->b.pos++];
	inp->lxc.l.line = inp->line;
	inp->lxc.l.colm = inp->colm++;
	inp->lxc.l.file = inp->name;

	return 1; /* indicate that a normal character has been read */
}
2022-08-02 13:41:13 +00:00
/*
 * fetch the next character into hcl->c->lxc.
 * a character pushed back with unget_char() is returned first, most
 * recently pushed one first. otherwise the character is read from the
 * current input stream with _get_char().
 *
 * returns 0 if the character has come from the unget buffer, and what
 * _get_char() has returned otherwise. hcl->c->lxc is not updated if
 * _get_char() fails.
 */
static int get_char (hcl_t* hcl)
{
	if (hcl->c->nungots <= 0)
	{
		int n;

		n = _get_char(hcl, hcl->c->curinp);
		if (n >= 0) hcl->c->lxc = hcl->c->curinp->lxc;
		return n;
	}

	/* something in the unget buffer */
	hcl->c->nungots--;
	hcl->c->lxc = hcl->c->ungot[hcl->c->nungots];
	return 0;
}
2021-01-09 16:53:47 +00:00
/*
 * classify the identifier-like string 'v'.
 * returns the token type dedicated to the reserved words null, true,
 * false, self and super, and HCL_IOTOK_IDENT for anything else.
 */
static hcl_iotok_type_t classify_ident_token (hcl_t* hcl, const hcl_oocs_t* v)
{
	/* the table is constant. keep it in static storage instead of
	 * rebuilding it on the stack whenever an identifier is classified */
	static const struct
	{
		hcl_oow_t len;
		hcl_ooch_t name[10];
		hcl_iotok_type_t type;
	} tab[] =
	{
		{ 4, { 'n','u','l','l' },     HCL_IOTOK_NIL   },
		{ 4, { 't','r','u','e' },     HCL_IOTOK_TRUE  },
		{ 5, { 'f','a','l','s','e' }, HCL_IOTOK_FALSE },
		{ 4, { 's','e','l','f' },     HCL_IOTOK_SELF  },
		{ 5, { 's','u','p','e','r' }, HCL_IOTOK_SUPER }
	};
	hcl_oow_t i;

	for (i = 0; i < HCL_COUNTOF(tab); i++)
	{
		if (hcl_comp_oochars(v->ptr, v->len, tab[i].name, tab[i].len) == 0) return tab[i].type;
	}

	return HCL_IOTOK_IDENT;
}
2023-05-19 03:55:08 +00:00
/*
 * return 1 if the stream name 'sr_name' is still referenced, and 0 otherwise.
 * the name is compared by pointer, not by content, against the file of
 * the recorded syntax error location and the file of the last character
 * of the current input stream and of each of its includers.
 *
 * [NOTE]
 *  this is very error prone. if there are changes in reference
 *  points of this sr_name in the source code, this function also
 *  must be modified.
 */
static int is_sr_name_in_use (hcl_t* hcl, const hcl_ooch_t* sr_name)
{
	hcl_iosrarg_t* inp;

	if (hcl->c->synerr.loc.file == sr_name) return 1;

	for (inp = hcl->c->curinp; inp; inp = inp->includer)
	{
		if (inp->lxc.l.file == sr_name) return 1;
	}

	return 0;
}
/* free every node of the stream name list hcl->c->sr_names, leaving the
 * list empty */
static void clear_sr_names (hcl_t* hcl)
{
	hcl_iolink_t* node, * next;

	HCL_ASSERT (hcl, hcl->c != HCL_NULL);

	for (node = hcl->c->sr_names; node; node = next)
	{
		next = node->link;
		hcl->c->sr_names = next; /* unlink before freeing */
		hcl_freemem (hcl, node);
	}
}
2023-05-19 03:55:08 +00:00
/*
 * return a null-terminated copy of 'name' kept in the list
 * hcl->c->sr_names.
 * if an equal name is in the list already, the existing copy is returned.
 * otherwise a new node is allocated with the name stored right after the
 * link structure and is put at the head of the list.
 *
 * returns HCL_NULL if the allocation fails.
 */
static const hcl_ooch_t* add_sr_name (hcl_t* hcl, const hcl_oocs_t* name)
{
	hcl_iolink_t* node;
	hcl_ooch_t* stored;

	/* TODO: make search faster */
	for (node = hcl->c->sr_names; node; node = node->link)
	{
		stored = (hcl_ooch_t*)(node + 1);
		if (hcl_comp_oochars_oocstr(name->ptr, name->len, stored) == 0) return stored;
	}

	/* not found. create a node with room for the name and a terminating null */
	node = (hcl_iolink_t*)hcl_callocmem(hcl, HCL_SIZEOF(*node) + HCL_SIZEOF(hcl_ooch_t) * (name->len + 1));
	if (HCL_UNLIKELY(!node)) return HCL_NULL;

	stored = (hcl_ooch_t*)(node + 1);
	hcl_copy_oochars (stored, name->ptr, name->len);
	stored[name->len] = '\0';

	node->link = hcl->c->sr_names;
	hcl->c->sr_names = node;

	return stored;
}
/* -------------------------------------------------------------------------- */
2021-01-13 09:54:44 +00:00
/* push a new list level onto the reader stack hcl->c->r.st.
 * the level starts zero-filled except for the location 'loc' and the
 * flags 'flagv'. returns 0 on success, -1 if the allocation fails. */
static HCL_INLINE int enter_list (hcl_t* hcl, const hcl_ioloc_t* loc, int flagv)
{
	hcl_rstl_t* level;

	level = hcl_callocmem(hcl, HCL_SIZEOF(*level));
	if (HCL_UNLIKELY(!level)) return -1;

	level->flagv = flagv;
	level->loc = *loc;

	/* push */
	level->prev = hcl->c->r.st;
	hcl->c->r.st = level;
	return 0;
}
2021-01-13 09:54:44 +00:00
/*
 * pop the top level off the reader stack and return the list built in it.
 *
 * on success, '*oldflagv' receives the flags of the popped level and
 * '*flagv' receives the flags of the level exposed by the pop, or 0 if
 * the stack has become empty.
 * the head cons node is returned with the concode of the level stored in
 * it. if the level holds no item, the result of hcl_makecnodeelist() for
 * the location and concode of the level is returned.
 *
 * if the level ends right after a comma or a colon, a syntax error is
 * set, the nodes collected so far are freed and HCL_NULL is returned.
 * neither '*flagv' nor '*oldflagv' is written in that case.
 */
static HCL_INLINE hcl_cnode_t* leave_list (hcl_t* hcl, int* flagv, int* oldflagv)
{
	hcl_rstl_t* top;
	hcl_cnode_t* first;
	hcl_ioloc_t loc;
	int fv, concode;

	/* the stack must not be empty - cannot leave a list without entering it */
	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	/* remember what is needed before the stack node is disposed of */
	first = top->head;
	fv = top->flagv;
	loc = top->loc;
	concode = LIST_FLAG_GET_CONCODE(fv);

	hcl->c->r.st = top->prev; /* pop off */
	hcl_freemem (hcl, top);

	if (fv & (COMMAED | COLONED))
	{
		/* no value after the last comma or colon */
		hcl_setsynerr (hcl, ((fv & COMMAED)? HCL_SYNERR_COMMANOVALUE: HCL_SYNERR_COLONNOVALUE), TOKEN_LOC(hcl), HCL_NULL);
		if (first) hcl_freecnode (hcl, first);
		return HCL_NULL;
	}

	*oldflagv = fv;

	/* restore the flag for the outer returning level. if the stack is
	 * empty after popping, it is back to the top level, which can never
	 * be quoted. */
	*flagv = hcl->c->r.st? hcl->c->r.st->flagv: 0;

	/* NOTE: empty xlist will get translated to #nil.
	 *       this is useful when used in the lambda expression to express an empty argument. also in defun.
	 *      (lambda () ...) is equivalent to (lambda #nil ...)
	 *      (defun x() ...) */
	if (!first)
	{
		/* the list is empty */
		return hcl_makecnodeelist(hcl, &loc, concode);
	}

	HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(first));
	HCL_CNODE_CONS_CONCODE(first) = concode;
	return first;
}
/* check if a dot is allowed at this point of the list being read.
 * it is allowed only in a HCL_CONCODE_QLIST list holding at least one item.
 * if so, the DOTTED flag is set on the level and 1 is returned.
 * 0 is returned otherwise. */
static HCL_INLINE int can_dot_list (hcl_t* hcl)
{
	hcl_rstl_t* top;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	if (top->count > 0 && LIST_FLAG_GET_CONCODE(top->flagv) == HCL_CONCODE_QLIST)
	{
		/* mark the state that a dot has appeared in the list */
		top->flagv |= DOTTED;
		return 1;
	}

	return 0;
}
/*
 * check if a comma is allowed at this point of the list being read.
 * returns 1 after setting the COMMAED flag if it is, and 0 otherwise.
 *
 * a comma needs at least one item before it. a comma after the first item
 * sets the JSON flag on the level, and a later comma is accepted only if
 * that flag is set. it must not follow another comma or a colon
 * directly. in a dictionary, it is accepted after an even number of items
 * only. other than a dictionary, only an array and a byte array accept it.
 *
 * note that the JSON flag set for the first item stays set even if a
 * later check rejects the comma.
 */
static HCL_INLINE int can_comma_list (hcl_t* hcl)
{
	hcl_rstl_t* top;
	int concode;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	if (top->count <= 0) return 0;

	if (top->count == 1) top->flagv |= JSON;
	else if (!(top->flagv & JSON)) return 0;

	if (top->flagv & (COMMAED | COLONED)) return 0;

	concode = LIST_FLAG_GET_CONCODE(top->flagv);
	if (concode == HCL_CONCODE_DIC)
	{
		/* an odd count means that a key is waiting for its value */
		if (top->count & 1) return 0;
	}
	else if (concode != HCL_CONCODE_ARRAY && concode != HCL_CONCODE_BYTEARRAY)
	{
		return 0;
	}

	top->flagv |= COMMAED;
	return 1;
}
/*
 * check if a colon is allowed at this point of the list being read.
 * returns 1 after setting the COLONED flag if it is, and 0 otherwise.
 *
 * a colon needs at least one item before it. a colon after the first item
 * sets the JSON flag on the level, and a later colon is accepted only if
 * that flag is set. it must not follow a comma or another colon
 * directly. it is accepted in a dictionary only, and only after an odd
 * number of items.
 *
 * note that the JSON flag set for the first item stays set even if a
 * later check rejects the colon.
 */
static HCL_INLINE int can_colon_list (hcl_t* hcl)
{
	hcl_rstl_t* top;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	if (top->count <= 0) return 0;

	if (top->count == 1) top->flagv |= JSON;
	else if (!(top->flagv & JSON)) return 0;

	if (top->flagv & (COMMAED | COLONED)) return 0;
	if (LIST_FLAG_GET_CONCODE(top->flagv) != HCL_CONCODE_DIC) return 0;
	if ((top->count & 1) == 0) return 0;

	/* mark the state that a colon has appeared in the list */
	top->flagv |= COLONED;
	return 1;
}
/* clear the COMMAED and COLONED flags of the level at the top of the
 * reader stack */
static HCL_INLINE void clear_comma_colon_flag (hcl_t* hcl)
{
	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	hcl->c->r.st->flagv &= ~(COMMAED | COLONED);
}
2021-01-15 09:12:28 +00:00
/*
 * add the node 'obj' to the list being built at the top of the reader stack.
 *
 * - if the list has been closed already, a syntax error is set.
 * - if a dot has been seen, 'obj' becomes the cdr of the tail cell and
 *   the list is marked CLOSED. a cons node whose concode is not
 *   HCL_CONCODE_QLIST is wrapped in a shell node first.
 * - otherwise a new cons cell holding 'obj' is appended at the tail.
 *
 * returns 0 on success, -1 on a syntax error or a node creation failure.
 * 'obj' is not freed by this function on failure.
 */
static int chain_to_list (hcl_t* hcl, hcl_cnode_t* obj)
{
	hcl_rstl_t* top;
	hcl_cnode_t* cell;

	HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
	top = hcl->c->r.st;

	if (top->flagv & CLOSED)
	{
		/* the list has already been closed and cannot add more items
		 * for instance, see this faulty expression #(1 2 . 3 4 ).
		 * you can have only 1 item after the period. this condition
		 * can only be triggered by a wrong qlist where a period is
		 * allowed. so i can safely hard-code the error code to
		 * HCL_SYNERR_RPAREN */
		hcl_setsynerr (hcl, HCL_SYNERR_RPAREN, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
		return -1;
	}

	if (top->flagv & DOTTED)
	{
		hcl_cnode_t* last;

		/* the list must not be empty to have reached the dotted state */
		HCL_ASSERT (hcl, top->head != HCL_NULL);
		HCL_ASSERT (hcl, top->tail != HCL_NULL);
		HCL_ASSERT (hcl, top->count > 0);

		last = top->tail;
		HCL_ASSERT (hcl, last != HCL_NULL);
		HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(last));

		cell = obj;
		if (HCL_CNODE_IS_CONS(obj) && HCL_CNODE_CONS_CONCODE(obj) != HCL_CONCODE_QLIST)
		{
			/* if the last element is another non-data list
			 * for example, #( 1 2 . [ 3 4 5 ])
			 * use a shell node to wrap the actual object list node head
			 * for the compiler. */
			cell = hcl_makecnodeshell(hcl, HCL_CNODE_GET_LOC(obj), obj);
			if (HCL_UNLIKELY(!cell)) return -1;
		}

		/* chain the object via 'cdr' of the tail cell */
		last->u.cons.cdr = cell;

		/* update the flag to CLOSED so that no more than one item
		 * can follow the dot */
		top->flagv |= CLOSED;

		/* TODO: check overflow on count??? */
		top->count++;
		return 0;
	}

	if ((top->flagv & JSON) && top->count > 0 && !(top->flagv & (COMMAED | COLONED)))
	{
		/* there is no separator between array/dictionary elements
		 * for instance, [1 2] { 10 20 } */
		hcl_setsynerr (hcl, HCL_SYNERR_NOSEP, TOKEN_LOC(hcl), HCL_NULL);
		return -1;
	}

	cell = hcl_makecnodecons(hcl, HCL_CNODE_GET_LOC(obj), obj, HCL_NULL);
	if (HCL_UNLIKELY(!cell)) return -1;

	if (top->count <= 0)
	{
		/* the list head is not set yet. it is the first
		 * element added to the list. the new cons cell becomes
		 * both the head and the tail */
		HCL_ASSERT (hcl, top->tail == HCL_NULL);
		HCL_ASSERT (hcl, top->head == HCL_NULL);
		top->head = cell;
	}
	else
	{
		/* the new cons cell is not the first element.
		 * append it to the list */
		HCL_ASSERT (hcl, HCL_CNODE_IS_CONS(top->tail));
		top->tail->u.cons.cdr = cell;
	}
	top->tail = cell;

	/* TODO: check overflow on count??? */
	top->count++;
	return 0;
}
2021-01-30 16:13:27 +00:00
/* ------------------------------------------------------------------------ */
2021-01-17 17:45:39 +00:00
/* TODO:
hcl_cnodetoobj ( hcl_t * hcl , hcl_cnode_t * x )
{
* drop location information and compose object ? ?
2023-05-18 01:24:01 +00:00
* is it doable ? can convert a dotted symbol to a proper value ?
2021-01-17 17:45:39 +00:00
}
2023-05-18 01:24:01 +00:00
*/
2021-01-30 16:13:27 +00:00
2022-05-25 14:23:43 +00:00
/* ---------------------------------------------------------------------- */
2022-07-29 11:29:47 +00:00
/* the default handler for a cnode composed via feeding.
 * it just compiles the object node and returns the result of hcl_compile(). */
static int on_fed_cnode (hcl_t* hcl, hcl_cnode_t* obj)
{
	int n;

	n = hcl_compile(hcl, obj, 0);
	return n;
}
/* ---------------------------------------------------------------------- */
2022-05-25 14:23:43 +00:00
/* reset the feed state hcl->c->feed.
 * everything is zeroed first. then the lexer state is set to
 * HCL_FLX_START, the location to line 1 and column 1 with no file name,
 * and the cnode handler to on_fed_cnode(). */
static void init_feed (hcl_t* hcl)
{
	HCL_MEMSET (&hcl->c->feed, 0, HCL_SIZEOF(hcl->c->feed));

	hcl->c->feed.on_cnode = on_fed_cnode;

	hcl->c->feed.lx.state = HCL_FLX_START;
	hcl->c->feed.lx.loc.file = HCL_NULL;
	hcl->c->feed.lx.loc.line = 1;
	hcl->c->feed.lx.loc.colm = 1;
}
2022-07-26 15:06:53 +00:00
/* ------------------------------------------------------------------------ */
2022-07-28 14:07:18 +00:00
/*
 * start feeding from the stream named by the current token.
 *
 * the name is registered with add_sr_name(), a new stream argument is
 * allocated and opened through the reader callback with HCL_IO_OPEN, the
 * current feed location is saved for the including stream, and the new
 * stream becomes hcl->c->curinp with the feed location reset to line 1
 * and column 1 of it.
 *
 * returns 0 on success. returns -1 if memory allocation fails or the
 * stream can't be opened, in which case a HCL_SYNERR_INCLUDE syntax error
 * carrying the original error message is set.
 */
static int feed_begin_include (hcl_t* hcl)
{
	hcl_iosrarg_t* inc;
	const hcl_ooch_t* name;

	name = add_sr_name(hcl, TOKEN_NAME(hcl));
	if (HCL_UNLIKELY(!name)) return -1;

	inc = (hcl_iosrarg_t*)hcl_callocmem(hcl, HCL_SIZEOF(*inc));
	if (HCL_UNLIKELY(!inc)) return -1;

	inc->name = name;
	inc->line = 1;
	inc->colm = 1;
	/*inc->nl = '\0';*/
	inc->includer = hcl->c->curinp;

	if (hcl->c->reader(hcl, HCL_IO_OPEN, inc) <= -1)
	{
		const hcl_ooch_t* org_errmsg = hcl_backuperrmsg(hcl);
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to feed-include %js - %js", name, org_errmsg);
		hcl_freemem (hcl, inc);
		return -1;
	}

	/* save the position in the includer to get back to later */
	if (inc->includer == &hcl->c->srarg) /* top-level include */
	{
		/* TODO: remove hcl_readbasesrchar() and clean up this part.
		 * hcl_readbasesrchar(), if called in the middle of feeds,
		 * updates hcl->c->srarg's line and colm. so use a separate
		 * field to store the current feed location for now */
		hcl->c->feed.lx._oloc = hcl->c->feed.lx.loc;
	}
	else
	{
		inc->includer->name = hcl->c->feed.lx.loc.file;
		inc->includer->line = hcl->c->feed.lx.loc.line;
		inc->includer->colm = hcl->c->feed.lx.loc.colm;
	}

	hcl->c->feed.lx.loc.file = inc->name;
	hcl->c->feed.lx.loc.line = inc->line;
	hcl->c->feed.lx.loc.colm = inc->colm;

	/* switch to the includee's stream */
	hcl->c->curinp = inc;
	/* hcl->c->depth.incl++; */

	return 0;
}
2022-07-26 15:06:53 +00:00
2022-07-28 14:07:18 +00:00
/*
 * finish feeding from the current included stream and get back to its
 * includer.
 *
 * returns 0 if the current stream is the top-level one (no include is
 * in progress), 1 if the included stream has been ended successfully,
 * and -1 if the reader callback has failed to close it. the stream
 * argument is unlinked and freed and the feed location of the includer is
 * restored even when closing fails.
 */
static int feed_end_include (hcl_t* hcl)
{
	hcl_iosrarg_t* ended;
	int x;

	if (hcl->c->curinp == &hcl->c->srarg) return 0; /* no include */

	/* if it is an included file, close it and
	 * retry to read a character from an outer file */
	ended = hcl->c->curinp;
	x = hcl->c->reader(hcl, HCL_IO_CLOSE, ended);

	/* if closing has failed, still destroy the sio structure
	 * first as normal and return the failure below. this way,
	 * the caller doesn't call HCL_IO_CLOSE on hcl->c->curinp again. */
	hcl->c->curinp = ended->includer;

	if (hcl->c->curinp != &hcl->c->srarg)
	{
		hcl->c->feed.lx.loc.file = hcl->c->curinp->name;
		hcl->c->feed.lx.loc.line = hcl->c->curinp->line;
		hcl->c->feed.lx.loc.colm = hcl->c->curinp->colm;
	}
	else
	{
		/* back to the top level. use the location saved separately */
		hcl->c->feed.lx.loc = hcl->c->feed.lx._oloc;
	}

	HCL_ASSERT (hcl, ended->name != HCL_NULL);
	hcl_freemem (hcl, ended);
	/* hcl->parse.depth.incl--; */

	/* the failure mentioned above is returned here */
	if (x != 0) return -1;

	hcl->c->lxc = hcl->c->curinp->lxc;
	return 1; /* ended the included file successfully */
}
2022-07-26 15:06:53 +00:00
2023-05-19 03:55:08 +00:00
/*
 * Pops and frees every entry on the reader's list stack (hcl->c->r.st),
 * releasing the cnode chain attached to each entry if there is one.
 */
static void feed_clean_up_reader_stack (hcl_t* hcl)
{
	hcl_rstl_t* top;

	for (top = hcl->c->r.st; top; top = hcl->c->r.st)
	{
		/* unlink the entry before destroying it */
		hcl->c->r.st = top->prev;

		if (top->head) hcl_freecnode (hcl, top->head);
		hcl_freemem (hcl, top);
	}
}
2022-07-28 14:07:18 +00:00
/*
 * Processes the token currently held by the lexer and advances the reader
 * state kept in hcl->c->feed.rd. An s-expression is composed non-recursively
 * by pushing to and popping from the reader's own list stack.
 *
 * When a complete top-level object has been composed, it is handed over to
 * hcl->c->feed.on_cnode() and freed afterwards.
 *
 * Returns 0 on success. On failure, it frees the pending object and the
 * whole reader stack and returns -1.
 */
static int feed_process_token (hcl_t* hcl)
{
	hcl_frd_t* frd = &hcl->c->feed.rd;

	if (frd->expect_include_file)
	{
		/* the #include directive is an exception to the general expression rule.
		 * this block diverts the normal token processing */
		if (TOKEN_TYPE(hcl) != HCL_IOTOK_STRLIT)
		{
			hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			goto oops;
		}

		frd->expect_include_file = 0;

		/* only indicate that the file inclusion should be performed soon.
		 * the actual inclusion is not performed here so that the return
		 * value of feed_char() advances the input pointers properly. */
		frd->do_include_file = 1;
		goto ok;
	}

	if (frd->expect_vlist_item && TOKEN_TYPE(hcl) != HCL_IOTOK_IDENT && TOKEN_TYPE(hcl) != HCL_IOTOK_VBAR)
	{
		/* a vlist can contain variable names only */
		hcl_setsynerr (hcl, HCL_SYNERR_VARNAME, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
		goto oops;
	}

	switch (TOKEN_TYPE(hcl))
	{
		default:
			hcl_setsynerr (hcl, HCL_SYNERR_ILTOK, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			goto oops;

		case HCL_IOTOK_EOF:
			hcl_setsynerr (hcl, HCL_SYNERR_EOF, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			goto oops;

		case HCL_IOTOK_INCLUDE:
			/* TODO: should i limit where #include can be specified?
			 *       disallow it inside a list literal or an array literal? */
			frd->expect_include_file = 1;
			goto ok;

		case HCL_IOTOK_VBAR:
			if (frd->expect_vlist_item)
			{
				/* closer */
				int oldflagv;
				frd->expect_vlist_item = 0;
				frd->obj = leave_list(hcl, &frd->flagv, &oldflagv);
				frd->level--;
				break;
			}
			else
			{
				/* opener */

				/* the vlist is different from other lists in that
				 *   it uses the same opener and the closer
				 *   it allows only variable names.
				 *   it prohibits nesting of other lists
				 */
				if (hcl->c->r.st && (hcl->c->r.st->flagv & DATA_LIST))
				{
					/* the outer list is a data list */
					hcl_setsynerr (hcl, HCL_SYNERR_VBARBANNED, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
					goto oops;
				}

				/* neither a data list nor an executable list. handle this
				 * specially using the dedicated frd->expect_vlist_item flag */
				frd->flagv = 0;
				LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_VLIST);
				frd->expect_vlist_item = 1;
				goto start_list;
			}

		case HCL_IOTOK_LBRACK: /* [ */
			frd->flagv = DATA_LIST;
			LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_ARRAY);
			goto start_list;

		case HCL_IOTOK_BAPAREN: /* #[ */
			frd->flagv = DATA_LIST;
			LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_BYTEARRAY);
			goto start_list;

		case HCL_IOTOK_DLPAREN: /* #{ */
			frd->flagv = DATA_LIST;
			LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_DIC);
			goto start_list;

		case HCL_IOTOK_QLPAREN: /* #( */
			frd->flagv = DATA_LIST;
			LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_QLIST);
			goto start_list;

		case HCL_IOTOK_LPARCOLON: /* (: */
			frd->flagv = 0;
			LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_MLIST);
			goto start_list;

		case HCL_IOTOK_LPAREN: /* ( */
			frd->flagv = 0;
			LIST_FLAG_SET_CONCODE (frd->flagv, HCL_CONCODE_XLIST);
		start_list:
			if (frd->level >= HCL_TYPE_MAX(int))
			{
				/* the nesting level has become too deep */
				hcl_setsynerr (hcl, HCL_SYNERR_NESTING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
				goto oops;
			}

			/* push some data to simulate recursion into
			 * a list literal or an array literal */
			if (enter_list(hcl, TOKEN_LOC(hcl), frd->flagv) <= -1) goto oops;
			frd->level++;

			/* read the next token */
			goto ok;

		case HCL_IOTOK_DOT:
			if (frd->level <= 0 || !can_dot_list(hcl))
			{
				/* cannot have a period:
				 *   1. at the top frd->level - not inside ()
				 *   2. at the beginning of a list
				 *   3. inside an array, byte-array, dictionary, xlist */
				hcl_setsynerr (hcl, HCL_SYNERR_DOTBANNED, TOKEN_LOC(hcl), HCL_NULL);
				goto oops;
			}
			goto ok;

		case HCL_IOTOK_COLON:
			if (frd->level <= 0 || !can_colon_list(hcl))
			{
				hcl_setsynerr (hcl, HCL_SYNERR_COLONBANNED, TOKEN_LOC(hcl), HCL_NULL);
				goto oops;
			}
			goto ok;

		case HCL_IOTOK_COMMA:
			if (frd->level <= 0 || !can_comma_list(hcl))
			{
				hcl_setsynerr (hcl, HCL_SYNERR_COMMABANNED, TOKEN_LOC(hcl), HCL_NULL);
				goto oops;
			}
			goto ok;

		case HCL_IOTOK_RPAREN: /* xlist (), qlist #() */
		case HCL_IOTOK_RBRACK: /* bytearray #[], array[] */
		case HCL_IOTOK_RBRACE: /* dictionary #{} */
		{
			/* the closer required for each list kind and the error to
			 * report on mismatch. the table is indexed by the concode
			 * taken from the current list flags */
			static struct
			{
				int             closer;
				hcl_synerrnum_t synerr;
			} req[] =
			{
				{ HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN }, /* XLIST     ( )  */
				{ HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN }, /* MLIST     (: ) */
				{ HCL_IOTOK_RBRACK, HCL_SYNERR_RBRACK }, /* ARRAY     [ ]  */
				{ HCL_IOTOK_RBRACK, HCL_SYNERR_RBRACK }, /* BYTEARRAY #[ ] */
				{ HCL_IOTOK_RBRACE, HCL_SYNERR_RBRACE }, /* DIC       #{ } */
				{ HCL_IOTOK_RPAREN, HCL_SYNERR_RPAREN }  /* QLIST     #( ) */
			};

			int oldflagv;
			int concode;

			if (frd->level <= 0)
			{
				/* a closer without a matching opener */
				hcl_setsynerr (hcl, HCL_SYNERR_UNBALPBB, TOKEN_LOC(hcl), HCL_NULL);
				goto oops;
			}

			concode = LIST_FLAG_GET_CONCODE(frd->flagv);
			/* the lookup below must stay within the table */
			HCL_ASSERT (hcl, concode >= 0 && concode < (int)HCL_COUNTOF(req));

			if (req[concode].closer != TOKEN_TYPE(hcl))
			{
				hcl_setsynerr (hcl, req[concode].synerr, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
				goto oops;
			}

			frd->obj = leave_list(hcl, &frd->flagv, &oldflagv);
			frd->level--;
			break;
		}

		case HCL_IOTOK_NIL:
			frd->obj = hcl_makecnodenil(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_TRUE:
			frd->obj = hcl_makecnodetrue(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_FALSE:
			frd->obj = hcl_makecnodefalse(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_SELF:
			frd->obj = hcl_makecnodeself(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_SUPER:
			frd->obj = hcl_makecnodesuper(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_ELLIPSIS:
			frd->obj = hcl_makecnodeellipsis(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_TRPCOLONS:
			frd->obj = hcl_makecnodetrpcolons(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_DCSTAR:
			frd->obj = hcl_makecnodedcstar(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_SMPTRLIT:
		{
			hcl_oow_t i;
			hcl_oow_t v = 0;

			/* the first two characters are the prefix. the hexadecimal digits follow */
			HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 3);
			for (i = 2; i < TOKEN_NAME_LEN(hcl); i++)
			{
				HCL_ASSERT (hcl, is_xdigitchar(TOKEN_NAME_CHAR(hcl, i)));

				if (v > (HCL_TYPE_MAX(hcl_oow_t) >> 4))
				{
					/* appending one more hexadecimal digit would wrap the
					 * unsigned accumulator around. reject the literal rather
					 * than accepting a silently truncated value */
					hcl_setsynerr (hcl, HCL_SYNERR_SMPTRLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
					goto oops;
				}

				v = v * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16);
			}

			if (!HCL_IN_SMPTR_RANGE(v))
			{
				hcl_setsynerr (hcl, HCL_SYNERR_SMPTRLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
				goto oops;
			}

			frd->obj = hcl_makecnodesmptrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), v);
			break;
		}

		case HCL_IOTOK_ERRLIT:
		{
			hcl_oow_t i;
			hcl_ooi_t v = 0;

			/* the first two characters are the prefix. the decimal digits follow */
			HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 3);
			for (i = 2; i < TOKEN_NAME_LEN(hcl); i++)
			{
				HCL_ASSERT (hcl, is_digitchar(TOKEN_NAME_CHAR(hcl, i)));
				v = v * 10 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 10);

				/* checked on every digit so that the value is rejected
				 * as soon as it exceeds the limit */
				if (v > HCL_ERROR_MAX)
				{
					hcl_setsynerr (hcl, HCL_SYNERR_ERRLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
					goto oops;
				}
			}

			frd->obj = hcl_makecnodeerrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), v);
			break;
		}

		case HCL_IOTOK_CHARLIT:
			frd->obj = hcl_makecnodecharlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0));
			break;

		case HCL_IOTOK_NUMLIT:
			frd->obj = hcl_makecnodenumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_RADNUMLIT:
			frd->obj = hcl_makecnoderadnumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_FPDECLIT:
			frd->obj = hcl_makecnodefpdeclit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_STRLIT:
			frd->obj = hcl_makecnodestrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_IDENT:
			frd->obj = hcl_makecnodesymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;

		case HCL_IOTOK_IDENT_DOTTED:
			frd->obj = hcl_makecnodedsymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			break;
	}

	/* every case that breaks out of the switch must have produced an object */
	if (!frd->obj) goto oops;

	/* check if we are at the top frd->level */
	if (frd->level <= 0)
	{
		int n;

		/* upon exit, we must be at the top level */
		HCL_ASSERT (hcl, frd->level == 0);

		HCL_ASSERT (hcl, hcl->c->r.st == HCL_NULL);
		HCL_ASSERT (hcl, frd->obj != HCL_NULL);

		n = hcl->c->feed.on_cnode(hcl, frd->obj);
		hcl_freecnode (hcl, frd->obj); /* not needed any more */
		frd->obj = HCL_NULL;
		if (n <= -1) goto oops;
	}
	else
	{
		/* if not, append the element read into the current list.
		 * if we are not at the top frd->level, we must be in a list */
		if (chain_to_list(hcl, frd->obj) <= -1) goto oops;

		/* because it has been chained to the list, it belongs to the current stack top.
		 * mark that obj is not stand-alone by nullifying it. without this, if a jump
		 * is made to oops, the memory block pointed to by obj may get freed twice. */
		frd->obj = HCL_NULL;

		clear_comma_colon_flag (hcl);
	}

ok:
	return 0;

oops:
	if (frd->obj)
	{
		hcl_freecnode (hcl, frd->obj);
		frd->obj = HCL_NULL;
	}

	/* clean up the reader stack for a list */
	feed_clean_up_reader_stack (hcl);
	return -1;
}
/* ------------------------------------------------------------------------ */
2022-05-25 14:23:43 +00:00
/* a single entry of the delimiter token table */
typedef struct delim_token_t delim_token_t;
struct delim_token_t
{
	const char*      t_value; /* text of the delimiter token */
	hcl_oow_t        t_len;   /* number of characters in t_value */
	hcl_iotok_type_t t_type;  /* token type associated with the text */
};
static delim_token_t delim_token_tab [ ] =
{
2023-05-18 01:24:01 +00:00
/* [NOTE 1]
* if you add a new token , ensure the first character is listed in is_delimchar ( )
*
2022-05-25 14:23:43 +00:00
* [ NOTE 2 ]
* for the implementation limitation in find_delim_token_char ( ) ,
* the entries in this table must be laid out in a certain way .
2023-05-18 01:24:01 +00:00
*
2022-05-25 14:23:43 +00:00
* Group the items with the same prefix together .
* List the shorter before the longer items in the same group .
* The length must not differ by greater than 1 between 2 items in the same group .
2023-05-18 01:24:01 +00:00
*
2022-07-26 00:06:29 +00:00
* [ NOTE 3 ]
* don ' t list # ( and # [ here because of overlapping use of # for various purposes .
* however , # is included in is_delimchar ( ) .
2022-05-25 14:23:43 +00:00
*/
{ " ( " , 1 , HCL_IOTOK_LPAREN } ,
{ " (: " , 2 , HCL_IOTOK_LPARCOLON } ,
{ " ) " , 1 , HCL_IOTOK_RPAREN } ,
{ " [ " , 1 , HCL_IOTOK_LBRACK } ,
{ " ] " , 1 , HCL_IOTOK_RBRACK } ,
2023-05-18 01:24:01 +00:00
2022-05-25 14:23:43 +00:00
{ " { " , 1 , HCL_IOTOK_LBRACE } ,
{ " } " , 1 , HCL_IOTOK_RBRACE } ,
2022-07-26 00:06:29 +00:00
{ " | " , 1 , HCL_IOTOK_VBAR } ,
2022-05-25 14:23:43 +00:00
{ " , " , 1 , HCL_IOTOK_COMMA } ,
{ " . " , 1 , HCL_IOTOK_DOT } ,
{ " .. " , 2 , HCL_IOTOK_DBLDOTS } ,
{ " ... " , 3 , HCL_IOTOK_ELLIPSIS } ,
{ " : " , 1 , HCL_IOTOK_COLON } ,
{ " :: " , 2 , HCL_IOTOK_DBLCOLONS } ,
{ " ::* " , 3 , HCL_IOTOK_DCSTAR } ,
{ " ::: " , 3 , HCL_IOTOK_TRPCOLONS }
} ;
2022-07-22 08:02:14 +00:00
/*
 * Scans the rows row_start..row_end (inclusive) of delim_token_tab for
 * entries whose character at position col equals c.
 *
 * If any row matches, the range of the first contiguous run of matching
 * rows is stored in dt->row_start and dt->row_end, dt->col_next is set to
 * col + 1, and 1 is returned. Otherwise, dt is left untouched and 0 is
 * returned. The hcl parameter is not used.
 */
static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_flx_dt_t* dt)
{
	int row;
	int matched = 0;

	for (row = row_start; row <= row_end; row++)
	{
		const delim_token_t* ent = &delim_token_tab[row];

		if (col >= ent->t_len || c != ent->t_value[col])
		{
			/* a mismatch after a match ends the run of matching rows */
			if (matched) break;
			continue;
		}

		if (!matched)
		{
			dt->row_start = row;
			matched = 1;
		}
		dt->row_end = row;
	}

	if (matched) dt->col_next = col + 1;
	return matched;
}
/*
 * Finalizes the current token with the given type, hands it over to
 * feed_process_token(), and puts the lexer back to the HCL_FLX_START
 * state regardless of the outcome.
 *
 * Returns whatever feed_process_token() returns.
 */
static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_iotok_type_t type)
{
	int rc;

	SET_TOKEN_TYPE (hcl, type);
	rc = feed_process_token(hcl);

	/* the next character begins a new token */
	hcl->c->feed.lx.state = HCL_FLX_START;

	return rc;
}
/*
 * Appends the character c to the current token name and then wraps up
 * the token with the given type via feed_wrap_up().
 */
static int feed_wrap_up_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_iotok_type_t type)
{
	/* NOTE(review): ADD_TOKEN_CHAR is defined elsewhere. presumably it
	 * returns from this function on failure - confirm */
	ADD_TOKEN_CHAR (hcl, c);

	return feed_wrap_up(hcl, type);
}
/*
 * Appends len characters of str to the current token name and then wraps
 * up the token with the given type via feed_wrap_up().
 */
static int feed_wrap_up_with_str (hcl_t* hcl, const hcl_ooch_t* str, hcl_oow_t len, hcl_iotok_type_t type)
{
	/* NOTE(review): ADD_TOKEN_STR is defined elsewhere. presumably it
	 * returns from this function on failure - confirm */
	ADD_TOKEN_STR (hcl, str, len);

	return feed_wrap_up(hcl, type);
}
2022-07-22 08:02:14 +00:00
/*
 * Moves the lexer to the given state without touching the token being
 * collected. Always returns 0.
 */
static int feed_continue (hcl_t* hcl, hcl_flx_state_t state)
{
	hcl->c->feed.lx.state = state;

	return 0;
}
2022-07-22 08:02:14 +00:00
/*
 * Appends the character c to the current token name and moves the lexer
 * to the given state. Returns 0 once the state has been changed.
 */
static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t state)
{
	/* NOTE(review): ADD_TOKEN_CHAR is defined elsewhere. presumably it
	 * returns from this function on failure - confirm */
	ADD_TOKEN_CHAR (hcl, c);

	return feed_continue(hcl, state);
}
/* convenience wrappers over the feed_xxx() helpers above. each of them
 * makes the enclosing function return -1 when the helper fails. */
#define FEED_WRAP_UP(hcl, type) \
	do { \
		if (feed_wrap_up(hcl, type) <= -1) return -1; \
	} while(0)

#define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) \
	do { \
		if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; \
	} while(0)

#define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) \
	do { \
		if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; \
	} while(0)

#define FEED_CONTINUE(hcl, state) \
	do { \
		if (feed_continue(hcl, state) <= -1) return -1; \
	} while(0)

#define FEED_CONTINUE_WITH_CHAR(hcl, c, state) \
	do { \
		if (feed_continue_with_char(hcl, c, state) <= -1) return -1; \
	} while(0)
2022-07-26 15:06:53 +00:00
/* ------------------------------------------------------------------------ */
/* short-cuts to basic lexer data */
2022-07-22 08:02:14 +00:00
#define FLX_STATE(hcl) ((hcl)->c->feed.lx.state)  /* current lexer state */
#define FLX_LOC(hcl)   (&((hcl)->c->feed.lx.loc)) /* pointer to the current lexer location */

/* short-cuts to lexer state data. each yields a pointer to a member of feed.lx.u */
#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt)) /* delimiter token */
#define FLX_HC(hcl) (&((hcl)->c->feed.lx.u.hc)) /* hash-marked character */
#define FLX_HI(hcl) (&((hcl)->c->feed.lx.u.hi)) /* hash-marked identifier */
#define FLX_HN(hcl) (&((hcl)->c->feed.lx.u.hn)) /* hash-marked number */
#define FLX_PI(hcl) (&((hcl)->c->feed.lx.u.pi)) /* plain identifier */
#define FLX_PN(hcl) (&((hcl)->c->feed.lx.u.pn)) /* plain number */
#define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt)) /* quoted token */
#define FLX_ST(hcl) (&((hcl)->c->feed.lx.u.st)) /* signed token */
2022-05-25 14:23:43 +00:00
2022-07-23 06:57:01 +00:00
/* zero-fills the state data pointed to by hc */
static HCL_INLINE void init_flx_hc (hcl_flx_hc_t* hc)
{
	HCL_MEMSET (hc, 0, HCL_SIZEOF(*hc));
}
2022-07-23 10:09:36 +00:00
/* zero-fills the state data pointed to by hi */
static HCL_INLINE void init_flx_hi (hcl_flx_hi_t* hi)
{
	HCL_MEMSET (hi, 0, HCL_SIZEOF(*hi));
}
2022-07-23 06:57:01 +00:00
2022-07-26 00:06:29 +00:00
/*
 * Zero-fills the state data pointed to by hn and then records the token
 * type, the syntax error code and the radix given by the caller.
 */
static HCL_INLINE void init_flx_hn (hcl_flx_hn_t* hn, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, int radix)
{
	HCL_MEMSET (hn, 0, HCL_SIZEOF(*hn));

	hn->radix = radix;
	hn->tok_type = tok_type;
	hn->synerr_code = synerr_code;
}
2022-07-23 06:57:01 +00:00
/*
 * Zero-fills the state data pointed to by qt and then records the token
 * type, the syntax error code, the ending character, the escape character
 * and the minimum/maximum length given by the caller.
 */
static HCL_INLINE void init_flx_qt (hcl_flx_qt_t* qt, hcl_iotok_type_t tok_type, hcl_synerrnum_t synerr_code, hcl_ooch_t end_char, hcl_ooch_t esc_char, hcl_oow_t min_len, hcl_oow_t max_len)
{
	HCL_MEMSET (qt, 0, HCL_SIZEOF(*qt));

	/* what to produce on success and what to report on failure */
	qt->tok_type = tok_type;
	qt->synerr_code = synerr_code;

	/* the characters of special meaning */
	qt->end_char = end_char;
	qt->esc_char = esc_char;

	/* the length limits */
	qt->min_len = min_len;
	qt->max_len = max_len;
}
2022-07-26 00:06:29 +00:00
/* zero-fills the state data pointed to by pi */
static HCL_INLINE void init_flx_pi (hcl_flx_pi_t* pi)
{
	HCL_MEMSET (pi, 0, HCL_SIZEOF(*pi));
}
2022-07-22 08:02:14 +00:00
2022-07-26 00:06:29 +00:00
/* zero-fills the state data pointed to by pn */
static HCL_INLINE void init_flx_pn (hcl_flx_pn_t* pn)
{
	HCL_MEMSET (pn, 0, HCL_SIZEOF(*pn));
}
/* zero-fills the state data pointed to by st and then records the sign character */
static HCL_INLINE void init_flx_st (hcl_flx_st_t* st, hcl_ooch_t sign_c)
{
	HCL_MEMSET (st, 0, HCL_SIZEOF(*st));

	st->sign_c = sign_c;
}
2022-05-25 14:23:43 +00:00
2022-07-26 00:06:29 +00:00
/*
 * Prepares for a new token: sets the token type to HCL_IOTOK_EOF, clears
 * the token name, and sets the token location to the current lexer location.
 */
static void reset_flx_token (hcl_t* hcl)
{
	SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */
	CLEAR_TOKEN_NAME (hcl);
	SET_TOKEN_LOC (hcl, FLX_LOC(hcl));
}
/*
 * Handles a character in the HCL_FLX_START state, where no token is in
 * progress. Depending on the character, it either completes a token
 * straight away or switches the lexer to the state that collects the rest
 * of the token.
 *
 * Returns 1 if the character has been consumed, 0 if it must be fed again
 * in the new state, and -1 on failure.
 */
static int flx_start (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_dt_t* dt;
	int consumed = 1;

	HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START);

	/* spaces between tokens are skipped */
	if (is_spacechar(c)) return 1;

	reset_flx_token (hcl);

	dt = FLX_DT(hcl);
	if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, dt))
	{
		/* the character begins one or more delimiter tokens such as (, [, :, etc */
		if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
		{
			/* only a single-character delimiter token matches */
			FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[dt->row_start].t_type);
		}
		else
		{
			/* consume c and continue in the HCL_FLX_DELIM_TOKEN state */
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_DELIM_TOKEN);
		}
		return 1;
	}

	switch (c)
	{
		case HCL_OOCI_EOF:
			/* only EOF of the top-level stream is supposed to be fed in.
			 * the internal logic discards EOFs of included streams */
			FEED_WRAP_UP_WITH_CHARS (hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len, HCL_IOTOK_EOF);
			break;

		case ';':
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT);
			break;

		case '#':
			/* there is no state data to initialize. just change the state */
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_TOKEN);
			break;

		case '\"':
			init_flx_qt (FLX_QT(hcl), HCL_IOTOK_STRLIT, HCL_SYNERR_STRLIT, c, '\\', 0, HCL_TYPE_MAX(hcl_oow_t));
			/* the quote itself is discarded */
			FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN);
			break;

		case '\'':
			init_flx_qt (FLX_QT(hcl), HCL_IOTOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1);
			/* the quote itself is discarded */
			FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN);
			break;

		case '+':
		case '-':
			init_flx_st (FLX_ST(hcl), c);
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_SIGNED_TOKEN);
			break;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			init_flx_pn (FLX_PN(hcl));
			FEED_CONTINUE (hcl, HCL_FLX_PLAIN_NUMBER);
			consumed = 0; /* the digit is to be handled again in the new state */
			break;

		default:
			init_flx_pi (FLX_PI(hcl));
			FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
			consumed = 0; /* the character is to be handled again in the new state */
			break;
	}

	return consumed;
}
2022-07-23 06:57:01 +00:00
/*
 * Handles a character inside a comment. The character is always consumed.
 * A line break moves the lexer back to the HCL_FLX_START state.
 */
static int flx_comment (hcl_t* hcl, hcl_ooci_t c)
{
	if (is_linebreak(c))
	{
		FEED_CONTINUE (hcl, HCL_FLX_START);
	}

	return 1; /* consumed */
}
2022-07-22 08:02:14 +00:00
/*
 * Handles a character following the first character of a multi-character
 * delimiter token. The candidate rows of delim_token_tab are narrowed down
 * with each character.
 *
 * Returns 1 if the character has been consumed, 0 if it doesn't belong to
 * the token and must be fed again, and -1 on failure.
 */
static int flx_delim_token (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_dt_t* dt = FLX_DT(hcl);

	if (!find_delim_token_char(hcl, c, dt->row_start, dt->row_end, dt->col_next, dt))
	{
		/* c doesn't extend the token. wrap up with the longest match so far */
		FEED_WRAP_UP (hcl, delim_token_tab[dt->row_start].t_type);
		return 0; /* not consumed */
	}

	if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
	{
		/* exactly one candidate remains and it is fully matched.
		 * complete the token and switch to the HCL_FLX_START state */
		FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[dt->row_start].t_type);
	}
	else
	{
		/* more characters are needed to decide. keep collecting */
		ADD_TOKEN_CHAR (hcl, c);
	}

	return 1; /* consumed */
}
2022-07-23 14:06:46 +00:00
/*
 * Handles the character that follows the hash mark and decides which kind
 * of hash-marked token is being read.
 *
 *   #xXXXX   hexadecimal
 *   #oOOOO   octal
 *   #bBBBB   binary
 *   #eDDD    error
 *   #pHHH    smptr
 *   #\C      character
 *   #\xHHHH  unicode character
 *   #\UHHHH  unicode character
 *   #\uHHHH  unicode character
 *   #\backspace
 *   #\linefeed
 *   #\newline
 *   #\nul
 *   #\page
 *   #\return
 *   #\rubout
 *   #\space
 *   #\tab
 *   #\vtab
 *   #include
 *   #[ ]     byte array
 *   #( )     qlist
 *   #{ }     dictionary
 *
 * Returns 1 if the character has been consumed, 0 if it must be fed again
 * in the new state, and -1 on failure.
 */
static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
{
	switch (c)
	{
		case '#':
		case '!':
			/* ## and #! begin a comment just like ; does */
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT);
			return 1;

		/* the radix-like prefixes only set up the number state here.
		 * the state change is performed after the switch */
		case 'x':
			init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16);
			break;

		case 'o':
			init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8);
			break;

		case 'b':
			init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 2);
			break;

		case 'e':
			init_flx_hn (FLX_HN(hcl), HCL_IOTOK_ERRLIT, HCL_SYNERR_ERRLIT, 10);
			break;

		case 'p':
			init_flx_hn (FLX_HN(hcl), HCL_IOTOK_SMPTRLIT, HCL_SYNERR_SMPTRLIT, 16);
			break;

		case '\\':
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_CHAR);
			return 1;

		case '[':
			FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_BAPAREN);
			return 1;

		case '(':
			FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN);
			return 1;

		case '{':
			FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_DLPAREN);
			return 1;

		default:
			/* the characters used as case values above can never be
			 * the first character of a hash-marked identifier */
			init_flx_hi (FLX_HI(hcl));
			FEED_CONTINUE (hcl, HCL_FLX_HMARKED_IDENT);
			return 0; /* not consumed */
	}

	/* reached for x, o, b, e, p only */
	FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_NUMBER);
	return 1;
}
2022-07-23 14:06:46 +00:00
/*
 * Lexer state handler for a hash-marked character literal.
 *
 * Characters that are not delimiters are appended to the token name.
 * When a delimiter arrives, the collected name is converted to a single
 * character and the token is wrapped up as HCL_IOTOK_CHARLIT. The name
 * is indexed from position 2, so the first two characters are presumably
 * the literal prefix; a name of exactly 3 characters denotes the
 * character at index 2 itself. Longer names are either hexadecimal
 * forms (x, and u/U depending on HCL_SIZEOF_OOCH_T) or one of the
 * VOCA_xxx character names.
 *
 * Returns 1 if 'c' has been consumed, 0 if 'c' has not been consumed
 * and -1 after a syntax error has been set.
 */
static int flx_hmarked_char (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hc_t* hc = FLX_HC(hcl);

	if (!is_delimchar(c))
	{
		/* still inside the literal. keep collecting */
		ADD_TOKEN_CHAR (hcl, c);
		hc->char_count++;
		return 1;
	}

	if (hc->char_count == 0)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"no valid character in character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	if (TOKEN_NAME_LEN(hcl) < 4)
	{
		/* a plain single character */
		HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3);
		c = TOKEN_NAME_CHAR(hcl, 2);
	}
	else
	{
		/* maximum number of hexadecimal digits allowed.
		 * 0 means that the name is not in a hexadecimal form */
		hcl_oow_t max_digits;

		switch (TOKEN_NAME_CHAR(hcl, 2))
		{
			case 'x':
				max_digits = 2;
				break;
		#if (HCL_SIZEOF_OOCH_T >= 2)
			case 'u':
				max_digits = 4;
				break;
		#endif
		#if (HCL_SIZEOF_OOCH_T >= 4)
			case 'U':
				max_digits = 8;
				break;
		#endif
			default:
				max_digits = 0;
				break;
		}

		if (max_digits > 0)
		{
			hcl_oow_t pos;

			if (TOKEN_NAME_LEN(hcl) - 3 > max_digits)
			{
				hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
					"invalid hexadecimal character character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
				return -1;
			}

			/* accumulate the digits that follow the 3-character prefix */
			c = 0;
			for (pos = 3; pos < TOKEN_NAME_LEN(hcl); pos++)
			{
				if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, pos)))
				{
					hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
						"invalid hexadecimal character character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
					return -1;
				}
				c = c * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, pos), 16);
			}
		}
		else if (does_token_name_match(hcl, VOCA_BACKSPACE)) c = '\b';
		else if (does_token_name_match(hcl, VOCA_LINEFEED)) c = '\n';
		else if (does_token_name_match(hcl, VOCA_NEWLINE)) c = '\n'; /* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */
		else if (does_token_name_match(hcl, VOCA_NUL)) c = '\0'; /* null character. not the object null */
		else if (does_token_name_match(hcl, VOCA_PAGE)) c = '\f';
		else if (does_token_name_match(hcl, VOCA_RETURN)) c = '\r';
		else if (does_token_name_match(hcl, VOCA_RUBOUT)) c = '\x7F'; /* DEL */
		else if (does_token_name_match(hcl, VOCA_SPACE)) c = ' ';
		else if (does_token_name_match(hcl, VOCA_TAB)) c = '\t';
		else if (does_token_name_match(hcl, VOCA_VTAB)) c = '\v';
		else
		{
			hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
				"invalid character literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
			return -1;
		}
	}

	/* replace the token name with the converted character */
	CLEAR_TOKEN_NAME (hcl);
	ADD_TOKEN_CHAR (hcl, c);
	FEED_WRAP_UP (hcl, HCL_IOTOK_CHARLIT);
	return 0; /* the delimiter itself is not consumed */
}
2022-07-23 14:06:46 +00:00
/*
 * Lexer state handler for a hash-marked identifier.
 *
 * Non-delimiter characters are appended to the token name. Upon a
 * delimiter, the collected name is resolved with
 * get_directive_token_type() and wrapped up with the resulting type.
 *
 * Returns 1 if 'c' has been consumed, 0 if 'c' has not been consumed
 * and -1 after a syntax error has been set.
 */
static int flx_hmarked_ident (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hi_t* hi = FLX_HI(hcl);
	hcl_iotok_type_t resolved_type;

	if (!is_delimchar(c))
	{
		ADD_TOKEN_CHAR (hcl, c);
		hi->char_count++;
		return 1;
	}

	if (hi->char_count == 0)
	{
		/* a delimiter encountered before any character was collected */
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
			"no valid character after hash sign");
		return -1;
	}

	if (get_directive_token_type(hcl, &resolved_type) <= -1)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"invalid hash-marked literal %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	FEED_WRAP_UP (hcl, resolved_type);
	return 0; /* the delimiter is left for the next state */
}
2022-07-23 14:06:46 +00:00
/*
 * Lexer state handler for a hash-marked number in the radix stored in
 * the state data.
 *
 * Valid digits and other non-delimiter characters are both appended to
 * the token name; the latter are also counted as invalid digits. Upon a
 * delimiter, the token is wrapped up as the number type in the state
 * data if all characters were valid digits. If invalid digits were seen,
 * the name is tried as a hash-marked directive instead.
 *
 * Returns 1 if 'c' has been consumed, 0 if 'c' has not been consumed
 * and -1 after a syntax error has been set.
 */
static int flx_hmarked_number (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_hn_t* rn = FLX_HN(hcl);

	if (CHAR_TO_NUM(c, rn->radix) < rn->radix)
	{
		/* a digit valid in the current radix */
		HCL_ASSERT (hcl, !is_delimchar(c));
		ADD_TOKEN_CHAR (hcl, c);
		rn->digit_count++;
		return 1;
	}

	if (!is_delimchar(c))
	{
		/* not a valid digit but not a delimiter either.
		 * collect it and remember that the token is not purely numeric */
		ADD_TOKEN_CHAR (hcl, c);
		rn->digit_count++;
		rn->invalid_digit_count++;
		return 1;
	}

	/* a delimiter ends the token */
	if (rn->digit_count == 0)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
			"no valid digit after radix specifier in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
		return -1;
	}

	if (rn->invalid_digit_count > 0)
	{
		/* invalid as a radixed number, but this could be a hash-marked directive */
		hcl_iotok_type_t directive_type;

		if (get_directive_token_type(hcl, &directive_type) <= -1)
		{
			hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
				"neither valid radixed number nor valid directive %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
			return -1;
		}

		FEED_WRAP_UP (hcl, directive_type);
		return 0;
	}

	FEED_WRAP_UP (hcl, rn->tok_type);
	return 0;
}
2022-07-26 00:06:29 +00:00
/*
 * Lexer state handler for a plain identifier which may consist of
 * multiple segments separated by a period.
 *
 * Non-delimiter characters are appended to the token name and counted
 * towards the current segment. A delimiter closes the current segment,
 * which is classified with classify_ident_token(). A period then starts
 * the next segment; any other delimiter finishes the token.
 *
 * Returns 1 if 'c' has been consumed, 0 if 'c' has not been consumed
 * and -1 after a syntax error has been set.
 */
static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
{
	hcl_flx_pi_t* pi = FLX_PI(hcl);
	hcl_oow_t seg_start;
	hcl_oocs_t cur_seg;
	hcl_iotok_type_t seg_type;

	if (!is_delimchar(c)) /* [NOTE] . is one of the delimiter character */
	{
		ADD_TOKEN_CHAR (hcl, c);
		pi->char_count++;
		pi->seg_len++;
		return 1;
	}

	if (pi->seg_len == 0)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "blank segment");
		return -1;
	}

	/* the segment just completed occupies the tail of the token name */
	seg_start = TOKEN_NAME_LEN(hcl) - pi->seg_len;
	cur_seg.ptr = &TOKEN_NAME_CHAR(hcl, seg_start);
	cur_seg.len = pi->seg_len;

	seg_type = classify_ident_token(hcl, &cur_seg);
	if (seg_type != HCL_IOTOK_IDENT)
	{
		pi->non_ident_seg_count++;
		pi->last_non_ident_type = seg_type;
	}

	pi->seg_len = 0; /* the length of the segment to be worked on */
	pi->seg_count++; /* total number of segments completed */

	if (c == '.')
	{
		/* the period becomes part of the name. go on with the next segment */
		ADD_TOKEN_CHAR (hcl, c);
		pi->char_count++;
		return 1;
	}

	/* the token ends here */
	if (pi->non_ident_seg_count > 0)
	{
		if (pi->seg_count != 1)
		{
			/* a segment classified as a non-identifier is inside a multi-segment name */
			hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
			return -1;
		}

		FEED_WRAP_UP (hcl, pi->last_non_ident_type);
		return 0;
	}

	FEED_WRAP_UP (hcl, (pi->seg_count == 1? HCL_IOTOK_IDENT: HCL_IOTOK_IDENT_DOTTED));
	return 0;
}
/*
 * Lexer state handler for a plain decimal number with an optional
 * fractional part.
 *
 * digit_count[0] counts the digits before the decimal point and
 * digit_count[1] counts the digits after it. The first period switches
 * to the fractional part. Any other non-digit character ends the token,
 * which is wrapped up as HCL_IOTOK_FPDECLIT or HCL_IOTOK_NUMLIT.
 *
 * Returns 1 if 'c' has been consumed, 0 if 'c' has not been consumed
 * and -1 after a syntax error has been set.
 */
static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */
{
	hcl_flx_pn_t* pn = FLX_PN(hcl);

	if (is_digitchar(c))
	{
		ADD_TOKEN_CHAR (hcl, c);
		pn->digit_count[pn->fpdec]++;
		return 1;
	}

	if (c == '.' && !pn->fpdec)
	{
		/* the first decimal point. a second one ends the token below */
		pn->fpdec = 1;
		ADD_TOKEN_CHAR (hcl, c);
		return 1;
	}

	if (pn->digit_count[0] == 0)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit before decimal point");
		return -1;
	}

	if (pn->fpdec && pn->digit_count[1] == 0)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point");
		return -1;
	}

	FEED_WRAP_UP (hcl, (pn->fpdec? HCL_IOTOK_FPDECLIT: HCL_IOTOK_NUMLIT));
	return 0;
}
2022-07-23 06:57:01 +00:00
/*
 * Lexer state handler for a quoted token (string, character).
 *
 * qt->escaped tracks the escape state:
 *   0        - not in an escape sequence
 *   1        - the escape character has just been seen
 *   3        - collecting up to 3 octal digits
 *   2, 4, 8  - collecting up to 2, 4, 8 hexadecimal digits (x, u, U)
 * The value accumulated from the digits is kept in qt->c_acc and added
 * to the token name when the sequence completes or is cut short by a
 * character that is not a digit. In the latter case, the character is
 * processed further as an ordinary input character.
 *
 * Returns 1 if 'c' has been consumed and -1 after a syntax error has
 * been set. EOF inside the token and a token length outside
 * [qt->min_len, qt->max_len] are reported with qt->synerr_code.
 */
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */
{
	hcl_flx_qt_t* qt = FLX_QT(hcl);

	if (c == HCL_OOCI_EOF)
	{
		hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*FLX_LOC(hcl) instead?*/, HCL_NULL);
		return -1;
	}

	/* stage 1 - continue a numeric escape sequence in progress */
	switch (qt->escaped)
	{
		case 3:
			if (c >= '0' && c <= '7')
			{
				/* more octal digits */
				qt->c_acc = qt->c_acc * 8 + c - '0';
				qt->digit_count++;
				if (qt->digit_count >= qt->escaped)
				{
					/* should i limit the max to 0xFF/0377?
					 * if (qt->c_acc > 0377) qt->c_acc = 0377; */
					ADD_TOKEN_CHAR (hcl, qt->c_acc);
					qt->escaped = 0;
				}
				goto consumed;
			}

			/* the sequence ended early. 'c' is handled below */
			ADD_TOKEN_CHAR (hcl, qt->c_acc);
			qt->escaped = 0;
			break;

		case 2:
		case 4:
		case 8:
		{
			int digit_value;

			if (c >= '0' && c <= '9') digit_value = c - '0';
			else if (c >= 'A' && c <= 'F') digit_value = c - 'A' + 10;
			else if (c >= 'a' && c <= 'f') digit_value = c - 'a' + 10;
			else digit_value = -1; /* not a hexadecimal digit */

			if (digit_value >= 0)
			{
				qt->c_acc = qt->c_acc * 16 + digit_value;
				qt->digit_count++;
				if (qt->digit_count >= qt->escaped)
				{
					ADD_TOKEN_CHAR (hcl, qt->c_acc);
					qt->escaped = 0;
				}
				goto consumed;
			}
			else
			{
				/* with no digit collected, the letter that introduced
				 * the sequence is taken literally */
				hcl_ooch_t rc;

				rc = (qt->escaped == 2)? 'x':
				     (qt->escaped == 4)? 'u': 'U';
				if (qt->digit_count == 0)
				{
					ADD_TOKEN_CHAR (hcl, rc);
				}
				else
				{
					ADD_TOKEN_CHAR (hcl, qt->c_acc);
				}
				qt->escaped = 0;
			}
			break;
		}

		default:
			break;
	}

	/* stage 2 - terminating quote or the start of an escape sequence */
	if (qt->escaped == 0)
	{
		if (c == qt->end_char)
		{
			/* terminating quote */
			FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_IOTOK_STRLIT or HCL_IOTOK_CHARLIT */
			if (TOKEN_NAME_LEN(hcl) < qt->min_len)
			{
				hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL);
				return -1;
			}
			goto consumed;
		}

		if (c == qt->esc_char)
		{
			qt->escaped = 1;
			goto consumed;
		}
	}

	/* stage 3 - the character right after the escape character */
	if (qt->escaped == 1)
	{
		int translated = 1;

		switch (c)
		{
			case 'a': c = '\a'; break;
			case 'b': c = '\b'; break;
			case 'f': c = '\f'; break;
			case 'n': c = '\n'; break;
			case 'r': c = '\r'; break;
			case 't': c = '\t'; break;
			case 'v': c = '\v'; break;
			default: translated = 0; break;
		}

		if (!translated)
		{
			if (c >= '0' && c <= '7' && !qt->regex)
			{
				/* i don't support the octal notation for a regular expression.
				 * it conflicts with the backreference notation between \1 and \7 inclusive. */
				qt->escaped = 3;
				qt->digit_count = 1;
				qt->c_acc = c - '0';
				goto consumed;
			}
			else if (c == 'x')
			{
				qt->escaped = 2;
				qt->digit_count = 0;
				qt->c_acc = 0;
				goto consumed;
			}
		#if (HCL_SIZEOF_OOCH_T >= 2)
			else if (c == 'u')
			{
				qt->escaped = 4;
				qt->digit_count = 0;
				qt->c_acc = 0;
				goto consumed;
			}
		#endif
		#if (HCL_SIZEOF_OOCH_T >= 4)
			else if (c == 'U')
			{
				qt->escaped = 8;
				qt->digit_count = 0;
				qt->c_acc = 0;
				goto consumed;
			}
		#endif
			else if (qt->regex)
			{
				/* if the following character doesn't compose a proper
				 * escape sequence, keep the escape character.
				 * an unhandled escape sequence can be handled
				 * outside this function since the escape character
				 * is preserved. */
				ADD_TOKEN_CHAR (hcl, qt->esc_char);
			}
		}

		qt->escaped = 0;
	}

	ADD_TOKEN_CHAR (hcl, c);

consumed:
	if (TOKEN_NAME_LEN(hcl) > qt->max_len)
	{
		hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL);
		return -1;
	}
	return 1;
}
2022-07-26 00:06:29 +00:00
/*
 * Lexer state handler entered after a sign character has been placed
 * in the token name buffer.
 *
 * - a sign followed by # and one of b, o, x continues as a radixed
 *   number in HCL_FLX_HMARKED_NUMBER.
 * - a sign followed by # and anything else emits the sign alone as
 *   HCL_IOTOK_IDENT and restarts with the hash mark in
 *   HCL_FLX_HMARKED_TOKEN.
 * - a sign followed by a digit continues in HCL_FLX_PLAIN_NUMBER.
 * - anything else continues in HCL_FLX_PLAIN_IDENT with the sign being
 *   the first character of the identifier.
 *
 * Returns 1 if 'c' has been consumed and 0 if 'c' is to be fed again to
 * the newly selected state.
 */
static int flx_signed_token (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_flx_st_t* st = FLX_ST(hcl);

	if (c == '#' && st->char_count == 0)
	{
		/* a hash mark right after the sign */
		ADD_TOKEN_CHAR (hcl, c);
		st->hmarked = 1;
		st->char_count++;
		return 1;
	}

	if (st->hmarked)
	{
		int radix;

		HCL_ASSERT (hcl, st->char_count == 1);

		switch (c)
		{
			case 'b': radix = 2; break;
			case 'o': radix = 8; break;
			case 'x': radix = 16; break;
			default: radix = 0; break; /* not a radix specifier */
		}

		if (radix > 0)
		{
			init_flx_hn (FLX_HN(hcl), HCL_IOTOK_RADNUMLIT, HCL_SYNERR_NUMLIT, radix);
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_NUMBER);
			return 1;
		}

		/* at this point, the token name buffer holds +# or -# */
		HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 2);
		TOKEN_NAME_LEN(hcl)--; /* remove the ending # from the name buffer */
		FEED_WRAP_UP (hcl, HCL_IOTOK_IDENT);

		/* reset the token information as if it enters HMARKED_TOKEN from START */
		reset_flx_token (hcl);

		/* the current character is on the same line as the hash mark, the column must be greater than 1 */
		HCL_ASSERT (hcl, FLX_LOC(hcl)->colm > 1);
		FLX_LOC(hcl)->colm--; /* move back one character location by decrementing the column number */

		ADD_TOKEN_CHAR (hcl, '#');
		FEED_CONTINUE (hcl, HCL_FLX_HMARKED_TOKEN);
		return 0;
	}

	HCL_ASSERT (hcl, st->char_count == 0);

	if (is_digitchar(c))
	{
		init_flx_pn (FLX_PN(hcl)); /* the sign is not part of the pn->digit_count[0] so keep it at 0 here */
		FEED_CONTINUE (hcl, HCL_FLX_PLAIN_NUMBER);
		return 0;
	}

	init_flx_pi (FLX_PI(hcl));

	/* the sign is already in the token name buffer.
	 * adjust the state data for the sign. */
	HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 1);
	FLX_PI(hcl)->char_count++;
	FLX_PI(hcl)->seg_len++;

	/* let refeeding of 'c' happen at the next iteration */
	FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
	return 0;
}
2022-07-26 15:06:53 +00:00
/* ------------------------------------------------------------------------ */
2022-07-22 08:02:14 +00:00
/*
 * Dispatches the character 'c' to the handler of the current lexer
 * state and returns whatever the handler returns. An unknown state
 * trips an assertion, sets HCL_EINTERN and yields -1.
 */
static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{
	int n;

/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/

	switch (FLX_STATE(hcl))
	{
		case HCL_FLX_START:
			n = flx_start(hcl, c);
			break;

		case HCL_FLX_COMMENT:
			n = flx_comment(hcl, c);
			break;

		case HCL_FLX_DELIM_TOKEN:
			n = flx_delim_token(hcl, c);
			break;

		case HCL_FLX_HMARKED_TOKEN:
			n = flx_hmarked_token(hcl, c);
			break;

		case HCL_FLX_HMARKED_CHAR:
			n = flx_hmarked_char(hcl, c);
			break;

		case HCL_FLX_HMARKED_IDENT:
			n = flx_hmarked_ident(hcl, c);
			break;

		case HCL_FLX_HMARKED_NUMBER:
			n = flx_hmarked_number(hcl, c);
			break;

		case HCL_FLX_PLAIN_IDENT:
			n = flx_plain_ident(hcl, c);
			break;

		case HCL_FLX_PLAIN_NUMBER:
			n = flx_plain_number(hcl, c);
			break;

		case HCL_FLX_QUOTED_TOKEN:
			n = flx_quoted_token(hcl, c);
			break;

		case HCL_FLX_SIGNED_TOKEN:
			n = flx_signed_token(hcl, c);
			break;

		default:
			/* unknown state */
			HCL_ASSERT (hcl, !"internal error - this must never happen");
			hcl_seterrbfmt (hcl, HCL_EINTERN, "internal error - unknown flx state - %d", FLX_STATE(hcl));
			n = -1;
			break;
	}

	return n;
}
2022-08-02 13:41:13 +00:00
/*
 * Advances the lexer location past the consumed character 'ch'.
 * A line break moves to column 1 of the next line. Any other character
 * moves one column to the right.
 */
static void feed_update_lx_loc (hcl_t* hcl, hcl_ooci_t ch)
{
	if (!is_linebreak(ch))
	{
		hcl->c->feed.lx.loc.colm++;
		return;
	}

	hcl->c->feed.lx.loc.line++;
	hcl->c->feed.lx.loc.colm = 1;
}
2022-07-28 14:07:18 +00:00
2022-08-22 04:30:35 +00:00
/*
 * Feeds characters read from the included stream(s) until the current
 * input goes back to the main stream (hcl->c->srarg).
 *
 * The buffer of the current input is refilled with the reader callback
 * whenever it is exhausted. Reaching the end of an included stream ends
 * that inclusion with feed_end_include(). A pending inclusion request
 * (hcl->c->feed.rd.do_include_file) is served after each character.
 *
 * Returns 0 on success and -1 on failure.
 */
static int feed_from_includee (hcl_t* hcl)
{
	int x;
	hcl_ooch_t ch;

	HCL_ASSERT (hcl, hcl->c->curinp != HCL_NULL && hcl->c->curinp != &hcl->c->srarg);

	do
	{
		if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len)
		{
			/* the buffer is exhausted. read more */
			if (hcl->c->reader(hcl, HCL_IO_READ, hcl->c->curinp) <= -1) return -1;

			if (hcl->c->curinp->xlen <= 0)
			{
				/* got EOF from an included stream */
				feed_end_include (hcl);
				continue;
			}

			hcl->c->curinp->b.pos = 0;
			hcl->c->curinp->b.len = hcl->c->curinp->xlen;
		}

		ch = hcl->c->curinp->buf[hcl->c->curinp->b.pos];
		x = feed_char(hcl, ch);
		if (x <= -1) return -1;

		if (x >= 1)
		{
			/* consumed */
			feed_update_lx_loc (hcl, ch);
			hcl->c->curinp->b.pos += x;
		}

		if (hcl->c->feed.rd.do_include_file)
		{
			/* feed_process_token(), called for the "filename" token for the #include
			 * directive, sets hcl->c->feed.rd.do_include_file to 1 instead of attempting
			 * to include the file. the file inclusion is attempted here after the return
			 * value of feed_char() is used to advance the hcl->c->curinp->b.pos pointer. */
			hcl->c->feed.rd.do_include_file = 0; /* clear this regardless of inclusion result */
			if (feed_begin_include(hcl) <= -1) return -1;
		}
	}
	while (hcl->c->curinp != &hcl->c->srarg);

	return 0;
}
2023-05-18 01:24:01 +00:00
/*
 * Prepares for feeding source text with hcl_feed().
 *
 * The feed state is reinitialized with init_feed(). If 'on_cnode' is not
 * HCL_NULL, it replaces the handler that init_feed() installed. Passing
 * HCL_NULL therefore keeps the default handler set by init_feed().
 *
 * hcl_attachio() or hcl_attachiostd() must have been called beforehand.
 * Always returns 0.
 */
int hcl_beginfeed (hcl_t* hcl, hcl_on_cnode_t on_cnode)
{
	HCL_ASSERT (hcl, hcl->c != HCL_NULL); /* call hcl_attachio() or hcl_attachiostd() first */

	init_feed (hcl);

	if (on_cnode != HCL_NULL)
	{
		/* override the default handler */
		hcl->c->feed.on_cnode = on_cnode;
	}

	return 0;
}
/*
 * Signals the end of input to the feeder. This is the same as calling
 * hcl_feed() with HCL_NULL data, which feeds HCL_OOCI_EOF to the lexer.
 * Returns what hcl_feed() returns.
 */
int hcl_endfeed (hcl_t* hcl)
{
	const hcl_ooch_t* no_data = HCL_NULL;
	return hcl_feed(hcl, no_data, 0);
}
2022-05-25 14:23:43 +00:00
/*
 * Feeds 'len' characters pointed to by 'data' to the lexer.
 * If 'data' is HCL_NULL, HCL_OOCI_EOF is fed instead to signal the end
 * of input. 'len' is ignored in this case.
 *
 * A character that a state handler does not consume is fed again to the
 * newly selected state. If a token triggers a file inclusion, the
 * included stream is fully fed with feed_from_includee() before the
 * next character in 'data' is processed.
 *
 * Returns 0 on success. On failure, it cleans up the reader stack and
 * returns -1.
 *
 * TODO: need to return the number of processed characters?
 *       need to stop after the first complete expression?
 */
int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{
	hcl_oow_t i;
	int x;

	HCL_ASSERT (hcl, hcl->c != HCL_NULL);

	if (data)
	{
		for (i = 0; i < len; )
		{
			x = feed_char(hcl, data[i]);
			if (x <= -1) goto oops; /* TODO: return the number of processed characters via an argument? */

			if (x > 0)
			{
				/* consumed */
				feed_update_lx_loc (hcl, data[i]);
				i += x; /* x is supposed to be 1. otherwise, some characters may get skipped. */
			}

			if (hcl->c->feed.rd.do_include_file)
			{
				/* clear the request before attempting the inclusion,
				 * regardless of the inclusion result, as feed_from_includee()
				 * does. otherwise, a failed inclusion would leave the flag
				 * set and the next call would attempt a stale inclusion. */
				hcl->c->feed.rd.do_include_file = 0;
				if (feed_begin_include(hcl) <= -1) goto oops;
			}

			if (hcl->c->curinp && hcl->c->curinp != &hcl->c->srarg && feed_from_includee(hcl) <= -1)
			{
				/* TODO: return the number of processed characters via an argument? */
				goto oops;
			}

			/* feed data[i] again if not consumed */
		}
	}
	else
	{
		for (i = 0; i < 1; ) /* weird loop in case feed_char() returns 0 */
		{
			x = feed_char(hcl, HCL_OOCI_EOF);
			if (x <= -1)
			{
				if (hcl->c->feed.rd.level <= 0 && hcl_geterrnum(hcl) == HCL_ESYNERR && hcl_getsynerrnum(hcl) == HCL_SYNERR_EOF)
				{
					/* convert this EOF error to success as the caller knows EOF in the feed mode.
					 * the caller can safely stop feeding after getting success from hcl_feed(hcl, HCL_NULL, 0);
					 * in the feed mode, this function doesn't set HCL_EFINIS. */
					x = 1;
				}
				else
				{
					goto oops;
				}
			}
			i += x;
		}
	}

	return 0;

oops:
	/* if enter_list() is in feed_process_token(), the stack grows.
	 * leave_list() pops an element off the stack. the stack can be
	 * not empty if an error occurs outside feed_process_token() after
	 * leave_list() in it. for example,
	 *
	 *  (              #aaa
	 *  ^              ^
	 *  leave_list()   error in flx_hmarked_ident() before a full cnode is processed
	 */
	feed_clean_up_reader_stack (hcl);
	return -1;
}
/*
hcl_setopt ( ON_EXPRESSION CALLBACK ? ? ? ) ;
hcl_feed ( hcl , " (hello) (10) " , 12 ) ;
> on_token
> on_expression
> on_eof
default callback for on_expression ?
compile
execute ? ? / if in the interactive mode ? ( say it ' s used as a network protocol . execute each expression when received . . . . )
default callback for on_eof ?
execute or terminate ?
*/
2022-07-22 08:02:14 +00:00
/* ------------------------------------------------------------------------ */
/* TODO: rename compiler to something else that can include reader, printer, and compiler.
 * move compiler initialization/finalization from here to a more common place */
2023-05-18 01:24:01 +00:00
/*
 * Callback registered as the 'gc' member in init_compiler().
 * It passes the object pointers held in the compiler data (r.s and r.e)
 * through hcl_moveoop() and stores the results back. Nothing is done
 * when no compiler data is attached.
 */
static void gc_compiler_cb (hcl_t* hcl)
{
	hcl_compiler_t* cc = hcl->c;

	if (!cc) return;

	cc->r.s = hcl_moveoop(hcl, cc->r.s);
	cc->r.e = hcl_moveoop(hcl, cc->r.e);
}
2023-05-18 01:24:01 +00:00
/*
 * Callback registered as the 'fini' member in init_compiler().
 * called before the hcl object is closed.
 *
 * It frees the buffers owned by the compiler data, clears the source
 * stream names, detaches the I/O handlers and finally frees the compiler
 * data itself, leaving hcl->c set to HCL_NULL.
 */
static void fini_compiler_cb (hcl_t* hcl)
{
	hcl_compiler_t* cc = hcl->c;

	if (!cc) return; /* nothing attached */

	if (cc->cfs.ptr)
	{
		hcl_freemem (hcl, cc->cfs.ptr);
		cc->cfs.ptr = HCL_NULL;
		cc->cfs.top = -1;
		cc->cfs.capa = 0;
	}

	if (cc->tv.s.ptr)
	{
		hcl_freemem (hcl, cc->tv.s.ptr);
		cc->tv.s.ptr = HCL_NULL;
		cc->tv.s.len = 0;
		cc->tv.capa = 0;
		cc->tv.wcount = 0;
	}
	HCL_ASSERT (hcl, cc->tv.capa == 0);
	HCL_ASSERT (hcl, cc->tv.wcount == 0);

	if (cc->cblk.info)
	{
		hcl_freemem (hcl, cc->cblk.info);
		cc->cblk.info = HCL_NULL;
		cc->cblk.info_capa = 0;
		cc->cblk.depth = -1;
	}

	if (cc->clsblk.info)
	{
		hcl_freemem (hcl, cc->clsblk.info);
		cc->clsblk.info = HCL_NULL;
		cc->clsblk.info_capa = 0;
		cc->clsblk.depth = -1;
	}

	if (cc->fnblk.info)
	{
		hcl_freemem (hcl, cc->fnblk.info);
		cc->fnblk.info = HCL_NULL;
		cc->fnblk.info_capa = 0;
		cc->fnblk.depth = -1;
	}

	clear_sr_names (hcl);
	if (cc->tok.name.ptr) hcl_freemem (hcl, cc->tok.name.ptr);

	hcl_detachio (hcl);

	/* the compiler data itself goes away last */
	hcl_freemem (hcl, cc);
	hcl->c = HCL_NULL;
}
2023-05-18 01:24:01 +00:00
/*
 * Tears down the compiler data outside the callback mechanism.
 * unlike fini_compiler_cb(), this is to be used in some error handling
 * between init_compiler() success and subsequent operation failure.
 * The callback registered by init_compiler() is deregistered first and
 * then the finalization routine is invoked directly.
 */
static void fini_compiler (hcl_t* hcl)
{
	if (!hcl->c) return;

	hcl_deregcb (hcl, hcl->c->cbp);
	fini_compiler_cb (hcl);
}
2022-07-22 08:02:14 +00:00
2023-05-18 01:24:01 +00:00
/*
 * Allocates and initializes the compiler data (hcl->c).
 *
 * The gc and fini callbacks are registered before the allocation. If the
 * allocation fails, the registration is undone. hcl->c must be HCL_NULL
 * upon entry.
 *
 * Returns 0 on success and -1 on failure.
 */
static int init_compiler (hcl_t* hcl)
{
	hcl_cb_t cb;
	hcl_cb_t* cbp;
	hcl_compiler_t* cc;

	HCL_ASSERT (hcl, hcl->c == HCL_NULL);

	HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb));
	cb.gc = gc_compiler_cb;
	cb.fini = fini_compiler_cb;

	cbp = hcl_regcb(hcl, &cb);
	if (HCL_UNLIKELY(!cbp)) return -1;

	cc = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c));
	hcl->c = cc;
	if (HCL_UNLIKELY(!cc))
	{
		hcl_deregcb (hcl, cbp);
		return -1;
	}

	cc->ilchr_ucs.ptr = &cc->ilchr;
	cc->ilchr_ucs.len = 1;

	cc->r.s = hcl->_nil;
	cc->r.e = hcl->_nil;

	/* -1 is the initial value of the stack top and the block depths */
	cc->cfs.top = -1;
	cc->cblk.depth = -1;
	cc->clsblk.depth = -1;
	cc->fnblk.depth = -1;

	init_feed (hcl);
	cc->cbp = cbp; /* remembered for deregistration in fini_compiler() */

	return 0;
}
2022-07-22 08:02:14 +00:00
2023-05-19 03:55:08 +00:00
/*
 * Attaches I/O handlers to the hcl object.
 *
 * reader  - handler for the source input stream. may be HCL_NULL.
 * scanner - handler for the input stream. may be HCL_NULL.
 * printer - handler for the output stream. may be HCL_NULL.
 *
 * Each non-null handler is first asked to open a new stream. Only when
 * all of them succeed are the previously attached streams of the same
 * kinds closed and replaced. A handler passed as HCL_NULL leaves the
 * corresponding attachment untouched. The compiler data is created on
 * the first call.
 *
 * Returns 0 on success. On failure, the streams newly opened in this
 * call are closed, the compiler data is destroyed if it was created in
 * this call, and -1 is returned.
 */
int hcl_attachio (hcl_t* hcl, hcl_ioimpl_t reader, hcl_ioimpl_t scanner, hcl_ioimpl_t printer)
{
	int n;
	int inited_compiler = 0;
	hcl_iosrarg_t new_srarg;
	hcl_ioinarg_t new_inarg;
	hcl_iooutarg_t new_outarg;

	if (!hcl->c)
	{
		if (init_compiler(hcl) <= -1) return -1;
		inited_compiler = 1;
	}

	/* phase 1 - open all the new streams */
	if (reader)
	{
		/* The name field and the includer field are HCL_NULL
		 * for the main stream */
		HCL_MEMSET (&new_srarg, 0, HCL_SIZEOF(new_srarg));
		new_srarg.line = 1;
		new_srarg.colm = 1;

		/* open the top-level source input stream */
		n = reader(hcl, HCL_IO_OPEN, &new_srarg);
		if (n <= -1) goto oops;
	}

	if (scanner)
	{
		HCL_MEMSET (&new_inarg, 0, HCL_SIZEOF(new_inarg));
		n = scanner(hcl, HCL_IO_OPEN, &new_inarg);
		if (n <= -1) goto oops_close_reader;
	}

	if (printer)
	{
		/* open the new output stream */
		HCL_MEMSET (&new_outarg, 0, HCL_SIZEOF(new_outarg));
		n = printer(hcl, HCL_IO_OPEN, &new_outarg);
		if (n <= -1) goto oops_close_scanner;
	}

	/* phase 2 - every open succeeded. replace the old streams */
	if (reader)
	{
		if (hcl->c->reader)
		{
			/* close the old source input stream */
			hcl->c->reader (hcl, HCL_IO_CLOSE, &hcl->c->srarg);
		}
		hcl->c->reader = reader;
		hcl->c->srarg = new_srarg;
	}

	if (scanner)
	{
		if (hcl->io.scanner)
		{
			/* close the old input stream */
			hcl->io.scanner (hcl, HCL_IO_CLOSE, &hcl->io.inarg);
		}
		hcl->io.scanner = scanner;
		hcl->io.inarg = new_inarg;
	}

	if (printer)
	{
		if (hcl->io.printer)
		{
			/* close the old output stream */
			hcl->io.printer (hcl, HCL_IO_CLOSE, &hcl->io.outarg);
		}
		hcl->io.printer = printer;
		hcl->io.outarg = new_outarg;
	}

	if (reader)
	{
		/* clear unneeded source stream names */
		/*clear_sr_names (hcl); <---- TODO: tricky to clean up here */

		/* initialize some other key fields */
		hcl->c->nungots = 0;
		/* the source stream is open. set it as the current input stream */
		hcl->c->curinp = &hcl->c->srarg;
	}

	return 0;

	/* the labels below undo the opens in the reverse order. each close
	 * is guarded because any of the handlers may be HCL_NULL, in which
	 * case the corresponding stream was never opened. */
oops_close_scanner:
	if (scanner) scanner (hcl, HCL_IO_CLOSE, &new_inarg);
oops_close_reader:
	if (reader) reader (hcl, HCL_IO_CLOSE, &new_srarg);
oops:
	if (inited_compiler) fini_compiler (hcl);
	return -1;
}
/*
 * Asks the attached printer, if any, to flush the output stream by
 * invoking it with HCL_IO_FLUSH.
 */
void hcl_flushio (hcl_t* hcl)
{
	if (!hcl->io.printer) return; /* no output stream attached */

	hcl->io.printer (hcl, HCL_IO_FLUSH, &hcl->io.outarg);
}
/*
 * Closes all attached streams and clears the handlers so that new ones
 * can be attached later.
 *
 * For the source input, the streams still open for inclusion are closed
 * and freed first, walking from the current input back to the main
 * stream via the 'includer' link. The main stream is closed last.
 */
void hcl_detachio (hcl_t* hcl)
{
	if (hcl->c && hcl->c->reader)
	{
		/* an error occurred and control has reached here
		 * probably, some included files might not have been
		 * closed. close them */
		hcl_iosrarg_t* inp;

		for (inp = hcl->c->curinp; inp != &hcl->c->srarg; inp = hcl->c->curinp)
		{
			hcl_iosrarg_t* includer;

			/* nothing much to do about a close error */
			hcl->c->reader (hcl, HCL_IO_CLOSE, inp);

			includer = inp->includer;
			HCL_ASSERT (hcl, inp->name != HCL_NULL);
			hcl_freemem (hcl, inp);
			hcl->c->curinp = includer;
		}

		/* the current input is the main stream at this point */
		hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp);
		hcl->c->reader = HCL_NULL; /* ready for another attachment */
	}

	if (hcl->io.scanner)
	{
		hcl->io.scanner (hcl, HCL_IO_CLOSE, &hcl->io.inarg);
		hcl->io.scanner = HCL_NULL; /* ready for another attachment */
	}

	if (hcl->io.printer)
	{
		hcl->io.printer (hcl, HCL_IO_CLOSE, &hcl->io.outarg);
		hcl->io.printer = HCL_NULL; /* ready for another attachment */
	}
}
2022-07-29 14:41:00 +00:00
2023-05-19 03:55:08 +00:00
/*
 * Overrides the line and column numbers stored in the main source
 * stream argument (hcl->c->srarg). hcl->c is dereferenced without a
 * check, so the compiler data must exist when this is called.
 */
void hcl_setbasesrloc (hcl_t* hcl, hcl_oow_t line, hcl_oow_t colm)
{
	hcl_iosrarg_t* base = &hcl->c->srarg;

	base->line = line;
	base->colm = colm;
}
2022-08-02 13:41:13 +00:00
2023-05-19 03:55:08 +00:00
/*
 * Reads a character using the base input stream. The caller must take
 * extra care when using this function. It reads the main stream
 * regardless of the inclusion status and ignores the ungot characters.
 *
 * Returns the pointer to the character information held in the main
 * stream argument, or HCL_NULL on failure.
 */
hcl_iolxc_t* hcl_readbasesrchar (hcl_t* hcl)
{
	hcl_iosrarg_t* base = &hcl->c->srarg;

	if (_get_char(hcl, base) <= -1) return HCL_NULL;

	return &base->lxc;
}
2023-05-18 01:24:01 +00:00
2023-05-19 03:55:08 +00:00
/*
 * Provides the raw input interface to the attached source input
 * handler. It doesn't increment the line/column number, nor does it
 * care about ungot characters. It must be used with extra care.
 *
 * On success, the 'xlen' field that the reader left in the main stream
 * argument is stored in '*xlen' and the buffer of the main stream
 * argument is returned. HCL_NULL is returned if the reader fails.
 */
hcl_ooch_t* hcl_readbasesrraw (hcl_t* hcl, hcl_oow_t* xlen)
{
	hcl_iosrarg_t* base;

	HCL_ASSERT (hcl, hcl->c != HCL_NULL); /* call hcl_attachio() or hcl_attachiostd() with proper arguments first */

	base = &hcl->c->srarg;
	if (hcl->c->reader(hcl, HCL_IO_READ, base) <= -1) return HCL_NULL;

	*xlen = base->xlen;
	return base->buf;
}