6809 lines
150 KiB
C
6809 lines
150 KiB
C
/*
 * $Id$
 *
    Copyright 2006-2014 Chung, Hyung-Hwan.
    This file is part of QSE.

    QSE is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    QSE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with QSE. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#include "awk.h"
|
|
|
|
/* platform-specific default for the module name prefix.
 * a definition supplied by the build takes precedence. */
#ifndef QSE_AWK_DEFAULT_MODPREFIX
#	if defined(_WIN32)
#		define QSE_AWK_DEFAULT_MODPREFIX "qseawk-"
#	elif defined(__OS2__) || defined(__DOS__)
#		define QSE_AWK_DEFAULT_MODPREFIX "awk-"
#	else
#		define QSE_AWK_DEFAULT_MODPREFIX "libqseawk-"
#	endif
#endif

/* default for the module name postfix: empty unless the build overrides it */
#ifndef QSE_AWK_DEFAULT_MODPOSTFIX
#	define QSE_AWK_DEFAULT_MODPOSTFIX ""
#endif
|
|
|
|
/* token types assigned to a token by the lexer.
 * the relative order of the enumerators matters. see the notes on the
 * assignment operators and on the reserved words below. */
enum tok_t
{
	TOK_EOF,
	TOK_NEWLINE,

	/* ---- assignment operators ----
	 * TOK_XXX_ASSN must stay in sync with assop in assign_to_opcode
	 * and follow the order of qse_awk_assop_type_t in run.h */
	TOK_ASSN,
	TOK_PLUS_ASSN,
	TOK_MINUS_ASSN,
	TOK_MUL_ASSN,
	TOK_DIV_ASSN,
	TOK_IDIV_ASSN,
	TOK_MOD_ASSN,
	TOK_EXP_ASSN,    /* ^ - exponentiation */
	TOK_CONCAT_ASSN,
	TOK_RS_ASSN,
	TOK_LS_ASSN,
	TOK_BAND_ASSN,
	TOK_BXOR_ASSN,
	TOK_BOR_ASSN,
	/* ---- end of assignment operators ---- */

	/* comparison and matching */
	TOK_TEQ,
	TOK_TNE,
	TOK_EQ,
	TOK_NE,
	TOK_LE,
	TOK_LT,
	TOK_GE,
	TOK_GT,
	TOK_MA,          /* ~  - match */
	TOK_NM,          /* !~ - not match */

	/* unary, arithmetic, logical and bitwise operators */
	TOK_LNOT,        /* !  - logical negation */
	TOK_BQUOTE,      /* `  - is-defined */
	TOK_PLUS,
	TOK_PLUSPLUS,
	TOK_MINUS,
	TOK_MINUSMINUS,
	TOK_MUL,
	TOK_DIV,
	TOK_IDIV,
	TOK_MOD,
	TOK_LOR,
	TOK_LAND,
	TOK_BOR,
	TOK_BXOR,        /* ^^ - bitwise-xor */
	TOK_BAND,
	TOK_BNOT,        /* ~~ - used for unary bitwise-not */
	TOK_RS,
	TOK_LS,
	TOK_IN,
	TOK_EXP,
	TOK_CONCAT,

	/* brackets */
	TOK_LPAREN,
	TOK_RPAREN,
	TOK_LBRACE,
	TOK_RBRACE,
	TOK_LBRACK,
	TOK_RBRACK,

	/* punctuation */
	TOK_DOLLAR,
	TOK_COMMA,
	TOK_SEMICOLON,
	TOK_COLON,
	TOK_DBLCOLON,
	TOK_QUEST,

	/* ==== reserved words begin here ==== */
	/* extended reserved words */
	TOK_XGLOBAL,
	TOK_XLOCAL,
	TOK_XINCLUDE,
	TOK_XABORT,
	TOK_XRESET,

	/* normal reserved words */
	TOK_BEGIN,
	TOK_END,
	TOK_FUNCTION,

	TOK_IF,
	TOK_ELSE,
	TOK_WHILE,
	TOK_FOR,
	TOK_DO,
	TOK_BREAK,
	TOK_CONTINUE,
	TOK_RETURN,
	TOK_EXIT,
	TOK_DELETE,
	TOK_NEXT,
	TOK_NEXTFILE,
	TOK_NEXTOFILE,

	TOK_PRINT,
	TOK_PRINTF,
	TOK_GETLINE,
	/* ==== reserved words end here ==== */

	/* identifiers and literals */
	TOK_IDENT,
	TOK_INT,
	TOK_FLT,
	TOK_STR,
	TOK_REX,

	/* number of token types. not a token itself */
	__TOKEN_COUNT__
};
|
|
|
|
/* kind of program unit being parsed. stored in awk->parse.id.block */
enum
{
	PARSE_GBL,          /* @global declaration */
	PARSE_FUNCTION,     /* function definition */
	PARSE_BEGIN,        /* BEGIN keyword seen, block not entered yet */
	PARSE_END,          /* END keyword seen, block not entered yet */
	PARSE_BEGIN_BLOCK,  /* inside the BEGIN block */
	PARSE_END_BLOCK,    /* inside the END block */
	PARSE_PATTERN,      /* pattern expression */
	PARSE_ACTION_BLOCK  /* action block, with or without a pattern */
};
|
|
|
|
/* kind of loop construct. the users of these values are outside this
 * part of the file */
enum
{
	PARSE_LOOP_NONE,
	PARSE_LOOP_WHILE,
	PARSE_LOOP_FOR,
	PARSE_LOOP_DOWHILE
};
|
|
|
|
/* pairs a token type with a binary operator code.
 * NOTE(review): the tables built from this type are not in this part of
 * the file - confirm the exact meaning of 'binop' against them. */
typedef struct binmap_t
{
	int token;
	int binop;
} binmap_t;
|
|
|
|
static int parse_progunit (qse_awk_t* awk);
|
|
static qse_awk_t* collect_globals (qse_awk_t* awk);
|
|
static void adjust_static_globals (qse_awk_t* awk);
|
|
static qse_size_t find_global (qse_awk_t* awk, const qse_cstr_t* name);
|
|
static qse_awk_t* collect_locals (
|
|
qse_awk_t* awk, qse_size_t nlcls, int istop);
|
|
|
|
static qse_awk_nde_t* parse_function (qse_awk_t* awk);
|
|
static qse_awk_nde_t* parse_begin (qse_awk_t* awk);
|
|
static qse_awk_nde_t* parse_end (qse_awk_t* awk);
|
|
static qse_awk_chain_t* parse_action_block (
|
|
qse_awk_t* awk, qse_awk_nde_t* ptn, int blockless);
|
|
|
|
static qse_awk_nde_t* parse_block_dc (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, int istop);
|
|
|
|
static qse_awk_nde_t* parse_statement (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
|
|
static qse_awk_nde_t* parse_expr_withdc (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
|
|
static qse_awk_nde_t* parse_logical_or (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_logical_and (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_in (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_regex_match (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_bitwise_or (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_bitwise_xor (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_bitwise_and (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_equality (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_relational (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_shift (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_concat (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_additive (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_multiplicative (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
|
|
static qse_awk_nde_t* parse_unary (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_exponent (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_unary_exp (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_increment (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_primary (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_primary_ident (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc);
|
|
|
|
static qse_awk_nde_t* parse_hashidx (
|
|
qse_awk_t* awk, const qse_cstr_t* name, const qse_awk_loc_t* xloc);
|
|
static qse_awk_nde_t* parse_fncall (
|
|
qse_awk_t* awk, const qse_cstr_t* name,
|
|
qse_awk_fnc_t* fnc, const qse_awk_loc_t* xloc, int noarg);
|
|
|
|
static qse_awk_nde_t* parse_primary_ident_segs (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, const qse_cstr_t* full,
|
|
const qse_cstr_t segs[], int nsegs);
|
|
|
|
static int get_token (qse_awk_t* awk);
|
|
static int preget_token (qse_awk_t* awk);
|
|
static int get_rexstr (qse_awk_t* awk, qse_awk_tok_t* tok);
|
|
|
|
static int skip_spaces (qse_awk_t* awk);
|
|
static int skip_comment (qse_awk_t* awk);
|
|
static int classify_ident (qse_awk_t* awk, const qse_cstr_t* name);
|
|
|
|
static int deparse (qse_awk_t* awk);
|
|
static qse_htb_walk_t deparse_func (
|
|
qse_htb_t* map, qse_htb_pair_t* pair, void* arg);
|
|
static int put_char (qse_awk_t* awk, qse_char_t c);
|
|
static int flush_out (qse_awk_t* awk);
|
|
|
|
static qse_awk_mod_t* query_module (
|
|
qse_awk_t* awk, const qse_cstr_t segs[], int nsegs,
|
|
qse_awk_mod_sym_t* sym);
|
|
|
|
typedef struct kwent_t kwent_t;
|
|
|
|
struct kwent_t
|
|
{
|
|
qse_cstr_t name;
|
|
int type;
|
|
int trait; /* the entry is valid when this option is set */
|
|
};
|
|
|
|
static kwent_t kwtab[] =
|
|
{
|
|
/* keep this table in sync with the kw_t enums in <parse.h>.
|
|
* also keep it sorted by the first field for binary search */
|
|
{ { QSE_T("@abort"), 6 }, TOK_XABORT, 0 },
|
|
{ { QSE_T("@global"), 7 }, TOK_XGLOBAL, 0 },
|
|
{ { QSE_T("@include"), 8 }, TOK_XINCLUDE, 0 },
|
|
{ { QSE_T("@local"), 6 }, TOK_XLOCAL, 0 },
|
|
{ { QSE_T("@reset"), 6 }, TOK_XRESET, 0 },
|
|
{ { QSE_T("BEGIN"), 5 }, TOK_BEGIN, QSE_AWK_PABLOCK },
|
|
{ { QSE_T("END"), 3 }, TOK_END, QSE_AWK_PABLOCK },
|
|
{ { QSE_T("break"), 5 }, TOK_BREAK, 0 },
|
|
{ { QSE_T("continue"), 8 }, TOK_CONTINUE, 0 },
|
|
{ { QSE_T("delete"), 6 }, TOK_DELETE, 0 },
|
|
{ { QSE_T("do"), 2 }, TOK_DO, 0 },
|
|
{ { QSE_T("else"), 4 }, TOK_ELSE, 0 },
|
|
{ { QSE_T("exit"), 4 }, TOK_EXIT, 0 },
|
|
{ { QSE_T("for"), 3 }, TOK_FOR, 0 },
|
|
{ { QSE_T("function"), 8 }, TOK_FUNCTION, 0 },
|
|
{ { QSE_T("getline"), 7 }, TOK_GETLINE, QSE_AWK_RIO },
|
|
{ { QSE_T("if"), 2 }, TOK_IF, 0 },
|
|
{ { QSE_T("in"), 2 }, TOK_IN, 0 },
|
|
{ { QSE_T("next"), 4 }, TOK_NEXT, QSE_AWK_PABLOCK },
|
|
{ { QSE_T("nextfile"), 8 }, TOK_NEXTFILE, QSE_AWK_PABLOCK },
|
|
{ { QSE_T("nextofile"), 9 }, TOK_NEXTOFILE, QSE_AWK_PABLOCK | QSE_AWK_NEXTOFILE },
|
|
{ { QSE_T("print"), 5 }, TOK_PRINT, QSE_AWK_RIO },
|
|
{ { QSE_T("printf"), 6 }, TOK_PRINTF, QSE_AWK_RIO },
|
|
{ { QSE_T("return"), 6 }, TOK_RETURN, 0 },
|
|
{ { QSE_T("while"), 5 }, TOK_WHILE, 0 }
|
|
};
|
|
|
|
typedef struct global_t global_t;
|
|
|
|
struct global_t
|
|
{
|
|
const qse_char_t* name;
|
|
qse_size_t namelen;
|
|
int trait;
|
|
};
|
|
|
|
static global_t gtab[] =
|
|
{
|
|
/*
|
|
* this table must match the order of the qse_awk_gbl_id_t enumerators
|
|
*/
|
|
|
|
/* output real-to-str conversion format for other cases than 'print' */
|
|
{ QSE_T("CONVFMT"), 7, 0 },
|
|
|
|
/* current input file name */
|
|
{ QSE_T("FILENAME"), 8, QSE_AWK_PABLOCK },
|
|
|
|
/* input record number in current file */
|
|
{ QSE_T("FNR"), 3, QSE_AWK_PABLOCK },
|
|
|
|
/* input field separator */
|
|
{ QSE_T("FS"), 2, 0 },
|
|
|
|
/* ignore case in string comparison */
|
|
{ QSE_T("IGNORECASE"), 10, 0 },
|
|
|
|
/* number of fields in current input record
|
|
* NF is also updated if you assign a value to $0. so it is not
|
|
* associated with QSE_AWK_PABLOCK */
|
|
{ QSE_T("NF"), 2, 0 },
|
|
|
|
/* input record number */
|
|
{ QSE_T("NR"), 2, QSE_AWK_PABLOCK },
|
|
|
|
/* current output file name */
|
|
{ QSE_T("OFILENAME"), 9, QSE_AWK_PABLOCK | QSE_AWK_NEXTOFILE },
|
|
|
|
/* output real-to-str conversion format for 'print' */
|
|
{ QSE_T("OFMT"), 4, QSE_AWK_RIO },
|
|
|
|
/* output field separator for 'print' */
|
|
{ QSE_T("OFS"), 3, QSE_AWK_RIO },
|
|
|
|
/* output record separator. used for 'print' and blockless output */
|
|
{ QSE_T("ORS"), 3, QSE_AWK_RIO },
|
|
|
|
{ QSE_T("RLENGTH"), 7, 0 },
|
|
{ QSE_T("RS"), 2, 0 },
|
|
|
|
{ QSE_T("RSTART"), 6, 0 },
|
|
{ QSE_T("SUBSEP"), 6, 0 }
|
|
};
|
|
|
|
/* NOTE: GET_CHAR, GET_CHAR_TO, ADD_TOKEN_CHAR and ADD_TOKEN_STR execute
 * 'return -1' in the function that uses them when the operation fails.
 * use them only in a function returning an int. */

/* read the next character with get_char() */
#define GET_CHAR(awk) \
	do { \
		if (get_char(awk) <= -1) return -1; \
	} while(0)

/* read the next character with get_char() and store the character code
 * left in (awk)->sio.last.c into 'c' */
#define GET_CHAR_TO(awk,c) \
	do { \
		if (get_char(awk) <= -1) return -1; \
		c = (awk)->sio.last.c; \
	} while(0)

/* set the type of the token 'tok' to 'code'. 'awk' is not used */
#define SET_TOKEN_TYPE(awk,tok,code) \
	do { \
		(tok)->type = (code); \
	} while (0)

/* append the character 'c' to the name of the token 'tok'.
 * QSE_AWK_ENOMEM is set with the token location on failure */
#define ADD_TOKEN_CHAR(awk,tok,c) \
	do { \
		if (qse_str_ccat((tok)->name,(c)) == (qse_size_t)-1) \
		{ \
			qse_awk_seterror (awk, QSE_AWK_ENOMEM, QSE_NULL, &(tok)->loc); \
			return -1; \
		} \
	} while (0)

/* append 'l' characters at 's' to the name of the token 'tok'.
 * QSE_AWK_ENOMEM is set with the token location on failure */
#define ADD_TOKEN_STR(awk,tok,s,l) \
	do { \
		if (qse_str_ncat((tok)->name,(s),(l)) == (qse_size_t)-1) \
		{ \
			qse_awk_seterror (awk, QSE_AWK_ENOMEM, QSE_NULL, &(tok)->loc); \
			return -1; \
		} \
	} while (0)
|
|
|
|
/* true if the current token is of the type 'tok_type' */
#define MATCH(awk,tok_type) ((awk)->tok.type == (tok_type))

/* true if the type of the current token lies in the inclusive range
 * from 'tok_type_start' to 'tok_type_end' */
#define MATCH_RANGE(awk,tok_type_start,tok_type_end) \
	((awk)->tok.type >= (tok_type_start) && (awk)->tok.type <= (tok_type_end))

/* true if the current token is a semicolon or a newline */
#define MATCH_TERMINATOR_NORMAL(awk) \
	(MATCH((awk),TOK_SEMICOLON) || MATCH((awk),TOK_NEWLINE))

/* true if the current token is a right brace while QSE_AWK_NEWLINE
 * is set in the trait option.
 * 'awk' is parenthesized so that an argument given as an expression
 * expands correctly. */
#define MATCH_TERMINATOR_RBRACE(awk) \
	(((awk)->opt.trait & QSE_AWK_NEWLINE) && MATCH((awk),TOK_RBRACE))

/* true if the current token terminates a statement in either way above */
#define MATCH_TERMINATOR(awk) \
	(MATCH_TERMINATOR_NORMAL(awk) || MATCH_TERMINATOR_RBRACE(awk))
|
|
|
|
/* true if no error is recorded in 'awk' */
#define ISNOERR(awk) ((awk)->errinf.num == QSE_AWK_ENOERR)

/* reset the error information to QSE_AWK_ENOERR */
#define CLRERR(awk) \
	qse_awk_seterror (awk, QSE_AWK_ENOERR, QSE_NULL, QSE_NULL)

/* set the error 'code' with the name and the location of the current token */
#define SETERR_TOK(awk,code) \
	qse_awk_seterror (awk, code, QSE_STR_XSTR((awk)->tok.name), &(awk)->tok.loc)

/* set the error 'code' with neither an argument nor a location */
#define SETERR_COD(awk,code) \
	qse_awk_seterror (awk, code, QSE_NULL, QSE_NULL)

/* set the error 'code' with the location 'loc' but without an argument */
#define SETERR_LOC(awk,code,loc) \
	qse_awk_seterror (awk, code, QSE_NULL, loc)

/* set the error 'code' with an argument of 'el' characters at 'ep'
 * and the location 'loc' */
#define SETERR_ARG_LOC(awk,code,ep,el,loc) \
	do { \
		qse_cstr_t __ea; \
		__ea.len = (el); __ea.ptr = (ep); \
		qse_awk_seterror ((awk), (code), &__ea, (loc)); \
	} while (0)

/* set the error 'code' with an argument but without a location */
#define SETERR_ARG(awk,code,ep,el) SETERR_ARG_LOC(awk,code,ep,el,QSE_NULL)

/* overwrite the location of the recorded error with '*l'.
 * the first parameter is the object whose error location is changed.
 * it was named 'rtx' before while the body referred to 'awk', which made
 * the macro ignore its argument and depend on a variable named 'awk'
 * in the scope of the caller. */
#define ADJERR_LOC(awk,l) do { (awk)->errinf.loc = *(l); } while (0)
|
|
|
|
/* check if a node is a non-indexed variable: global, local, argument,
 * or named. it returns 1 if so and 0 otherwise. */
static QSE_INLINE int is_plain_var (qse_awk_nde_t* nde)
{
	switch (nde->type)
	{
		case QSE_AWK_NDE_GBL:
		case QSE_AWK_NDE_LCL:
		case QSE_AWK_NDE_ARG:
		case QSE_AWK_NDE_NAMED:
			return 1;

		default:
			return 0;
	}
}
|
|
|
|
/* check if a node is a variable of any kind: global, local, argument,
 * or named, either plain or indexed. it returns 1 if so and 0 otherwise. */
static QSE_INLINE int is_var (qse_awk_nde_t* nde)
{
	switch (nde->type)
	{
		/* plain variables */
		case QSE_AWK_NDE_GBL:
		case QSE_AWK_NDE_LCL:
		case QSE_AWK_NDE_ARG:
		case QSE_AWK_NDE_NAMED:
		/* indexed variables */
		case QSE_AWK_NDE_GBLIDX:
		case QSE_AWK_NDE_LCLIDX:
		case QSE_AWK_NDE_ARGIDX:
		case QSE_AWK_NDE_NAMEDIDX:
			return 1;

		default:
			return 0;
	}
}
|
|
|
|
/* fetch the next source character into awk->sio.last.
 *
 * a character pushed back with unget_char() is served first. otherwise,
 * the character comes from the buffer of the current input stream, which
 * is refilled through the input handler when exhausted. the end of the
 * input is reported by storing QSE_CHAR_EOF into awk->sio.last.c.
 *
 * it returns 0 on success including the end of input, and -1 if the
 * input handler has failed. */
static int get_char (qse_awk_t* awk)
{
	qse_ssize_t nread;

	if (awk->sio.nungots > 0)
	{
		/* serve the most recently ungotten character */
		awk->sio.nungots--;
		awk->sio.last = awk->sio.ungot[awk->sio.nungots];
		return 0;
	}

	if (awk->sio.inp->b.pos >= awk->sio.inp->b.len)
	{
		/* the buffer has been used up. read more */
		CLRERR (awk);
		nread = awk->sio.inf (
			awk, QSE_AWK_SIO_READ, awk->sio.inp,
			awk->sio.inp->b.buf, QSE_COUNTOF(awk->sio.inp->b.buf)
		);
		if (nread <= -1)
		{
			/* keep the error set by the handler if any */
			if (ISNOERR(awk))
			{
				SETERR_ARG (awk, QSE_AWK_EREAD, QSE_T("<SIN>"), 5);
			}
			return -1;
		}

		if (nread == 0)
		{
			/* end of input. the position is that of the
			 * stream as it stands now */
			awk->sio.inp->last.c = QSE_CHAR_EOF;
			awk->sio.inp->last.line = awk->sio.inp->line;
			awk->sio.inp->last.colm = awk->sio.inp->colm;
			awk->sio.inp->last.file = awk->sio.inp->name;
			awk->sio.last = awk->sio.inp->last;
			return 0;
		}

		awk->sio.inp->b.pos = 0;
		awk->sio.inp->b.len = nread;
	}

	if (awk->sio.inp->last.c == QSE_T('\n'))
	{
		/* the line number is advanced when the character following
		 * a newline is read, not when the newline itself is read.
		 * this way, TOK_EOF is located on the same line as the
		 * last newline. */
		awk->sio.inp->line++;
		awk->sio.inp->colm = 1;
	}

	/* take a character off the buffer and record where it was found */
	awk->sio.inp->last.c = awk->sio.inp->b.buf[awk->sio.inp->b.pos++];
	awk->sio.inp->last.line = awk->sio.inp->line;
	awk->sio.inp->last.colm = awk->sio.inp->colm++;
	awk->sio.inp->last.file = awk->sio.inp->name;
	awk->sio.last = awk->sio.inp->last;
	return 0;
}
|
|
|
|
/* push a character with its location back so that the next call to
 * get_char() returns it. the unget buffer has a fixed capacity and
 * exceeding it is only caught by the assertion. */
static void unget_char (qse_awk_t* awk, const qse_awk_sio_lxc_t* c)
{
	QSE_ASSERTX (
		awk->sio.nungots < QSE_COUNTOF(awk->sio.ungot),
		"Make sure that you have increased the size of sio.ungot large enough"
	);

	awk->sio.ungot[awk->sio.nungots] = *c;
	awk->sio.nungots++;
}
|
|
|
|
const qse_char_t* qse_awk_getgblname (
|
|
qse_awk_t* awk, qse_size_t idx, qse_size_t* len)
|
|
{
|
|
QSE_ASSERT (idx < QSE_LDA_SIZE(awk->parse.gbls));
|
|
|
|
*len = QSE_LDA_DLEN(awk->parse.gbls,idx);
|
|
return QSE_LDA_DPTR(awk->parse.gbls,idx);
|
|
}
|
|
|
|
/* store the name of the keyword 'id' into '*s'.
 * 'id' is used as an index to kwtab without a range check. the caller
 * must pass a valid keyword id. 'awk' is not used. */
void qse_awk_getkwname (qse_awk_t* awk, qse_awk_kwid_t id, qse_cstr_t* s)
{
	s->ptr = kwtab[id].name.ptr;
	s->len = kwtab[id].name.len;
}
|
|
|
|
/* parse the whole source code reachable through awk->sio.inf.
 *
 * it opens the main source stream, parses program units until TOK_EOF,
 * checks that every called function is defined unless QSE_AWK_IMPLICIT
 * is on, and closes the stream. on failure, it closes the included
 * streams left open and clears the partially built parse tree.
 *
 * it returns 0 on success and -1 on failure. */
static int parse (qse_awk_t* awk)
{
	int ret = -1;
	qse_ssize_t op;

	QSE_ASSERT (awk->sio.inf != QSE_NULL);

	CLRERR (awk);
	op = awk->sio.inf (awk, QSE_AWK_SIO_OPEN, awk->sio.inp, QSE_NULL, 0);
	if (op <= -1)
	{
		/* cannot open the source file.
		 * it doesn't even have to call CLOSE */
		if (ISNOERR(awk))
			SETERR_ARG (awk, QSE_AWK_EOPEN, QSE_T("<SIN>"), 5);
		return -1;
	}

	adjust_static_globals (awk);

	/* get the first character and the first token.
	 * the result of get_token() is compared against -1 like every
	 * other call to it in this file. */
	if (get_char (awk) <= -1 || get_token (awk) <= -1) goto oops;

	while (1)
	{
		/* skip the newlines between program units */
		while (MATCH(awk,TOK_NEWLINE))
		{
			if (get_token (awk) <= -1) goto oops;
		}
		if (MATCH(awk,TOK_EOF)) break;

		if (parse_progunit (awk) <= -1) goto oops;
	}

	if (!(awk->opt.trait & QSE_AWK_IMPLICIT))
	{
		/* ensure that all functions called are defined
		 * in the EXPLICIT-only mode */

		qse_htb_pair_t* p;
		qse_size_t buckno;

		p = qse_htb_getfirstpair (awk->parse.funs, &buckno);
		while (p != QSE_NULL)
		{
			if (qse_htb_search (awk->tree.funs,
				QSE_HTB_KPTR(p), QSE_HTB_KLEN(p)) == QSE_NULL)
			{
				qse_awk_nde_t* nde;

				/* the value of a pair in awk->parse.funs is
				 * used as a node here. its location is
				 * reported as the location of the error.
				 * see parse_fncall() for what is stored
				 * into awk->parse.funs */
				nde = (qse_awk_nde_t*)QSE_HTB_VPTR(p);

				SETERR_ARG_LOC (
					awk,
					QSE_AWK_EFUNNF,
					QSE_HTB_KPTR(p),
					QSE_HTB_KLEN(p),
					&nde->loc
				);

				goto oops;
			}

			p = qse_htb_getnextpair (awk->parse.funs, p, &buckno);
		}
	}

	QSE_ASSERT (awk->tree.ngbls == QSE_LDA_SIZE(awk->parse.gbls));
	ret = 0;

oops:
	if (ret <= -1)
	{
		/* an error occurred and control has reached here.
		 * probably, some included files might not have been
		 * closed. close them */
		while (awk->sio.inp != &awk->sio.arg)
		{
			qse_awk_sio_arg_t* prev;

			/* nothing much to do about a close error */
			awk->sio.inf (
				awk, QSE_AWK_SIO_CLOSE,
				awk->sio.inp, QSE_NULL, 0);

			prev = awk->sio.inp->prev;

			QSE_ASSERT (awk->sio.inp->name != QSE_NULL);
			QSE_MMGR_FREE (awk->mmgr, awk->sio.inp);

			awk->sio.inp = prev;
		}
	}
	else if (ret == 0)
	{
		/* no error occurred so far */
		QSE_ASSERT (awk->sio.inp == &awk->sio.arg);
		CLRERR (awk);
	}

	/* close the main source stream */
	if (awk->sio.inf (
		awk, QSE_AWK_SIO_CLOSE, awk->sio.inp, QSE_NULL, 0) != 0)
	{
		if (ret == 0)
		{
			/* a close error is reported only if no error has
			 * occurred before. this is to keep the earlier
			 * error that might be more critical than this */
			if (ISNOERR(awk))
				SETERR_ARG (awk, QSE_AWK_ECLOSE, QSE_T("<SIN>"), 5);
			ret = -1;
		}
	}

	if (ret <= -1)
	{
		/* clear the parse tree partially constructed on error */
		qse_awk_clear (awk);
	}

	return ret;
}
|
|
|
|
/* free every link in the awk->sio_names list and leave the list empty */
void qse_awk_clearsionames (qse_awk_t* awk)
{
	qse_link_t* cur;

	for (cur = awk->sio_names; cur != QSE_NULL; cur = awk->sio_names)
	{
		/* detach the head before freeing it */
		awk->sio_names = cur->link;
		QSE_MMGR_FREE (awk->mmgr, cur);
	}
}
|
|
|
|
/* parse the source code supplied through 'sio'.
 *
 * 'sio->in' is the mandatory source input handler. if 'sio->out' is
 * given, the parse tree is deparsed through it after successful parsing.
 * the existing parse tree and the include-file names recorded by the
 * previous call are discarded first.
 *
 * it returns 0 on success and -1 on failure. QSE_AWK_EINVAL is set if
 * 'sio' or 'sio->in' is missing. */
int qse_awk_parse (qse_awk_t* awk, qse_awk_sio_t* sio)
{
	int rc;

	QSE_ASSERTX (sio != QSE_NULL ,
		"the source code istream must be provided");
	QSE_ASSERTX (sio->in != QSE_NULL,
		"the source code input stream must be provided at least");

	/* the assertions above may be compiled out. check again */
	if (sio == QSE_NULL || sio->in == QSE_NULL)
	{
		SETERR_COD (awk, QSE_AWK_EINVAL);
		return -1;
	}

	QSE_ASSERT (awk->parse.depth.loop == 0);
	QSE_ASSERT (awk->parse.depth.expr == 0);

	/* discard what is left from the previous run */
	qse_awk_clear (awk);
	qse_awk_clearsionames (awk);

	/* start from a clean source i/o state. the main stream
	 * begins at the line 1 and the column 1 */
	QSE_MEMSET (&awk->sio, 0, QSE_SIZEOF(awk->sio));
	awk->sio.inf = sio->in;
	awk->sio.outf = sio->out;
	awk->sio.last.c = QSE_CHAR_EOF;
	awk->sio.arg.line = 1;
	awk->sio.arg.colm = 1;
	awk->sio.inp = &awk->sio.arg;

	rc = parse (awk);
	if (rc == 0)
	{
		/* deparse only if an output handler is provided */
		if (awk->sio.outf != QSE_NULL) rc = deparse (awk);
	}

	QSE_ASSERT (awk->parse.depth.loop == 0);
	QSE_ASSERT (awk->parse.depth.expr == 0);

	return rc;
}
|
|
|
|
/* close the current included stream and switch back to the stream
 * that included it.
 *
 * it returns 0 if the current stream is the main stream, 1 if an
 * included stream has been ended successfully, and -1 if closing it
 * has failed. */
static int end_include (qse_awk_t* awk)
{
	qse_awk_sio_arg_t* cur;
	int x;

	/* the main stream is not an included one */
	if (awk->sio.inp == &awk->sio.arg) return 0;

	CLRERR (awk);
	x = awk->sio.inf (
		awk, QSE_AWK_SIO_CLOSE,
		awk->sio.inp, QSE_NULL, 0);

	/* the stream structure is destroyed regardless of the result of
	 * closing. the failure is returned after that. this prevents the
	 * caller from calling QSE_AWK_SIO_CLOSE on awk->sio.inp again. */
	cur = awk->sio.inp;
	awk->sio.inp = cur->prev;

	QSE_ASSERT (cur->name != QSE_NULL);
	QSE_MMGR_FREE (awk->mmgr, cur);
	awk->parse.depth.incl--;

	if (x != 0)
	{
		/* the delayed close failure */
		if (ISNOERR(awk))
		{
			SETERR_ARG (awk, QSE_AWK_ECLOSE, QSE_T("<SIN>"), 5);
		}
		return -1;
	}

	/* resume from the last character read from the outer stream */
	awk->sio.last = awk->sio.inp->last;
	return 1;
}
|
|
|
|
/* begin reading from the file named by the current token.
 *
 * the current token must hold the file name. the name is recorded in
 * awk->sio_names, a new stream argument is created and opened through
 * the input handler, and the first token is read from the new stream.
 *
 * it returns 0 on success and -1 on failure. */
static int begin_include (qse_awk_t* awk)
{
	qse_awk_sio_arg_t* arg = QSE_NULL;
	qse_link_t* link;

	if (qse_strlen(QSE_STR_PTR(awk->tok.name)) != QSE_STR_LEN(awk->tok.name))
	{
		/* a '\0' character included in the include file name.
		 * we don't support such a file name */
		SETERR_ARG_LOC (
			awk,
			QSE_AWK_EIONMNL,
			QSE_STR_PTR(awk->tok.name),
			qse_strlen(QSE_STR_PTR(awk->tok.name)),
			&awk->tok.loc
		);
		return -1;
	}

	if (awk->opt.incldirs.ptr)
	{
		/* include directory is set... */
		/* TODO: */
	}

	/* store the include-file name into a list
	 * and this list is not deleted after qse_awk_parse.
	 * the errinfo.loc.file can point to a string here.
	 * NOTE(review): only the link header and the name that follows
	 * it are used below. QSE_SIZEOF(*arg) in the size looks like a
	 * leftover. it is kept as it only over-allocates. */
	link = (qse_link_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*link) +
		QSE_SIZEOF(*arg) + QSE_SIZEOF(qse_char_t) * (QSE_STR_LEN(awk->tok.name) + 1));
	if (link == QSE_NULL)
	{
		ADJERR_LOC (awk, &awk->ptok.loc);
		goto oops;
	}
	qse_strncpy ((qse_char_t*)(link + 1), QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name));
	link->link = awk->sio_names;
	awk->sio_names = link;

	/* allocate the stream argument. the size is that of the argument
	 * itself, not that of the whole awk object */
	arg = (qse_awk_sio_arg_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*arg));
	if (arg == QSE_NULL)
	{
		ADJERR_LOC (awk, &awk->ptok.loc);
		goto oops;
	}

	arg->name = (const qse_char_t*)(link + 1);
	arg->line = 1;
	arg->colm = 1;

	/* let the argument's prev field point to the current */
	arg->prev = awk->sio.inp;

	CLRERR (awk);
	if (awk->sio.inf (awk, QSE_AWK_SIO_OPEN, arg, QSE_NULL, 0) <= -1)
	{
		if (ISNOERR(awk)) SETERR_TOK (awk, QSE_AWK_EOPEN);
		else awk->errinf.loc = awk->tok.loc; /* adjust error location */
		goto oops;
	}

	/* i update the current pointer after opening is successful */
	awk->sio.inp = arg;
	awk->parse.depth.incl++;

	/* read in the first character in the included file.
	 * so the next call to get_token() sees the character read
	 * from this file. */
	if (get_char (awk) <= -1 || get_token (awk) <= -1)
	{
		end_include (awk);
		/* i don't jump to oops since i've called
		 * end_include() where awk->sio.inp/arg is freed. */
		return -1;
	}

	return 0;

oops:
	/* i don't need to free 'link' here since it's linked to awk->sio_names
	 * that's freed at the beginning of qse_awk_parse() or by qse_awk_close(). */
	if (arg) QSE_MMGR_FREE (awk->mmgr, arg);
	return -1;
}
|
|
|
|
/* parse a single program unit, which is one of the following:
 *
 *   @include "xxxx"
 *   @global xxx, xxxx;
 *   BEGIN { action }
 *   END { action }
 *   pattern { action }
 *   function name (parameter-list) { statement }
 *
 * it returns 0 on success and -1 on failure. */
static int parse_progunit (qse_awk_t* awk)
{
	qse_awk_nde_t* ptn;
	qse_awk_loc_t eloc;

	QSE_ASSERT (awk->parse.depth.loop == 0);

	if (MATCH(awk,TOK_XGLOBAL))
	{
		qse_size_t ngbls;

		awk->parse.id.block = PARSE_GBL;

		if (get_token(awk) <= -1) return -1;

		QSE_ASSERT (awk->tree.ngbls == QSE_LDA_SIZE(awk->parse.gbls));
		ngbls = awk->tree.ngbls;
		if (collect_globals (awk) == QSE_NULL)
		{
			/* drop the globals added by the failed declaration */
			qse_lda_delete (
				awk->parse.gbls, ngbls,
				QSE_LDA_SIZE(awk->parse.gbls) - ngbls);
			awk->tree.ngbls = ngbls;
			return -1;
		}

		return 0;
	}

	if (MATCH(awk,TOK_XINCLUDE))
	{
		/* the include depth is limited only if the limit is positive */
		if (awk->opt.depth.s.incl > 0 &&
		    awk->parse.depth.incl >= awk->opt.depth.s.incl)
		{
			SETERR_LOC (awk, QSE_AWK_EINCLTD, &awk->ptok.loc);
			return -1;
		}

		if (get_token(awk) <= -1) return -1;

		if (!MATCH(awk,TOK_STR))
		{
			SETERR_LOC (awk, QSE_AWK_EINCLSTR, &awk->ptok.loc);
			return -1;
		}

		if (begin_include (awk) <= -1) return -1;

		/* nothing more to do once the environment for file
		 * inclusion is set up. the loop in parse() goes on
		 * calling parse_progunit() */
		return 0;
	}

	if (MATCH(awk,TOK_FUNCTION))
	{
		awk->parse.id.block = PARSE_FUNCTION;
		if (parse_function (awk) == QSE_NULL) return -1;
		return 0;
	}

	/* all the remaining units - BEGIN, END, a patternless block, and
	 * a pattern - are pattern-action blocks. reject them if
	 * QSE_AWK_PABLOCK is off */
	if (!(awk->opt.trait & QSE_AWK_PABLOCK))
	{
		SETERR_TOK (awk, QSE_AWK_EKWFNC);
		return -1;
	}

	if (MATCH(awk,TOK_BEGIN))
	{
		awk->parse.id.block = PARSE_BEGIN;
		if (get_token(awk) <= -1) return -1;

		if (MATCH(awk,TOK_NEWLINE) || MATCH(awk,TOK_EOF))
		{
			/* when QSE_AWK_NEWLINE is set,
			 * BEGIN and { should be located on the same line */
			SETERR_LOC (awk, QSE_AWK_EBLKBEG, &awk->ptok.loc);
			return -1;
		}

		if (!MATCH(awk,TOK_LBRACE))
		{
			SETERR_TOK (awk, QSE_AWK_ELBRACE);
			return -1;
		}

		awk->parse.id.block = PARSE_BEGIN_BLOCK;
		if (parse_begin (awk) == QSE_NULL) return -1;

		/* a semicolon may follow the block. skip it if any */
		if (MATCH(awk,TOK_SEMICOLON))
		{
			if (get_token (awk) <= -1) return -1;
		}
		return 0;
	}

	if (MATCH(awk,TOK_END))
	{
		awk->parse.id.block = PARSE_END;
		if (get_token(awk) <= -1) return -1;

		if (MATCH(awk,TOK_NEWLINE) || MATCH(awk,TOK_EOF))
		{
			/* when QSE_AWK_NEWLINE is set,
			 * END and { should be located on the same line */
			SETERR_LOC (awk, QSE_AWK_EBLKEND, &awk->ptok.loc);
			return -1;
		}

		if (!MATCH(awk,TOK_LBRACE))
		{
			SETERR_TOK (awk, QSE_AWK_ELBRACE);
			return -1;
		}

		awk->parse.id.block = PARSE_END_BLOCK;
		if (parse_end (awk) == QSE_NULL) return -1;

		/* a semicolon may follow the block. skip it if any */
		if (MATCH(awk,TOK_SEMICOLON))
		{
			if (get_token (awk) <= -1) return -1;
		}
		return 0;
	}

	if (MATCH(awk,TOK_LBRACE))
	{
		/* a block without a pattern */
		awk->parse.id.block = PARSE_ACTION_BLOCK;
		if (parse_action_block (awk, QSE_NULL, 0) == QSE_NULL) return -1;

		/* a semicolon may follow the block. skip it if any */
		if (MATCH(awk,TOK_SEMICOLON))
		{
			if (get_token (awk) <= -1) return -1;
		}
		return 0;
	}

	/* anything else begins a pattern:
	 *
	 *   expressions
	 *   /regular expression/
	 *   pattern && pattern
	 *   pattern || pattern
	 *   !pattern
	 *   (pattern)
	 *   pattern, pattern
	 */
	awk->parse.id.block = PARSE_PATTERN;

	eloc = awk->tok.loc;
	ptn = parse_expr_withdc (awk, &eloc);
	if (ptn == QSE_NULL) return -1;

	QSE_ASSERT (ptn->next == QSE_NULL);

	if (MATCH(awk,TOK_COMMA))
	{
		/* the second pattern after a comma is chained to the first */
		if (get_token (awk) <= -1) goto drop_pattern;

		eloc = awk->tok.loc;
		ptn->next = parse_expr_withdc (awk, &eloc);
		if (ptn->next == QSE_NULL) goto drop_pattern;
	}

	if (MATCH(awk,TOK_NEWLINE) || MATCH(awk,TOK_SEMICOLON) || MATCH(awk,TOK_EOF))
	{
		/* blockless pattern */
		int eof = MATCH(awk,TOK_EOF);
		qse_awk_loc_t ploc = awk->ptok.loc;

		awk->parse.id.block = PARSE_ACTION_BLOCK;
		if (parse_action_block (awk, ptn, 1) == QSE_NULL) goto drop_pattern;

		if (!eof)
		{
			/* 'ptn' has been added to the chain. it must not
			 * be cleared here on failure as qse_awk_clear
			 * does it */
			if (get_token(awk) <= -1) return -1;
		}

		if ((awk->opt.trait & QSE_AWK_RIO) != QSE_AWK_RIO)
		{
			/* blockless pattern requires QSE_AWK_RIO
			 * to be ON because the implicit block is
			 * "print $0" */
			SETERR_LOC (awk, QSE_AWK_ENOSUP, &ploc);
			return -1;
		}

		return 0;
	}

	/* the pattern must be followed by an action block */
	if (!MATCH(awk,TOK_LBRACE))
	{
		qse_awk_clrpt (awk, ptn);
		SETERR_TOK (awk, QSE_AWK_ELBRACE);
		return -1;
	}

	awk->parse.id.block = PARSE_ACTION_BLOCK;
	if (parse_action_block (awk, ptn, 0) == QSE_NULL) goto drop_pattern;

	/* a semicolon may follow the block. skip it if any */
	if (MATCH(awk,TOK_SEMICOLON))
	{
		if (get_token (awk) <= -1) return -1;
	}
	return 0;

drop_pattern:
	/* the pattern has not been handed over to the chain. free it */
	qse_awk_clrpt (awk, ptn);
	return -1;
}
|
|
|
|
/*
 * Parses a user-defined function definition. The current token must be
 * the 'function' keyword.
 *
 * The function name is rejected if it collides with an intrinsic function,
 * an already defined function, a named variable, or a global variable.
 * On success, a qse_awk_fun_t entry is inserted into awk->tree.funs, any
 * matching entry is removed from awk->parse.funs, and the body node is
 * returned. On failure, QSE_NULL is returned; the duplicated name, the
 * collected parameters and any parsed body are released.
 */
static qse_awk_nde_t* parse_function (qse_awk_t* awk)
{
	qse_cstr_t name;
	qse_awk_nde_t* body;
	qse_awk_fun_t* fun;
	qse_size_t nargs;
	qse_htb_pair_t* pair;
	qse_awk_loc_t xloc;
	int rederr = 0;

	/* consume the 'function' keyword */
	QSE_ASSERT (MATCH(awk,TOK_FUNCTION));
	if (get_token(awk) <= -1) return QSE_NULL;

	/* the function name must be an identifier */
	if (!MATCH(awk,TOK_IDENT))
	{
		SETERR_TOK (awk, QSE_AWK_EFUNNAM);
		return QSE_NULL;
	}

	name.ptr = QSE_STR_PTR(awk->tok.name);
	name.len = QSE_STR_LEN(awk->tok.name);

	/* pick the redefinition error, if any. the first matching category
	 * wins. the error codes used here are expected to be non-zero. */
	if (qse_awk_findfnc (awk, &name) != QSE_NULL)
	{
		/* an intrinsic function */
		rederr = QSE_AWK_EFNCRED;
	}
	else if (qse_htb_search (awk->tree.funs, name.ptr, name.len) != QSE_NULL)
	{
		/* a function defined earlier */
		rederr = QSE_AWK_EFUNRED;
	}
	else if (qse_htb_search (awk->parse.named, name.ptr, name.len) != QSE_NULL)
	{
		/* a named variable */
		rederr = QSE_AWK_EVARRED;
	}
	else if (find_global (awk, &name) != QSE_LDA_NIL)
	{
		/* a global variable */
		rederr = QSE_AWK_EGBLRED;
	}

	if (rederr != 0)
	{
		qse_awk_seterror (awk, rederr, &name, &awk->tok.loc);
		return QSE_NULL;
	}

	/* the token buffer is reused by get_token(). keep a private copy */
	name.ptr = qse_strxdup (name.ptr, name.len, awk->mmgr);
	if (name.ptr == QSE_NULL)
	{
		SETERR_LOC (awk, QSE_AWK_ENOMEM, &awk->tok.loc);
		return QSE_NULL;
	}

	if (get_token(awk) <= -1) goto oops_name;

	/* the name must be followed by a left parenthesis */
	if (!MATCH(awk,TOK_LPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ELPAREN);
		goto oops_name;
	}

	if (get_token(awk) <= -1) goto oops_name;

	/* the parameter table must be empty at this point */
	QSE_ASSERT (QSE_LDA_SIZE(awk->parse.params) == 0);

	if (MATCH(awk,TOK_RPAREN))
	{
		/* empty parameter list. move past the right parenthesis */
		if (get_token(awk) <= -1) goto oops_name;
	}
	else
	{
		for (;;)
		{
			qse_char_t* pname;
			qse_size_t plen;

			if (!MATCH(awk,TOK_IDENT))
			{
				SETERR_TOK (awk, QSE_AWK_EBADPAR);
				goto oops_params;
			}

			pname = QSE_STR_PTR(awk->tok.name);
			plen = QSE_STR_LEN(awk->tok.name);

			/* a parameter is deliberately not checked against
			 * global variables. in the following,
			 *   global x;
			 *   function f (x) { print x; }
			 * x in 'print x' refers to the parameter. */

			/* a parameter must not repeat another parameter, and
			 * under QSE_AWK_STRICTNAMING, must differ from the
			 * function name as well */
			if (((awk->opt.trait & QSE_AWK_STRICTNAMING) &&
			     qse_strxncmp (pname, plen, name.ptr, name.len) == 0) ||
			    qse_lda_search (awk->parse.params, 0, pname, plen) != QSE_LDA_NIL)
			{
				SETERR_ARG_LOC (
					awk, QSE_AWK_EDUPPAR,
					pname, plen, &awk->tok.loc);
				goto oops_params;
			}

			/* enforce the limit on the number of parameters */
			if (QSE_LDA_SIZE(awk->parse.params) >= QSE_AWK_MAX_PARAMS)
			{
				SETERR_LOC (awk, QSE_AWK_EPARTM, &awk->tok.loc);
				goto oops_params;
			}

			/* append the parameter to the parameter table */
			if (qse_lda_insert (
				awk->parse.params,
				QSE_LDA_SIZE(awk->parse.params),
				pname, plen) == QSE_LDA_NIL)
			{
				SETERR_LOC (awk, QSE_AWK_ENOMEM, &awk->tok.loc);
				goto oops_params;
			}

			if (get_token (awk) <= -1) goto oops_params;

			/* the right parenthesis ends the list */
			if (MATCH(awk,TOK_RPAREN)) break;

			if (!MATCH(awk,TOK_COMMA))
			{
				SETERR_TOK (awk, QSE_AWK_ECOMMA);
				goto oops_params;
			}

			/* skip the comma and any new lines following it */
			do
			{
				if (get_token(awk) <= -1) goto oops_params;
			}
			while (MATCH(awk,TOK_NEWLINE));
		}

		/* move past the right parenthesis */
		if (get_token(awk) <= -1) goto oops_params;
	}

	/* the body may start on a later line than the parameter list even
	 * if QSE_AWK_NEWLINE is set. TOK_NEWLINE is produced only when the
	 * option is set. */
	while (MATCH(awk,TOK_NEWLINE))
	{
		if (get_token(awk) <= -1) goto oops_params;
	}

	/* the body must begin with a left brace */
	if (!MATCH(awk,TOK_LBRACE))
	{
		SETERR_TOK (awk, QSE_AWK_ELBRACE);
		goto oops_params;
	}
	if (get_token(awk) <= -1) goto oops_params;

	/* expose the function name to the body parser for the duration
	 * of body parsing only */
	awk->tree.cur_fun.ptr = name.ptr;
	awk->tree.cur_fun.len = name.len;

	xloc = awk->ptok.loc;
	body = parse_block_dc (awk, &xloc, 1);

	awk->tree.cur_fun.ptr = QSE_NULL;
	awk->tree.cur_fun.len = 0;

	if (body == QSE_NULL) goto oops_params;

	/* TODO: study further if the parameter names should be saved
	 *       for some reasons - might be needed for better deparsing output */

	/* only the parameter count is kept. the names are discarded */
	nargs = QSE_LDA_SIZE(awk->parse.params);
	qse_lda_clear (awk->parse.params);

	fun = (qse_awk_fun_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*fun));
	if (fun == QSE_NULL)
	{
		QSE_AWK_FREE (awk, name.ptr);
		qse_awk_clrpt (awk, body);
		ADJERR_LOC (awk, &awk->tok.loc);
		return QSE_NULL;
	}

	/* fun->name.ptr is set after the insertion below */
	fun->name.len = 0;
	fun->nargs = nargs;
	fun->body = body;

	pair = qse_htb_insert (awk->tree.funs, name.ptr, name.len, fun, 0);
	if (pair == QSE_NULL)
	{
		/* duplicates have been ruled out above. a failure here is
		 * reported as memory shortage. any other cause would be an
		 * implementation error. */
		QSE_AWK_FREE (awk, name.ptr);
		qse_awk_clrpt (awk, body);
		QSE_AWK_FREE (awk, fun);
		SETERR_LOC (awk, QSE_AWK_ENOMEM, &awk->tok.loc);
		return QSE_NULL;
	}

	/* let the function name point back at the key held by the pair
	 * so that the private copy can be released */
	fun->name.ptr = QSE_HTB_KPTR(pair);
	fun->name.len = QSE_HTB_KLEN(pair);
	QSE_AWK_FREE (awk, name.ptr);

	/* the function is defined now. drop the entry for it, if any,
	 * from the table of undefined function calls */
	qse_htb_delete (awk->parse.funs, fun->name.ptr, name.len);
	return body;

oops_params:
	qse_lda_clear (awk->parse.params);
oops_name:
	QSE_AWK_FREE (awk, name.ptr);
	return QSE_NULL;
}
|
|
|
|
/*
 * Parses the block of a BEGIN pattern and appends it to the list of
 * BEGIN blocks (awk->tree.begin .. awk->tree.begin_tail).
 * The current token must be the left brace.
 * Returns the parsed block node, or QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_begin (qse_awk_t* awk)
{
	qse_awk_loc_t start = awk->tok.loc;
	qse_awk_nde_t* blk;

	QSE_ASSERT (MATCH(awk,TOK_LBRACE));

	/* skip the left brace and parse the block as a top-level block */
	if (get_token(awk) <= -1) return QSE_NULL;

	blk = parse_block_dc (awk, &start, 1);
	if (blk == QSE_NULL) return QSE_NULL;

	/* append to the list, starting it if it is empty */
	if (awk->tree.begin != QSE_NULL) awk->tree.begin_tail->next = blk;
	else awk->tree.begin = blk;
	awk->tree.begin_tail = blk;

	return blk;
}
|
|
|
|
/*
 * Parses the block of an END pattern and appends it to the list of
 * END blocks (awk->tree.end .. awk->tree.end_tail).
 * The current token must be the left brace.
 * Returns the parsed block node, or QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_end (qse_awk_t* awk)
{
	qse_awk_loc_t start = awk->tok.loc;
	qse_awk_nde_t* blk;

	QSE_ASSERT (MATCH(awk,TOK_LBRACE));

	/* skip the left brace and parse the block as a top-level block */
	if (get_token(awk) <= -1) return QSE_NULL;

	blk = parse_block_dc (awk, &start, 1);
	if (blk == QSE_NULL) return QSE_NULL;

	/* append to the list, starting it if it is empty */
	if (awk->tree.end != QSE_NULL) awk->tree.end_tail->next = blk;
	else awk->tree.end = blk;
	awk->tree.end_tail = blk;

	return blk;
}
|
|
|
|
static qse_awk_chain_t* parse_action_block (
|
|
qse_awk_t* awk, qse_awk_nde_t* ptn, int blockless)
|
|
{
|
|
qse_awk_nde_t* nde;
|
|
qse_awk_chain_t* chain;
|
|
qse_awk_loc_t xloc = awk->tok.loc;
|
|
|
|
if (blockless) nde = QSE_NULL;
|
|
else
|
|
{
|
|
QSE_ASSERT (MATCH(awk,TOK_LBRACE));
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_block_dc (awk, &xloc, 1);
|
|
if (nde == QSE_NULL) return QSE_NULL;
|
|
}
|
|
|
|
chain = (qse_awk_chain_t*)
|
|
QSE_AWK_ALLOC (awk, QSE_SIZEOF(qse_awk_chain_t));
|
|
if (chain == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, &xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
chain->pattern = ptn;
|
|
chain->action = nde;
|
|
chain->next = QSE_NULL;
|
|
|
|
if (awk->tree.chain == QSE_NULL)
|
|
{
|
|
awk->tree.chain = chain;
|
|
awk->tree.chain_tail = chain;
|
|
awk->tree.chain_size++;
|
|
}
|
|
else
|
|
{
|
|
awk->tree.chain_tail->next = chain;
|
|
awk->tree.chain_tail = chain;
|
|
awk->tree.chain_size++;
|
|
}
|
|
|
|
return chain;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_block (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, int istop)
|
|
{
|
|
qse_awk_nde_t* head, * curr, * nde;
|
|
qse_awk_nde_blk_t* block;
|
|
qse_size_t nlcls, nlcls_max, tmp;
|
|
|
|
nlcls = QSE_LDA_SIZE(awk->parse.lcls);
|
|
nlcls_max = awk->parse.nlcls_max;
|
|
|
|
/* local variable declarations */
|
|
while (1)
|
|
{
|
|
/* skip new lines before local declaration in a block*/
|
|
while (MATCH(awk,TOK_NEWLINE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
}
|
|
|
|
if (!MATCH(awk,TOK_XLOCAL)) break;
|
|
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
qse_lda_delete (
|
|
awk->parse.lcls, nlcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls)-nlcls);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
if (collect_locals (awk, nlcls, istop) == QSE_NULL)
|
|
{
|
|
qse_lda_delete (
|
|
awk->parse.lcls, nlcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls)-nlcls);
|
|
return QSE_NULL;
|
|
}
|
|
}
|
|
|
|
/* block body */
|
|
head = QSE_NULL; curr = QSE_NULL;
|
|
|
|
while (1)
|
|
{
|
|
/* skip new lines within a block */
|
|
while (MATCH(awk,TOK_NEWLINE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
}
|
|
|
|
/* if EOF is met before the right brace, this is an error */
|
|
if (MATCH(awk,TOK_EOF))
|
|
{
|
|
qse_lda_delete (
|
|
awk->parse.lcls, nlcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls) - nlcls);
|
|
if (head != QSE_NULL) qse_awk_clrpt (awk, head);
|
|
SETERR_LOC (awk, QSE_AWK_EEOF, &awk->tok.loc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
/* end the block when the right brace is met */
|
|
if (MATCH(awk,TOK_RBRACE))
|
|
{
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
qse_lda_delete (
|
|
awk->parse.lcls, nlcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls)-nlcls);
|
|
if (head != QSE_NULL) qse_awk_clrpt (awk, head);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
/* parse an actual statement in a block */
|
|
{
|
|
qse_awk_loc_t sloc = awk->tok.loc;
|
|
nde = parse_statement (awk, &sloc);
|
|
}
|
|
|
|
if (nde == QSE_NULL)
|
|
{
|
|
qse_lda_delete (
|
|
awk->parse.lcls, nlcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls)-nlcls);
|
|
if (head != QSE_NULL) qse_awk_clrpt (awk, head);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
/* remove unnecessary statements such as adjacent
|
|
* null statements */
|
|
if (nde->type == QSE_AWK_NDE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
continue;
|
|
}
|
|
if (nde->type == QSE_AWK_NDE_BLK &&
|
|
((qse_awk_nde_blk_t*)nde)->body == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
continue;
|
|
}
|
|
|
|
if (curr == QSE_NULL) head = nde;
|
|
else curr->next = nde;
|
|
curr = nde;
|
|
}
|
|
|
|
block = (qse_awk_nde_blk_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*block));
|
|
if (block == QSE_NULL)
|
|
{
|
|
qse_lda_delete (
|
|
awk->parse.lcls, nlcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls)-nlcls);
|
|
qse_awk_clrpt (awk, head);
|
|
ADJERR_LOC (awk, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
tmp = QSE_LDA_SIZE(awk->parse.lcls);
|
|
if (tmp > awk->parse.nlcls_max) awk->parse.nlcls_max = tmp;
|
|
|
|
/* remove all lcls to move it up to the top level */
|
|
qse_lda_delete (awk->parse.lcls, nlcls, tmp - nlcls);
|
|
|
|
/* adjust the number of lcls for a block without any statements */
|
|
/* if (head == QSE_NULL) tmp = 0; */
|
|
|
|
block->type = QSE_AWK_NDE_BLK;
|
|
block->loc = *xloc;
|
|
block->body = head;
|
|
|
|
/* TODO: not only local variables but also nested blocks,
|
|
unless it is part of other constructs such as if, can be promoted
|
|
and merged to top-level block */
|
|
|
|
/* migrate all block-local variables to a top-level block */
|
|
if (istop)
|
|
{
|
|
block->nlcls = awk->parse.nlcls_max - nlcls;
|
|
awk->parse.nlcls_max = nlcls_max;
|
|
}
|
|
else
|
|
{
|
|
/*block->nlcls = tmp - nlcls;*/
|
|
block->nlcls = 0;
|
|
}
|
|
|
|
return (qse_awk_nde_t*)block;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_block_dc (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, int istop)
|
|
{
|
|
qse_awk_nde_t* nde;
|
|
|
|
if (awk->opt.depth.s.block_parse > 0 &&
|
|
awk->parse.depth.block >= awk->opt.depth.s.block_parse)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_EBLKNST, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
awk->parse.depth.block++;
|
|
nde = parse_block (awk, xloc, istop);
|
|
awk->parse.depth.block--;
|
|
|
|
return nde;
|
|
}
|
|
|
|
/*
 * Registers the names of the static global variables listed in gtab,
 * from QSE_AWK_MIN_GBL_ID to QSE_AWK_MAX_GBL_ID, into awk->parse.gbls
 * and counts them in awk->tree.ngbls_base and awk->tree.ngbls.
 *
 * This function is not general-purpose. Call it from qse_awk_open() only.
 *
 * Returns 0 on success, -1 if a name cannot be inserted.
 */
int qse_awk_initgbls (qse_awk_t* awk)
{
	int id;

	QSE_ASSERT (awk->tree.ngbls_base == 0 && awk->tree.ngbls == 0);

	awk->tree.ngbls = 0;
	awk->tree.ngbls_base = 0;

	id = QSE_AWK_MIN_GBL_ID;
	while (id <= QSE_AWK_MAX_GBL_ID)
	{
		qse_size_t pos;

		/* append the name at the end of the table */
		pos = qse_lda_insert (
			awk->parse.gbls,
			QSE_LDA_SIZE(awk->parse.gbls),
			(qse_char_t*)gtab[id].name,
			gtab[id].namelen);
		if (pos == QSE_LDA_NIL) return -1;

		/* the table index must agree with the global id */
		QSE_ASSERT ((int)pos == id);

		awk->tree.ngbls++;
		awk->tree.ngbls_base++;

		id++;
	}

	QSE_ASSERT (awk->tree.ngbls_base ==
		QSE_AWK_MAX_GBL_ID-QSE_AWK_MIN_GBL_ID+1);
	return 0;
}
|
|
|
|
/*
 * Enables or disables each static global variable according to the
 * current traits. A static global whose required trait bits (gtab[id].trait)
 * are not all set in awk->opt.trait gets its name length in
 * awk->parse.gbls reset to 0. Otherwise, the name length is restored to
 * gtab[id].namelen.
 */
static void adjust_static_globals (qse_awk_t* awk)
{
	int id;

	/* all static globals must have been registered already */
	QSE_ASSERT (awk->tree.ngbls_base >=
		QSE_AWK_MAX_GBL_ID - QSE_AWK_MIN_GBL_ID + 1);

	for (id = QSE_AWK_MIN_GBL_ID; id <= QSE_AWK_MAX_GBL_ID; id++)
	{
		if ((awk->opt.trait & gtab[id].trait) != gtab[id].trait)
		{
			/* a required trait is off. disable the name */
			QSE_LDA_DLEN(awk->parse.gbls,id) = 0;
		}
		else
		{
			QSE_LDA_DLEN(awk->parse.gbls,id) = gtab[id].namelen;
		}
	}
}
|
|
|
|
/*
 * Looks up a global variable by name, scanning awk->parse.gbls from the
 * last entry towards the first.
 * Returns the index of the matching entry, or QSE_LDA_NIL if none matches.
 */
static qse_size_t get_global (qse_awk_t* awk, const qse_cstr_t* name)
{
	qse_lda_t* tab = awk->parse.gbls;
	qse_size_t pos = QSE_LDA_SIZE(tab);

	while (pos > 0)
	{
		pos--;

		if (qse_strxncmp (
			QSE_LDA_DPTR(tab,pos), QSE_LDA_DLEN(tab,pos),
			name->ptr, name->len) == 0)
		{
			return pos;
		}
	}

	return QSE_LDA_NIL;
}
|
|
|
|
/*
 * Looks up a global variable by name, scanning awk->parse.gbls from the
 * first entry towards the last.
 * Returns the index of the matching entry, or QSE_LDA_NIL if none matches.
 */
static qse_size_t find_global (qse_awk_t* awk, const qse_cstr_t* name)
{
	qse_lda_t* tab = awk->parse.gbls;
	qse_size_t pos = 0;

	while (pos < QSE_LDA_SIZE(tab))
	{
		if (qse_strxncmp (
			QSE_LDA_DPTR(tab,pos), QSE_LDA_DLEN(tab,pos),
			name->ptr, name->len) == 0)
		{
			return pos;
		}

		pos++;
	}

	return QSE_LDA_NIL;
}
|
|
|
|
static int add_global (
|
|
qse_awk_t* awk, const qse_cstr_t* name,
|
|
qse_awk_loc_t* xloc, int disabled)
|
|
{
|
|
qse_size_t ngbls;
|
|
|
|
/* check if it is a keyword */
|
|
if (classify_ident (awk, name) != TOK_IDENT)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EKWRED, name->ptr, name->len, xloc);
|
|
return -1;
|
|
}
|
|
|
|
/* check if it conflicts with a builtin function name */
|
|
if (qse_awk_findfnc (awk, name) != QSE_NULL)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EFNCRED, name->ptr, name->len, xloc);
|
|
return -1;
|
|
}
|
|
|
|
/* check if it conflicts with a function name */
|
|
if (qse_htb_search (awk->tree.funs, name->ptr, name->len) != QSE_NULL)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EFUNRED, name->ptr, name->len, xloc);
|
|
return -1;
|
|
}
|
|
|
|
/* check if it conflicts with a function name
|
|
* caught in the function call table */
|
|
if (qse_htb_search (awk->parse.funs, name->ptr, name->len) != QSE_NULL)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EFUNRED, name->ptr, name->len, xloc);
|
|
return -1;
|
|
}
|
|
|
|
/* check if it conflicts with other global variable names */
|
|
if (find_global (awk, name) != QSE_LDA_NIL)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EDUPGBL, name->ptr, name->len, xloc);
|
|
return -1;
|
|
}
|
|
|
|
#if 0
|
|
/* TODO: need to check if it conflicts with a named variable to
|
|
* disallow such a program shown below (IMPLICIT & EXPLICIT on)
|
|
* BEGIN {X=20; x(); x(); x(); print X}
|
|
* global X;
|
|
* function x() { print X++; }
|
|
*/
|
|
if (qse_htb_search (awk->parse.named, name, len) != QSE_NULL)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EVARRED, name, len, xloc);
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
ngbls = QSE_LDA_SIZE (awk->parse.gbls);
|
|
if (ngbls >= QSE_AWK_MAX_GBLS)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_EGBLTM, xloc);
|
|
return -1;
|
|
}
|
|
|
|
if (qse_lda_insert (awk->parse.gbls,
|
|
QSE_LDA_SIZE(awk->parse.gbls),
|
|
(qse_char_t*)name->ptr, name->len) == QSE_LDA_NIL)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
return -1;
|
|
}
|
|
|
|
QSE_ASSERT (ngbls == QSE_LDA_SIZE(awk->parse.gbls) - 1);
|
|
|
|
/* the disabled item is inserted normally but
|
|
* the name length is reset to zero. */
|
|
if (disabled) QSE_LDA_DLEN(awk->parse.gbls,ngbls) = 0;
|
|
|
|
awk->tree.ngbls = QSE_LDA_SIZE (awk->parse.gbls);
|
|
QSE_ASSERT (ngbls == awk->tree.ngbls-1);
|
|
|
|
/* return the id which is the index to the gbl table. */
|
|
return (int)ngbls;
|
|
}
|
|
|
|
/*
 * Adds a global variable named 'name'.
 *
 * An empty name fails with QSE_AWK_EINVAL. The call fails with
 * QSE_AWK_ENOPER if globals beyond the base set exist already
 * (awk->tree.ngbls > awk->tree.ngbls_base), which is the case after
 * qse_awk_parse() has been called.
 *
 * Returns the id of the added variable, or -1 on failure.
 */
int qse_awk_addgbl (qse_awk_t* awk, const qse_char_t* name)
{
	qse_cstr_t ncs;
	int id;

	ncs.len = qse_strlen(name);
	ncs.ptr = (qse_char_t*)name;

	if (ncs.len <= 0)
	{
		SETERR_COD (awk, QSE_AWK_EINVAL);
		return -1;
	}

	if (awk->tree.ngbls > awk->tree.ngbls_base)
	{
		/* this function is not allowed after qse_awk_parse is called */
		SETERR_COD (awk, QSE_AWK_ENOPER);
		return -1;
	}

	id = add_global (awk, &ncs, QSE_NULL, 0);
	if (id <= -1) return id;

	/* add_global() has updated the total count. the variable also
	 * counts as part of the base set. */
	awk->tree.ngbls_base++;
	return id;
}
|
|
|
|
#define QSE_AWK_NUM_STATIC_GBLS \
|
|
(QSE_AWK_MAX_GBL_ID-QSE_AWK_MIN_GBL_ID+1)
|
|
|
|
int qse_awk_delgbl (qse_awk_t* awk, const qse_char_t* name)
|
|
{
|
|
qse_size_t n;
|
|
qse_cstr_t ncs;
|
|
|
|
ncs.ptr = (qse_char_t*)name;
|
|
ncs.len = qse_strlen (name);
|
|
|
|
if (awk->tree.ngbls > awk->tree.ngbls_base)
|
|
{
|
|
/* this function is not allow after qse_awk_parse is called */
|
|
qse_awk_seterrnum (awk, QSE_AWK_ENOPER, QSE_NULL);
|
|
return -1;
|
|
}
|
|
|
|
n = qse_lda_search (awk->parse.gbls,
|
|
QSE_AWK_NUM_STATIC_GBLS, ncs.ptr, ncs.len);
|
|
if (n == QSE_LDA_NIL)
|
|
{
|
|
qse_awk_seterrnum (awk, QSE_AWK_ENOENT, &ncs);
|
|
return -1;
|
|
}
|
|
|
|
/* invalidate the name if deletion is requested.
|
|
* this approach does not delete the entry.
|
|
* if qse_delgbl() is called with the same name
|
|
* again, the entry will be appended again.
|
|
* never call this funciton unless it is really required. */
|
|
/*
|
|
awk->parse.gbls.buf[n].name.ptr[0] = QSE_T('\0');
|
|
awk->parse.gbls.buf[n].name.len = 0;
|
|
*/
|
|
n = qse_lda_uplete (awk->parse.gbls, n, 1);
|
|
QSE_ASSERT (n == 1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Finds the global variable named 'name'. The search starts past the
 * static global variables, so a static global is never matched.
 *
 * Returns the id of the variable, or -1 with QSE_AWK_ENOENT set if no
 * such variable is found.
 */
int qse_awk_findgbl (qse_awk_t* awk, const qse_char_t* name)
{
	qse_cstr_t ncs;
	qse_size_t pos;

	ncs.len = qse_strlen (name);
	ncs.ptr = (qse_char_t*)name;

	pos = qse_lda_search (awk->parse.gbls,
		QSE_AWK_NUM_STATIC_GBLS, ncs.ptr, ncs.len);
	if (pos != QSE_LDA_NIL) return (int)pos;

	qse_awk_seterrnum (awk, QSE_AWK_ENOENT, &ncs);
	return -1;
}
|
|
|
|
/*
 * Parses a comma-separated list of global variable names and registers
 * each of them with add_global(). The list ends at a normal terminator
 * (a semicolon or a new line), which is consumed. New lines following a
 * comma are skipped.
 *
 * Returns 'awk' on success, QSE_NULL on failure.
 */
static qse_awk_t* collect_globals (qse_awk_t* awk)
{
	if (MATCH(awk,TOK_NEWLINE))
	{
		/* when QSE_AWK_NEWLINE is on, the first name must be on
		 * the same line as the declaration keyword */
		SETERR_COD (awk, QSE_AWK_EVARMS);
		return QSE_NULL;
	}

	for (;;)
	{
		/* a variable name is expected here */
		if (!MATCH(awk,TOK_IDENT))
		{
			SETERR_TOK (awk, QSE_AWK_EBADVAR);
			return QSE_NULL;
		}

		if (add_global (
			awk, QSE_STR_XSTR(awk->tok.name),
			&awk->tok.loc, 0) <= -1)
		{
			return QSE_NULL;
		}

		if (get_token(awk) <= -1) return QSE_NULL;

		if (MATCH_TERMINATOR_NORMAL(awk))
		{
			/* consume the terminator (;, <NL>) and finish */
			if (get_token(awk) <= -1) return QSE_NULL;
			break;
		}

		/* unlike collect_locals(), a right brace is not treated as
		 * a terminator because a global declaration can never be
		 * placed within a block. MATCH_TERMINATOR_RBRACE(awk) is
		 * not performed for this reason. */

		if (!MATCH(awk,TOK_COMMA))
		{
			SETERR_TOK (awk, QSE_AWK_ECOMMA);
			return QSE_NULL;
		}

		/* skip the comma and any new lines following it */
		do
		{
			if (get_token(awk) <= -1) return QSE_NULL;
		}
		while (MATCH(awk,TOK_NEWLINE));
	}

	return awk;
}
|
|
|
|
/*
 * Parses a comma-separated list of local variable names and appends each
 * of them to awk->parse.lcls.
 *
 *   nlcls  the index in awk->parse.lcls where the locals of the current
 *          block begin. duplicates are searched for from this index.
 *   istop  non-zero if the declaration is in a top-level block. the names
 *          are checked against the parameter names in that case.
 *
 * The list ends at a normal terminator (a semicolon or a new line), which
 * is consumed, or at a right brace, which is left in place.
 *
 * Returns 'awk' on success, QSE_NULL on failure. Names appended before a
 * failure are not removed by this function.
 */
static qse_awk_t* collect_locals (
	qse_awk_t* awk, qse_size_t nlcls, int istop)
{
	if (MATCH(awk,TOK_NEWLINE))
	{
		/* when QSE_AWK_NEWLINE is on, the first name must be on
		 * the same line as the declaration keyword */
		SETERR_COD (awk, QSE_AWK_EVARMS);
		return QSE_NULL;
	}

	for (;;)
	{
		qse_cstr_t lcl;
		qse_size_t n;

		/* a variable name is expected here */
		if (!MATCH(awk,TOK_IDENT))
		{
			SETERR_TOK (awk, QSE_AWK_EBADVAR);
			return QSE_NULL;
		}

		lcl = *QSE_STR_XSTR(awk->tok.name);

		/* an intrinsic function name can't be used.
		 *   function f() { local length; } */
		if (qse_awk_findfnc (awk, &lcl) != QSE_NULL)
		{
			SETERR_ARG_LOC (
				awk, QSE_AWK_EFNCRED,
				lcl.ptr, lcl.len, &awk->tok.loc);
			return QSE_NULL;
		}

		/* the first level declaration shares the scope with the
		 * parameter list. a parameter name can't be reused there. */
		if (istop &&
		    qse_lda_search (
			awk->parse.params, 0, lcl.ptr, lcl.len) != QSE_LDA_NIL)
		{
			SETERR_ARG_LOC (
				awk, QSE_AWK_EPARRED,
				lcl.ptr, lcl.len, &awk->tok.loc);
			return QSE_NULL;
		}

		/* under strict naming, the name of the owning function
		 * can't be reused */
		if ((awk->opt.trait & QSE_AWK_STRICTNAMING) &&
		    awk->tree.cur_fun.ptr != QSE_NULL &&
		    qse_strxncmp (
			lcl.ptr, lcl.len,
			awk->tree.cur_fun.ptr,
			awk->tree.cur_fun.len) == 0)
		{
			SETERR_ARG_LOC (
				awk, QSE_AWK_EFUNRED,
				lcl.ptr, lcl.len, &awk->tok.loc);
			return QSE_NULL;
		}

		/* the name must be unique among the locals of this block */
		if (qse_lda_search (
			awk->parse.lcls, nlcls, lcl.ptr, lcl.len) != QSE_LDA_NIL)
		{
			SETERR_ARG_LOC (
				awk, QSE_AWK_EDUPLCL,
				lcl.ptr, lcl.len, &awk->tok.loc);
			return QSE_NULL;
		}

		/* a global variable of the same name is a conflict only
		 * if it is one of the static global variables */
		n = find_global (awk, &lcl);
		if (n != QSE_LDA_NIL && n < awk->tree.ngbls_base)
		{
			SETERR_ARG_LOC (
				awk, QSE_AWK_EDUPLCL,
				lcl.ptr, lcl.len, &awk->tok.loc);
			return QSE_NULL;
		}

		/* enforce the limit on the number of local variables */
		if (QSE_LDA_SIZE(awk->parse.lcls) >= QSE_AWK_MAX_LCLS)
		{
			SETERR_LOC (awk, QSE_AWK_ELCLTM, &awk->tok.loc);
			return QSE_NULL;
		}

		if (qse_lda_insert (
			awk->parse.lcls,
			QSE_LDA_SIZE(awk->parse.lcls),
			lcl.ptr, lcl.len) == QSE_LDA_NIL)
		{
			SETERR_LOC (awk, QSE_AWK_ENOMEM, &awk->tok.loc);
			return QSE_NULL;
		}

		if (get_token(awk) <= -1) return QSE_NULL;

		if (MATCH_TERMINATOR_NORMAL(awk))
		{
			/* consume the terminator (;, <NL>) and finish */
			if (get_token(awk) <= -1) return QSE_NULL;
			break;
		}

		/* a right brace ends the list too but must not be skipped */
		if (MATCH_TERMINATOR_RBRACE(awk)) break;

		if (!MATCH(awk,TOK_COMMA))
		{
			SETERR_TOK (awk, QSE_AWK_ECOMMA);
			return QSE_NULL;
		}

		/* skip the comma and any new lines following it */
		do
		{
			if (get_token(awk) <= -1) return QSE_NULL;
		}
		while (MATCH(awk,TOK_NEWLINE));
	}

	return awk;
}
|
|
|
|
/*
 * Parses an 'if' statement. The current token must be the left
 * parenthesis that opens the condition.
 *
 *   xloc  the location recorded in the resulting node
 *
 * New lines between the 'then' statement and 'else' are skipped.
 * Returns a QSE_AWK_NDE_IF node, or QSE_NULL on failure. All parts
 * parsed before a failure are released.
 */
static qse_awk_nde_t* parse_if (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* cond = QSE_NULL;
	qse_awk_nde_t* on_true = QSE_NULL;
	qse_awk_nde_t* on_false = QSE_NULL;
	qse_awk_nde_if_t* ifnde;
	qse_awk_loc_t loc;

	if (!MATCH(awk,TOK_LPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ELPAREN);
		return QSE_NULL;
	}
	if (get_token(awk) <= -1) return QSE_NULL;

	/* the condition */
	loc = awk->tok.loc;
	cond = parse_expr_withdc (awk, &loc);
	if (cond == QSE_NULL) goto oops;

	if (!MATCH(awk,TOK_RPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ERPAREN);
		goto oops;
	}

	/* TODO: optimization. if you know the test evaluates to true or
	 *       false, you can drop the 'if' statement and take either
	 *       the 'then' part or the 'else' part. */

	if (get_token(awk) <= -1) goto oops;

	/* the statement to execute if the condition holds */
	loc = awk->tok.loc;
	on_true = parse_statement (awk, &loc);
	if (on_true == QSE_NULL) goto oops;

	/* 'else' may be placed on a later line */
	while (MATCH(awk,TOK_NEWLINE))
	{
		if (get_token(awk) <= -1) goto oops;
	}

	if (MATCH(awk,TOK_ELSE))
	{
		if (get_token(awk) <= -1) goto oops;

		loc = awk->tok.loc;
		on_false = parse_statement (awk, &loc);
		if (on_false == QSE_NULL) goto oops;
	}

	ifnde = (qse_awk_nde_if_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*ifnde));
	if (ifnde == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	ifnde->type = QSE_AWK_NDE_IF;
	ifnde->loc = *xloc;
	ifnde->test = cond;
	ifnde->then_part = on_true;
	ifnde->else_part = on_false;

	return (qse_awk_nde_t*)ifnde;

oops:
	if (on_false) qse_awk_clrpt (awk, on_false);
	if (on_true) qse_awk_clrpt (awk, on_true);
	if (cond) qse_awk_clrpt (awk, cond);
	return QSE_NULL;
}
|
|
|
|
/*
 * Parses a 'while' statement. The current token must be the left
 * parenthesis that opens the condition.
 *
 *   xloc  the location recorded in the resulting node
 *
 * Returns a QSE_AWK_NDE_WHILE node, or QSE_NULL on failure. The parts
 * parsed before a failure are released.
 */
static qse_awk_nde_t* parse_while (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* cond = QSE_NULL;
	qse_awk_nde_t* stmt = QSE_NULL;
	qse_awk_nde_while_t* loop;
	qse_awk_loc_t loc;

	/* nothing has been parsed yet. a plain return suffices here. */
	if (!MATCH(awk,TOK_LPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ELPAREN);
		return QSE_NULL;
	}
	if (get_token(awk) <= -1) return QSE_NULL;

	/* the condition */
	loc = awk->tok.loc;
	cond = parse_expr_withdc (awk, &loc);
	if (cond == QSE_NULL) goto oops;

	if (!MATCH(awk,TOK_RPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ERPAREN);
		goto oops;
	}
	if (get_token(awk) <= -1) goto oops;

	/* the loop body */
	loc = awk->tok.loc;
	stmt = parse_statement (awk, &loc);
	if (stmt == QSE_NULL) goto oops;

	loop = (qse_awk_nde_while_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*loop));
	if (loop == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	loop->type = QSE_AWK_NDE_WHILE;
	loop->loc = *xloc;
	loop->test = cond;
	loop->body = stmt;

	return (qse_awk_nde_t*)loop;

oops:
	if (stmt) qse_awk_clrpt (awk, stmt);
	if (cond) qse_awk_clrpt (awk, cond);
	return QSE_NULL;
}
|
|
|
|
/*
 * Parses a 'for' statement. The current token must be the left
 * parenthesis following 'for'.
 *
 *   xloc  the location recorded in the resulting node
 *
 * Two forms are handled.
 *   for (init; test; incr) body   - yields a QSE_AWK_NDE_FOR node.
 *                                   each of init, test, incr is optional.
 *   for (var in array) body       - yields a QSE_AWK_NDE_FOREACH node.
 *
 * The second form is chosen if the first expression is an 'in' operation
 * whose left operand is a plain variable, and the expression does not
 * begin with a left parenthesis.
 *
 * Returns the node, or QSE_NULL on failure. The parts parsed before a
 * failure are released.
 */
static qse_awk_nde_t* parse_for (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* init = QSE_NULL;
	qse_awk_nde_t* test = QSE_NULL;
	qse_awk_nde_t* incr = QSE_NULL;
	qse_awk_nde_t* body = QSE_NULL;
	qse_awk_nde_for_t* loop;
	qse_awk_loc_t loc;

	if (!MATCH(awk,TOK_LPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ELPAREN);
		return QSE_NULL;
	}
	if (get_token(awk) <= -1) return QSE_NULL;

	if (!MATCH(awk,TOK_SEMICOLON))
	{
		/* remember whether the whole expression or its first element
		 * is wrapped in parentheses. if so, the expression is never
		 * taken for the 'var in array' part of foreach. */
		int paren_first = MATCH(awk,TOK_LPAREN);

		loc = awk->tok.loc;
		init = parse_expr_withdc (awk, &loc);
		if (init == QSE_NULL) goto oops;

		if (!paren_first && init->type == QSE_AWK_NDE_EXP_BIN &&
		    ((qse_awk_nde_exp_t*)init)->opcode == QSE_AWK_BINOP_IN &&
		    is_plain_var(((qse_awk_nde_exp_t*)init)->left))
		{
			/* switch to foreach - for (x in y) */
			qse_awk_nde_foreach_t* each;

			if (!MATCH(awk,TOK_RPAREN))
			{
				SETERR_TOK (awk, QSE_AWK_ERPAREN);
				goto oops;
			}
			if (get_token(awk) <= -1) goto oops;

			loc = awk->tok.loc;
			body = parse_statement (awk, &loc);
			if (body == QSE_NULL) goto oops;

			each = (qse_awk_nde_foreach_t*) qse_awk_callocmem (
				awk, QSE_SIZEOF(*each));
			if (each == QSE_NULL)
			{
				ADJERR_LOC (awk, xloc);
				goto oops;
			}

			/* the 'in' expression serves as the test */
			each->type = QSE_AWK_NDE_FOREACH;
			each->loc = *xloc;
			each->test = init;
			each->body = body;

			return (qse_awk_nde_t*)each;
		}

		if (!MATCH(awk,TOK_SEMICOLON))
		{
			SETERR_TOK (awk, QSE_AWK_ESCOLON);
			goto oops;
		}
	}

	/* skip the first semicolon and any new lines following it */
	do
	{
		if (get_token(awk) <= -1) goto oops;
	}
	while (MATCH(awk,TOK_NEWLINE));

	if (!MATCH(awk,TOK_SEMICOLON))
	{
		/* the test expression */
		loc = awk->tok.loc;
		test = parse_expr_withdc (awk, &loc);
		if (test == QSE_NULL) goto oops;

		if (!MATCH(awk,TOK_SEMICOLON))
		{
			SETERR_TOK (awk, QSE_AWK_ESCOLON);
			goto oops;
		}
	}

	/* skip the second semicolon and any new lines following it */
	do
	{
		if (get_token(awk) <= -1) goto oops;
	}
	while (MATCH(awk,TOK_NEWLINE));

	if (!MATCH(awk,TOK_RPAREN))
	{
		/* the increment expression */
		loc = awk->tok.loc;
		incr = parse_expr_withdc (awk, &loc);
		if (incr == QSE_NULL) goto oops;

		if (!MATCH(awk,TOK_RPAREN))
		{
			SETERR_TOK (awk, QSE_AWK_ERPAREN);
			goto oops;
		}
	}

	if (get_token(awk) <= -1) goto oops;

	/* the loop body */
	loc = awk->tok.loc;
	body = parse_statement (awk, &loc);
	if (body == QSE_NULL) goto oops;

	loop = (qse_awk_nde_for_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*loop));
	if (loop == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	loop->type = QSE_AWK_NDE_FOR;
	loop->loc = *xloc;
	loop->init = init;
	loop->test = test;
	loop->incr = incr;
	loop->body = body;

	return (qse_awk_nde_t*)loop;

oops:
	if (init) qse_awk_clrpt (awk, init);
	if (test) qse_awk_clrpt (awk, test);
	if (incr) qse_awk_clrpt (awk, incr);
	if (body) qse_awk_clrpt (awk, body);
	return QSE_NULL;
}
|
|
|
|
/*
 * Parses a 'do ... while (...)' statement. The 'do' keyword must be the
 * previous token; the current token begins the body.
 *
 *   xloc  the location recorded in the resulting node
 *
 * New lines between the body and 'while' are skipped.
 * Returns a QSE_AWK_NDE_DOWHILE node, or QSE_NULL on failure. The parts
 * parsed before a failure are released.
 */
static qse_awk_nde_t* parse_dowhile (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* test = QSE_NULL;
	qse_awk_nde_t* body = QSE_NULL;
	/* must be initialized. the cleanup path below inspects it and is
	 * reachable before the node is allocated. */
	qse_awk_nde_while_t* nde = QSE_NULL;
	qse_awk_loc_t ploc;

	QSE_ASSERT (awk->ptok.type == TOK_DO);

	/* the loop body */
	ploc = awk->tok.loc;
	body = parse_statement (awk, &ploc);
	if (body == QSE_NULL) goto oops;

	/* 'while' may be placed on a later line */
	while (MATCH(awk,TOK_NEWLINE))
	{
		if (get_token(awk) <= -1) goto oops;
	}

	if (!MATCH(awk,TOK_WHILE))
	{
		SETERR_TOK (awk, QSE_AWK_EKWWHL);
		goto oops;
	}

	if (get_token(awk) <= -1) goto oops;

	if (!MATCH(awk,TOK_LPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ELPAREN);
		goto oops;
	}

	if (get_token(awk) <= -1) goto oops;

	/* the condition */
	ploc = awk->tok.loc;
	test = parse_expr_withdc (awk, &ploc);
	if (test == QSE_NULL) goto oops;

	if (!MATCH(awk,TOK_RPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ERPAREN);
		goto oops;
	}

	if (get_token(awk) <= -1) goto oops;

	nde = (qse_awk_nde_while_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nde));
	if (nde == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	nde->type = QSE_AWK_NDE_DOWHILE;
	nde->loc = *xloc;
	nde->test = test;
	nde->body = body;

	return (qse_awk_nde_t*)nde;

oops:
	if (body) qse_awk_clrpt (awk, body);
	if (test) qse_awk_clrpt (awk, test);
	/* no failure can occur once the node has been allocated */
	QSE_ASSERT (nde == QSE_NULL);
	return QSE_NULL;
}
|
|
|
|
/*
 * parse_break() creates the node for a 'break' statement whose keyword
 * has been consumed already. It fails with QSE_AWK_EBREAK at xloc if the
 * parser is not inside any loop (awk->parse.depth.loop is not positive).
 * It returns QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_break (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_break_t* brk;

	QSE_ASSERT (awk->ptok.type == TOK_BREAK);

	if (awk->parse.depth.loop > 0)
	{
		brk = (qse_awk_nde_break_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*brk));
		if (brk != QSE_NULL)
		{
			brk->type = QSE_AWK_NDE_BREAK;
			brk->loc = *xloc;
			return (qse_awk_nde_t*)brk;
		}

		/* allocation failure */
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	/* break outside a loop */
	SETERR_LOC (awk, QSE_AWK_EBREAK, xloc);
	return QSE_NULL;
}
|
|
|
|
/*
 * parse_continue() creates the node for a 'continue' statement whose
 * keyword has been consumed already. It fails with QSE_AWK_ECONTINUE at
 * xloc if the parser is not inside any loop (awk->parse.depth.loop is not
 * positive). It returns QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_continue (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_continue_t* cont;

	QSE_ASSERT (awk->ptok.type == TOK_CONTINUE);

	if (awk->parse.depth.loop > 0)
	{
		cont = (qse_awk_nde_continue_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*cont));
		if (cont != QSE_NULL)
		{
			cont->type = QSE_AWK_NDE_CONTINUE;
			cont->loc = *xloc;
			return (qse_awk_nde_t*)cont;
		}

		/* allocation failure */
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	/* continue outside a loop */
	SETERR_LOC (awk, QSE_AWK_ECONTINUE, xloc);
	return QSE_NULL;
}
|
|
|
|
/*
 * parse_return() parses what follows the 'return' keyword. If the current
 * token is a statement terminator, the node carries no value (val is
 * QSE_NULL). Otherwise, an expression is parsed and attached as the
 * return value. It returns QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_return (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_return_t* ret;
	qse_awk_nde_t* retval = QSE_NULL; /* stays null for a bare return */

	QSE_ASSERT (awk->ptok.type == TOK_RETURN);

	ret = (qse_awk_nde_return_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*ret));
	if (ret == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	ret->type = QSE_AWK_NDE_RETURN;
	ret->loc = *xloc;

	if (!MATCH_TERMINATOR(awk))
	{
		/* a return value is given */
		qse_awk_loc_t vloc = awk->tok.loc;

		retval = parse_expr_withdc (awk, &vloc);
		if (retval == QSE_NULL)
		{
			/* the node owns nothing yet. freeing it alone is enough */
			QSE_AWK_FREE (awk, ret);
			return QSE_NULL;
		}
	}

	ret->val = retval;
	return (qse_awk_nde_t*)ret;
}
|
|
|
|
/*
 * parse_exit() parses what follows the 'exit' keyword or the token
 * TOK_XABORT. The abort field of the node tells which of the two
 * introduced the statement. If the current token is a statement
 * terminator, the node carries no exit code (val is QSE_NULL). Otherwise,
 * an expression is parsed and attached. It returns QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_exit (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_exit_t* ext;
	qse_awk_nde_t* code = QSE_NULL; /* stays null if no exit code is given */

	QSE_ASSERT (awk->ptok.type == TOK_EXIT || awk->ptok.type == TOK_XABORT);

	ext = (qse_awk_nde_exit_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*ext));
	if (ext == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	ext->type = QSE_AWK_NDE_EXIT;
	ext->loc = *xloc;
	ext->abort = (awk->ptok.type == TOK_XABORT);

	if (!MATCH_TERMINATOR(awk))
	{
		/* an exit code is given */
		qse_awk_loc_t cloc = awk->tok.loc;

		code = parse_expr_withdc (awk, &cloc);
		if (code == QSE_NULL)
		{
			/* the node owns nothing yet. freeing it alone is enough */
			QSE_AWK_FREE (awk, ext);
			return QSE_NULL;
		}
	}

	ext->val = code;
	return (qse_awk_nde_t*)ext;
}
|
|
|
|
/*
 * parse_next() creates the node for a 'next' statement whose keyword has
 * been consumed already. The statement is rejected inside a BEGIN block
 * (QSE_AWK_ENEXTBEG) and inside an END block (QSE_AWK_ENEXTEND).
 * It returns QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_next (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_next_t* nxt;

	QSE_ASSERT (awk->ptok.type == TOK_NEXT);

	if (awk->parse.id.block == PARSE_BEGIN_BLOCK)
	{
		SETERR_LOC (awk, QSE_AWK_ENEXTBEG, xloc);
		return QSE_NULL;
	}
	else if (awk->parse.id.block == PARSE_END_BLOCK)
	{
		SETERR_LOC (awk, QSE_AWK_ENEXTEND, xloc);
		return QSE_NULL;
	}

	nxt = (qse_awk_nde_next_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nxt));
	if (nxt != QSE_NULL)
	{
		nxt->type = QSE_AWK_NDE_NEXT;
		nxt->loc = *xloc;
		return (qse_awk_nde_t*)nxt;
	}

	/* allocation failure */
	ADJERR_LOC (awk, xloc);
	return QSE_NULL;
}
|
|
|
|
static qse_awk_nde_t* parse_nextfile (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, int out)
|
|
{
|
|
qse_awk_nde_nextfile_t* nde;
|
|
|
|
if (!out && awk->parse.id.block == PARSE_BEGIN_BLOCK)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENEXTFBEG, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
if (!out && awk->parse.id.block == PARSE_END_BLOCK)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENEXTFEND, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde = (qse_awk_nde_nextfile_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nde));
|
|
if (nde == QSE_NULL)
|
|
{
|
|
ADJERR_LOC (awk, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde->type = QSE_AWK_NDE_NEXTFILE;
|
|
nde->loc = *xloc;
|
|
nde->out = out;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
/*
 * parse_delete() parses the operand of a 'delete' statement or of the
 * statement introduced by TOK_XRESET. The operand is an identifier-led
 * primary expression optionally enclosed in a pair of parentheses.
 *
 * For delete, the operand must satisfy is_var(). For reset, it must
 * satisfy is_plain_var(). Otherwise, QSE_AWK_EBADARG is set at the
 * location of the operand.
 *
 * It returns a QSE_AWK_NDE_DELETE or QSE_AWK_NDE_RESET node owning the
 * operand on success, and QSE_NULL on failure.
 */
static qse_awk_nde_t* parse_delete (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_delete_t* del;
	qse_awk_nde_t* target = QSE_NULL;
	qse_awk_loc_t tloc;
	qse_awk_nde_type_t kind;
	int parenthesized = 0;
	int acceptable;

	QSE_ASSERT (awk->ptok.type == TOK_DELETE ||
	            awk->ptok.type == TOK_XRESET);

	if (awk->ptok.type == TOK_DELETE) kind = QSE_AWK_NDE_DELETE;
	else kind = QSE_AWK_NDE_RESET;

	/* an optional opening parenthesis */
	if (MATCH(awk,TOK_LPAREN))
	{
		if (get_token(awk) <= -1) goto oops;
		parenthesized = 1;
	}

	if (!MATCH(awk,TOK_IDENT))
	{
		SETERR_TOK (awk, QSE_AWK_EIDENT);
		goto oops;
	}

	tloc = awk->tok.loc;
	target = parse_primary_ident (awk, &tloc);
	if (target == QSE_NULL) goto oops;

	/* the check applied depends on the statement kind */
	if (kind == QSE_AWK_NDE_DELETE) acceptable = is_var (target);
	else acceptable = is_plain_var (target);

	if (!acceptable)
	{
		SETERR_LOC (awk, QSE_AWK_EBADARG, &tloc);
		goto oops;
	}

	/* the closing parenthesis is mandatory if the opening one was seen */
	if (parenthesized)
	{
		if (!MATCH(awk,TOK_RPAREN))
		{
			SETERR_TOK (awk, QSE_AWK_ERPAREN);
			goto oops;
		}

		if (get_token(awk) <= -1) goto oops;
	}

	del = (qse_awk_nde_delete_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*del));
	if (del == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	del->type = kind;
	del->loc = *xloc;
	del->var = target;

	return (qse_awk_nde_t*)del;

oops:
	if (target) qse_awk_clrpt (awk, target);
	return QSE_NULL;
}
|
|
|
|
/*
 * parse_print() parses the arguments and the optional output redirection
 * of a print or printf statement whose keyword has been consumed already.
 *
 * The argument list is either a single expression, a parenthesized group
 * or a comma-separated list. An output target may follow one of >, >>, |
 * and, if the QSE_AWK_RWPIPE trait is on, ||. Since the expression parser
 * consumes these operators as binary operators, the last argument may come
 * back as a binary expression that actually is '<argument> <redirection>
 * <target>'. Such an expression is split back here unless it is confirmed
 * to be enclosed in parentheses.
 *
 * printf without any argument is rejected with QSE_AWK_ENOARG.
 * It returns QSE_NULL on failure after releasing the parsed subtrees.
 */
static qse_awk_nde_t* parse_print (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_print_t* prn;
	qse_awk_nde_t* args = QSE_NULL;
	qse_awk_nde_t* out = QSE_NULL;
	int out_type;
	qse_awk_nde_type_t type;
	qse_awk_loc_t eloc;

	QSE_ASSERT (awk->ptok.type == TOK_PRINT ||
	            awk->ptok.type == TOK_PRINTF);

	if (awk->ptok.type == TOK_PRINT) type = QSE_AWK_NDE_PRINT;
	else type = QSE_AWK_NDE_PRINTF;

	/* arguments exist unless the statement ends here or a redirection
	 * operator follows the keyword immediately */
	if (!(MATCH_TERMINATOR(awk) ||
	      MATCH(awk,TOK_GT) ||
	      MATCH(awk,TOK_RS) ||
	      MATCH(awk,TOK_BOR) ||
	      MATCH(awk,TOK_LOR)))
	{
		qse_awk_nde_t* last;        /* the last argument parsed */
		qse_awk_nde_t* before_last; /* the argument preceding it */

		/* 0: no parenthesis, 1: opened but not confirmed, 2: confirmed */
		int whole_paren = 0, member_paren = 0;
		qse_size_t first_lparen_seq;

		if (MATCH(awk,TOK_LPAREN))
		{
			/* remember the sequence number of the left parenthesis
			 * before parse_expr_withdc() gets to consume it.
			 *
			 * the parenthesis can't be consumed here because of
			 * the third form below.
			 *
			 *  1. print 10, 20;
			 *  2. print (10, 20);
			 *  3. print (10,20,30) in a;
			 *  4. print ((10,20,30) in a);
			 */
			first_lparen_seq = awk->parse.lparen_seq;
			whole_paren = 1;
		}

		eloc = awk->tok.loc;
		args = parse_expr_withdc (awk, &eloc);
		if (args == QSE_NULL) goto oops;

		last = args;
		before_last = QSE_NULL;

		/* a group node is returned for print (a, b, c). anything else
		 * may be followed by more comma-separated arguments. */
		if (args->type != QSE_AWK_NDE_GRP)
		{
			qse_size_t member_lparen_seq;

			while (MATCH(awk,TOK_COMMA))
			{
				/* skip the comma and the newlines after it */
				do
				{
					if (get_token(awk) <= -1) goto oops;
				}
				while (MATCH(awk,TOK_NEWLINE));

				/* the parenthesis state is tracked for the last
				 * member only. reset it for each new member. */
				member_paren = 0;
				if (MATCH(awk,TOK_LPAREN))
				{
					member_lparen_seq = awk->parse.lparen_seq;
					member_paren = 1;
				}

				eloc = awk->tok.loc;
				last->next = parse_expr_withdc (awk, &eloc);
				if (last->next == QSE_NULL) goto oops;

				before_last = last;
				last = last->next;

				if (member_paren == 1 &&
				    awk->ptok.type == TOK_RPAREN &&
				    awk->parse.lparen_last_closed == member_lparen_seq)
				{
					/* this member is enclosed in parentheses */
					member_paren = 2;
				}
			}
		}

		/* some of the many combinations to consider:
		 *
		 *  print 1 > 2              prints 1 to the file named 2
		 *  print (1 > 2)            prints the comparison result
		 *  print ((10,20,30) in a) > "x";
		 *  print ((10,20,30) in a) > ("x");
		 *  print 1, 2, 3 > 4;
		 *  print (1, 2, 3) > 4;
		 *  print ((1, 2, 3) > 4);
		 *  print 1, 2, 3 > 4 + 5;
		 *  print 1, 2, (3 > 4) > 5;
		 *  print 1, 2, (3 > 4) > 5 + 6;
		 */
		if (whole_paren == 1 &&
		    awk->ptok.type == TOK_RPAREN &&
		    awk->parse.lparen_last_closed == first_lparen_seq)
		{
			/* the opening parenthesis seen first is the one closed last */
			whole_paren = 2;
		}

		if (whole_paren != 2 && member_paren != 2 &&
		    last->type == QSE_AWK_NDE_EXP_BIN)
		{
			qse_size_t i;
			qse_awk_nde_exp_t* bin = (qse_awk_nde_exp_t*)last;

			/* binary operators that double as redirections */
			struct
			{
				int opc; /* binary operator code */
				int out; /* output type to use */
				int opt; /* trait required. 0 if none */
			} redir[] =
			{
				{ QSE_AWK_BINOP_GT,  QSE_AWK_OUT_FILE,   0 },
				{ QSE_AWK_BINOP_RS,  QSE_AWK_OUT_APFILE, 0 },
				{ QSE_AWK_BINOP_BOR, QSE_AWK_OUT_PIPE,   0 },
				{ QSE_AWK_BINOP_LOR, QSE_AWK_OUT_RWPIPE, QSE_AWK_RWPIPE }
			};

			for (i = 0; i < QSE_COUNTOF(redir); i++)
			{
				if (bin->opcode != redir[i].opc) continue;

				/* the operator is a redirection only if the
				 * required trait is on */
				if (redir[i].opt &&
				    !(awk->opt.trait & redir[i].opt)) break;

				/* the left operand replaces the binary node in
				 * the argument list and the right operand
				 * becomes the output target */
				if (before_last != QSE_NULL)
					before_last->next = bin->left;
				else
					args = bin->left;

				out = bin->right;
				out_type = redir[i].out;

				/* free the binary node only. its operands
				 * are still in use. */
				QSE_AWK_FREE (awk, last);
				break;
			}
		}
	}

	if (out == QSE_NULL)
	{
		/* no target has been extracted from the arguments.
		 * check if a redirection operator comes now. */
		if (MATCH(awk,TOK_GT))
			out_type = QSE_AWK_OUT_FILE;
		else if (MATCH(awk,TOK_RS))
			out_type = QSE_AWK_OUT_APFILE;
		else if (MATCH(awk,TOK_BOR))
			out_type = QSE_AWK_OUT_PIPE;
		else if ((awk->opt.trait & QSE_AWK_RWPIPE) && MATCH(awk,TOK_LOR))
			out_type = QSE_AWK_OUT_RWPIPE;
		else
			out_type = QSE_AWK_OUT_CONSOLE;

		if (out_type != QSE_AWK_OUT_CONSOLE)
		{
			if (get_token(awk) <= -1) goto oops;

			eloc = awk->tok.loc;
			out = parse_expr_withdc (awk, &eloc);
			if (out == QSE_NULL) goto oops;
		}
	}

	if (type == QSE_AWK_NDE_PRINTF && !args)
	{
		/* printf requires at least one argument */
		SETERR_LOC (awk, QSE_AWK_ENOARG, xloc);
		goto oops;
	}

	prn = (qse_awk_nde_print_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*prn));
	if (prn == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	prn->type = type;
	prn->loc = *xloc;
	prn->args = args;
	prn->out_type = out_type;
	prn->out = out;

	return (qse_awk_nde_t*)prn;

oops:
	if (args) qse_awk_clrpt (awk, args);
	if (out) qse_awk_clrpt (awk, out);
	return QSE_NULL;
}
|
|
|
|
static qse_awk_nde_t* parse_statement_nb (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc)
|
|
{
|
|
/* parse a non-block statement */
|
|
qse_awk_nde_t* nde;
|
|
|
|
/* keywords that don't require any terminating semicolon */
|
|
if (MATCH(awk,TOK_IF))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
return parse_if (awk, xloc);
|
|
}
|
|
else if (MATCH(awk,TOK_WHILE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
|
|
awk->parse.depth.loop++;
|
|
nde = parse_while (awk, xloc);
|
|
awk->parse.depth.loop--;
|
|
|
|
return nde;
|
|
}
|
|
else if (MATCH(awk,TOK_FOR))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
|
|
awk->parse.depth.loop++;
|
|
nde = parse_for (awk, xloc);
|
|
awk->parse.depth.loop--;
|
|
|
|
return nde;
|
|
}
|
|
|
|
/* keywords that require a terminating semicolon */
|
|
if (MATCH(awk,TOK_DO))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
|
|
awk->parse.depth.loop++;
|
|
nde = parse_dowhile (awk, xloc);
|
|
awk->parse.depth.loop--;
|
|
|
|
return nde;
|
|
}
|
|
else if (MATCH(awk,TOK_BREAK))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_break (awk, xloc);
|
|
}
|
|
else if (MATCH(awk,TOK_CONTINUE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_continue (awk, xloc);
|
|
}
|
|
else if (MATCH(awk,TOK_RETURN))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_return (awk, xloc);
|
|
}
|
|
else if (MATCH(awk,TOK_EXIT) || MATCH(awk,TOK_XABORT))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_exit (awk, xloc);
|
|
}
|
|
else if (MATCH(awk,TOK_NEXT))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_next (awk, xloc);
|
|
}
|
|
else if (MATCH(awk,TOK_NEXTFILE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_nextfile (awk, xloc, 0);
|
|
}
|
|
else if (MATCH(awk,TOK_NEXTOFILE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_nextfile (awk, xloc, 1);
|
|
}
|
|
else if (MATCH(awk,TOK_DELETE) || MATCH(awk,TOK_XRESET))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_delete (awk, xloc);
|
|
}
|
|
else if (!(awk->opt.trait & QSE_AWK_TOLERANT))
|
|
{
|
|
/* in the non-tolerant mode, we treat print and printf
|
|
* as a separate statement */
|
|
if (MATCH(awk,TOK_PRINT) || MATCH(awk,TOK_PRINTF))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_print (awk, xloc);
|
|
}
|
|
else nde = parse_expr_withdc (awk, xloc);
|
|
}
|
|
else
|
|
{
|
|
nde = parse_expr_withdc (awk, xloc);
|
|
}
|
|
|
|
if (nde == QSE_NULL) return QSE_NULL;
|
|
|
|
if (MATCH_TERMINATOR_NORMAL(awk))
|
|
{
|
|
/* check if a statement ends with a semicolon or <NL> */
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
if (nde != QSE_NULL) qse_awk_clrpt (awk, nde);
|
|
return QSE_NULL;
|
|
}
|
|
}
|
|
else if (MATCH_TERMINATOR_RBRACE(awk))
|
|
{
|
|
/* do not skip the right brace as a statement terminator.
|
|
* is there anything to do here? */
|
|
}
|
|
else
|
|
{
|
|
if (nde != QSE_NULL) qse_awk_clrpt (awk, nde);
|
|
SETERR_LOC (awk, QSE_AWK_ESTMEND, &awk->ptok.loc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
return nde;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_statement (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc)
|
|
{
|
|
qse_awk_nde_t* nde;
|
|
|
|
/* skip new lines before a statement */
|
|
while (MATCH(awk,TOK_NEWLINE))
|
|
{
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
}
|
|
|
|
if (MATCH(awk,TOK_SEMICOLON))
|
|
{
|
|
/* null statement */
|
|
nde = (qse_awk_nde_t*)
|
|
QSE_AWK_ALLOC (awk, QSE_SIZEOF(qse_awk_nde_t));
|
|
if (nde == QSE_NULL)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde->type = QSE_AWK_NDE_NULL;
|
|
nde->loc = *xloc;
|
|
nde->next = QSE_NULL;
|
|
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
QSE_AWK_FREE (awk, nde);
|
|
return QSE_NULL;
|
|
}
|
|
}
|
|
else if (MATCH(awk,TOK_LBRACE))
|
|
{
|
|
/* a block statemnt { ... } */
|
|
qse_awk_loc_t tloc = awk->ptok.loc;
|
|
if (get_token(awk) <= -1) return QSE_NULL;
|
|
nde = parse_block_dc (awk, &tloc, 0);
|
|
}
|
|
else
|
|
{
|
|
/* the statement id held in awk->parse.id.stmt denotes
|
|
* the token id of the statement currently being parsed.
|
|
* the current statement id is saved here because the
|
|
* statement id can be changed in parse_statement_nb.
|
|
* it will, in turn, call parse_statement which will
|
|
* eventually change the statement id. */
|
|
int old_id = awk->parse.id.stmt;
|
|
qse_awk_loc_t tloc = awk->tok.loc;
|
|
|
|
/* set the current statement id */
|
|
awk->parse.id.stmt = awk->tok.type;
|
|
|
|
/* proceed parsing the statement */
|
|
nde = parse_statement_nb (awk, &tloc);
|
|
|
|
/* restore the statement id saved previously */
|
|
awk->parse.id.stmt = old_id;
|
|
}
|
|
|
|
return nde;
|
|
}
|
|
|
|
/*
 * assign_to_opcode() converts the current token to an assignment opcode.
 * It returns -1 if the current token is not one of the assignment
 * operator tokens ranging from TOK_ASSN to TOK_BOR_ASSN.
 */
static int assign_to_opcode (qse_awk_t* awk)
{
	/* indexed by the distance from TOK_ASSN. keep it in sync with
	 * the TOK_XXX_ASSN tokens and qse_awk_assop_type_t in run.h */
	static const int opcodes[] =
	{
		QSE_AWK_ASSOP_NONE,
		QSE_AWK_ASSOP_PLUS,
		QSE_AWK_ASSOP_MINUS,
		QSE_AWK_ASSOP_MUL,
		QSE_AWK_ASSOP_DIV,
		QSE_AWK_ASSOP_IDIV,
		QSE_AWK_ASSOP_MOD,
		QSE_AWK_ASSOP_EXP,
		QSE_AWK_ASSOP_CONCAT,
		QSE_AWK_ASSOP_RS,
		QSE_AWK_ASSOP_LS,
		QSE_AWK_ASSOP_BAND,
		QSE_AWK_ASSOP_BXOR,
		QSE_AWK_ASSOP_BOR
	};

	if (awk->tok.type < TOK_ASSN || awk->tok.type > TOK_BOR_ASSN)
	{
		/* not an assignment operator */
		return -1;
	}

	return opcodes[awk->tok.type - TOK_ASSN];
}
|
|
|
|
static qse_awk_nde_t* parse_expr_basic (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc)
|
|
{
|
|
qse_awk_nde_t* nde, * n1, * n2;
|
|
|
|
nde = parse_logical_or (awk, xloc);
|
|
if (nde == QSE_NULL) return QSE_NULL;
|
|
|
|
if (MATCH(awk,TOK_QUEST))
|
|
if (MATCH(awk,TOK_QUEST))
|
|
{
|
|
qse_awk_loc_t eloc;
|
|
qse_awk_nde_cnd_t* cnd;
|
|
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
eloc = awk->tok.loc;
|
|
n1 = parse_expr_withdc (awk, &eloc);
|
|
if (n1 == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
if (!MATCH(awk,TOK_COLON))
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
qse_awk_clrpt (awk, n1);
|
|
SETERR_TOK (awk, QSE_AWK_ECOLON);
|
|
return QSE_NULL;
|
|
}
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
qse_awk_clrpt (awk, n1);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
eloc = awk->tok.loc;
|
|
n2 = parse_expr_withdc (awk, &eloc);
|
|
if (n2 == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
qse_awk_clrpt (awk, n1);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
cnd = (qse_awk_nde_cnd_t*) QSE_AWK_ALLOC (
|
|
awk, QSE_SIZEOF(qse_awk_nde_cnd_t));
|
|
if (cnd == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, nde);
|
|
qse_awk_clrpt (awk, n1);
|
|
qse_awk_clrpt (awk, n2);
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
cnd->type = QSE_AWK_NDE_CND;
|
|
cnd->loc = *xloc;
|
|
cnd->next = QSE_NULL;
|
|
cnd->test = nde;
|
|
cnd->left = n1;
|
|
cnd->right = n2;
|
|
|
|
nde = (qse_awk_nde_t*)cnd;
|
|
}
|
|
|
|
return nde;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_expr (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc)
|
|
{
|
|
qse_awk_nde_t* x, * y;
|
|
qse_awk_nde_ass_t* nde;
|
|
int opcode;
|
|
|
|
x = parse_expr_basic (awk, xloc);
|
|
if (x == QSE_NULL) return QSE_NULL;
|
|
|
|
opcode = assign_to_opcode (awk);
|
|
if (opcode <= -1)
|
|
{
|
|
/* no assignment operator found. */
|
|
return x;
|
|
}
|
|
|
|
QSE_ASSERT (x->next == QSE_NULL);
|
|
if (!is_var(x) && x->type != QSE_AWK_NDE_POS)
|
|
{
|
|
qse_awk_clrpt (awk, x);
|
|
SETERR_LOC (awk, QSE_AWK_EASSIGN, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
qse_awk_clrpt (awk, x);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
{
|
|
qse_awk_loc_t eloc = awk->tok.loc;
|
|
y = parse_expr_withdc (awk, &eloc);
|
|
}
|
|
if (y == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, x);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde = (qse_awk_nde_ass_t*)
|
|
QSE_AWK_ALLOC (awk, QSE_SIZEOF(qse_awk_nde_ass_t));
|
|
if (nde == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, x);
|
|
qse_awk_clrpt (awk, y);
|
|
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde->type = QSE_AWK_NDE_ASS;
|
|
nde->loc = *xloc;
|
|
nde->next = QSE_NULL;
|
|
nde->opcode = opcode;
|
|
nde->left = x;
|
|
nde->right = y;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_expr_withdc (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc)
|
|
{
|
|
qse_awk_nde_t* nde;
|
|
|
|
/* perform depth check before parsing expression */
|
|
|
|
if (awk->opt.depth.s.expr_parse > 0 &&
|
|
awk->parse.depth.expr >= awk->opt.depth.s.expr_parse)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_EEXPRNST, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
awk->parse.depth.expr++;
|
|
nde = parse_expr (awk, xloc);
|
|
awk->parse.depth.expr--;
|
|
|
|
return nde;
|
|
}
|
|
|
|
/*
 * The macros below apply the binary operator 'op' to the values of two
 * constant nodes. x and y are pointers to the nodes. The name tells how
 * each pointer is interpreted: INT for qse_awk_nde_int_t and FLT for
 * qse_awk_nde_flt_t. The caller must have checked the node types.
 *
 * The pointer arguments are parenthesized before being cast so that
 * an arbitrary pointer expression can be given.
 */
#define INT_BINOP_INT(x,op,y) \
	(((qse_awk_nde_int_t*)(x))->val op ((qse_awk_nde_int_t*)(y))->val)

#define INT_BINOP_FLT(x,op,y) \
	(((qse_awk_nde_int_t*)(x))->val op ((qse_awk_nde_flt_t*)(y))->val)

#define FLT_BINOP_INT(x,op,y) \
	(((qse_awk_nde_flt_t*)(x))->val op ((qse_awk_nde_int_t*)(y))->val)

#define FLT_BINOP_FLT(x,op,y) \
	(((qse_awk_nde_flt_t*)(x))->val op ((qse_awk_nde_flt_t*)(y))->val)
|
|
|
|
union folded_t
|
|
{
|
|
qse_awk_int_t l;
|
|
qse_awk_flt_t r;
|
|
};
|
|
typedef union folded_t folded_t;
|
|
|
|
static int fold_constants_for_binop (
|
|
qse_awk_t* awk, qse_awk_nde_t* left, qse_awk_nde_t* right,
|
|
int opcode, folded_t* folded)
|
|
{
|
|
int fold = -1;
|
|
|
|
/* TODO: can i shorten various comparisons below?
|
|
* i hate to repeat similar code just for type difference */
|
|
|
|
if (left->type == QSE_AWK_NDE_INT &&
|
|
right->type == QSE_AWK_NDE_INT)
|
|
{
|
|
fold = QSE_AWK_NDE_INT;
|
|
switch (opcode)
|
|
{
|
|
case QSE_AWK_BINOP_PLUS:
|
|
folded->l = INT_BINOP_INT(left,+,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MINUS:
|
|
folded->l = INT_BINOP_INT(left,-,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MUL:
|
|
folded->l = INT_BINOP_INT(left,*,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_DIV:
|
|
if (INT_BINOP_INT(left,%,right))
|
|
{
|
|
folded->r = (qse_awk_flt_t)((qse_awk_nde_int_t*)left)->val /
|
|
(qse_awk_flt_t)((qse_awk_nde_int_t*)right)->val;
|
|
fold = QSE_AWK_NDE_FLT;
|
|
break;
|
|
}
|
|
/* fall through here */
|
|
case QSE_AWK_BINOP_IDIV:
|
|
folded->l = INT_BINOP_INT(left,/,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MOD:
|
|
folded->l = INT_BINOP_INT(left,%,right);
|
|
break;
|
|
|
|
default:
|
|
fold = -1;
|
|
break;
|
|
}
|
|
}
|
|
else if (left->type == QSE_AWK_NDE_FLT &&
|
|
right->type == QSE_AWK_NDE_FLT)
|
|
{
|
|
fold = QSE_AWK_NDE_FLT;
|
|
switch (opcode)
|
|
{
|
|
case QSE_AWK_BINOP_PLUS:
|
|
folded->r = FLT_BINOP_FLT(left,+,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MINUS:
|
|
folded->r = FLT_BINOP_FLT(left,-,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MUL:
|
|
folded->r = FLT_BINOP_FLT(left,*,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_DIV:
|
|
folded->r = FLT_BINOP_FLT(left,/,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_IDIV:
|
|
folded->l = (qse_awk_int_t)FLT_BINOP_FLT(left,/,right);
|
|
fold = QSE_AWK_NDE_INT;
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MOD:
|
|
folded->r = awk->prm.math.mod (
|
|
awk,
|
|
((qse_awk_nde_flt_t*)left)->val,
|
|
((qse_awk_nde_flt_t*)right)->val
|
|
);
|
|
break;
|
|
|
|
default:
|
|
fold = -1;
|
|
break;
|
|
}
|
|
}
|
|
else if (left->type == QSE_AWK_NDE_INT &&
|
|
right->type == QSE_AWK_NDE_FLT)
|
|
{
|
|
fold = QSE_AWK_NDE_FLT;
|
|
switch (opcode)
|
|
{
|
|
case QSE_AWK_BINOP_PLUS:
|
|
folded->r = INT_BINOP_FLT(left,+,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MINUS:
|
|
folded->r = INT_BINOP_FLT(left,-,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MUL:
|
|
folded->r = INT_BINOP_FLT(left,*,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_DIV:
|
|
folded->r = INT_BINOP_FLT(left,/,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_IDIV:
|
|
folded->l = (qse_awk_int_t)
|
|
((qse_awk_flt_t)((qse_awk_nde_int_t*)left)->val /
|
|
((qse_awk_nde_flt_t*)right)->val);
|
|
fold = QSE_AWK_NDE_INT;
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MOD:
|
|
folded->r = awk->prm.math.mod (
|
|
awk,
|
|
(qse_awk_flt_t)((qse_awk_nde_int_t*)left)->val,
|
|
((qse_awk_nde_flt_t*)right)->val
|
|
);
|
|
break;
|
|
|
|
default:
|
|
fold = -1;
|
|
break;
|
|
}
|
|
}
|
|
else if (left->type == QSE_AWK_NDE_FLT &&
|
|
right->type == QSE_AWK_NDE_INT)
|
|
{
|
|
fold = QSE_AWK_NDE_FLT;
|
|
switch (opcode)
|
|
{
|
|
case QSE_AWK_BINOP_PLUS:
|
|
folded->r = FLT_BINOP_INT(left,+,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MINUS:
|
|
folded->r = FLT_BINOP_INT(left,-,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MUL:
|
|
folded->r = FLT_BINOP_INT(left,*,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_DIV:
|
|
folded->r = FLT_BINOP_INT(left,/,right);
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_IDIV:
|
|
folded->l = (qse_awk_int_t)
|
|
(((qse_awk_nde_int_t*)left)->val /
|
|
(qse_awk_flt_t)((qse_awk_nde_int_t*)right)->val);
|
|
fold = QSE_AWK_NDE_INT;
|
|
break;
|
|
|
|
case QSE_AWK_BINOP_MOD:
|
|
folded->r = awk->prm.math.mod (
|
|
awk,
|
|
((qse_awk_nde_flt_t*)left)->val,
|
|
(qse_awk_flt_t)((qse_awk_nde_int_t*)right)->val
|
|
);
|
|
break;
|
|
|
|
default:
|
|
fold = -1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return fold;
|
|
}
|
|
|
|
static qse_awk_nde_t* new_exp_bin_node (
|
|
qse_awk_t* awk, const qse_awk_loc_t* loc,
|
|
int opcode, qse_awk_nde_t* left, qse_awk_nde_t* right)
|
|
{
|
|
qse_awk_nde_exp_t* tmp;
|
|
|
|
tmp = (qse_awk_nde_exp_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*tmp));
|
|
if (tmp)
|
|
{
|
|
tmp->type = QSE_AWK_NDE_EXP_BIN;
|
|
tmp->loc = *loc;
|
|
tmp->opcode = opcode;
|
|
tmp->left = left;
|
|
tmp->right = right;
|
|
}
|
|
else ADJERR_LOC (awk, loc);
|
|
|
|
return (qse_awk_nde_t*)tmp;
|
|
}
|
|
|
|
static qse_awk_nde_t* new_int_node (
|
|
qse_awk_t* awk, qse_awk_int_t lv, const qse_awk_loc_t* loc)
|
|
{
|
|
qse_awk_nde_int_t* tmp;
|
|
|
|
tmp = (qse_awk_nde_int_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*tmp));
|
|
if (tmp)
|
|
{
|
|
tmp->type = QSE_AWK_NDE_INT;
|
|
tmp->loc = *loc;
|
|
tmp->val = lv;
|
|
}
|
|
else ADJERR_LOC (awk, loc);
|
|
|
|
return (qse_awk_nde_t*)tmp;
|
|
}
|
|
|
|
static qse_awk_nde_t* new_flt_node (
|
|
qse_awk_t* awk, qse_awk_flt_t rv, const qse_awk_loc_t* loc)
|
|
{
|
|
qse_awk_nde_flt_t* tmp;
|
|
|
|
tmp = (qse_awk_nde_flt_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*tmp));
|
|
if (tmp)
|
|
{
|
|
tmp->type = QSE_AWK_NDE_FLT;
|
|
tmp->loc = *loc;
|
|
tmp->val = rv;
|
|
}
|
|
else ADJERR_LOC (awk, loc);
|
|
|
|
return (qse_awk_nde_t*)tmp;
|
|
}
|
|
|
|
static QSE_INLINE void update_int_node (
|
|
qse_awk_t* awk, qse_awk_nde_int_t* node, qse_awk_int_t lv)
|
|
{
|
|
node->val = lv;
|
|
if (node->str)
|
|
{
|
|
QSE_AWK_FREE (awk, node->str);
|
|
node->str = QSE_NULL;
|
|
node->len = 0;
|
|
}
|
|
}
|
|
|
|
static QSE_INLINE void update_flt_node (
|
|
qse_awk_t* awk, qse_awk_nde_flt_t* node, qse_awk_flt_t rv)
|
|
{
|
|
node->val = rv;
|
|
if (node->str)
|
|
{
|
|
QSE_AWK_FREE (awk, node->str);
|
|
node->str = QSE_NULL;
|
|
node->len = 0;
|
|
}
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_binary (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc,
|
|
int skipnl, const binmap_t* binmap,
|
|
qse_awk_nde_t*(*next_level_func)(qse_awk_t*,const qse_awk_loc_t*))
|
|
{
|
|
qse_awk_nde_t* left = QSE_NULL;
|
|
qse_awk_nde_t* right = QSE_NULL;
|
|
qse_awk_loc_t rloc;
|
|
|
|
left = next_level_func (awk, xloc);
|
|
if (left == QSE_NULL) goto oops;
|
|
|
|
do
|
|
{
|
|
const binmap_t* p = binmap;
|
|
int matched = 0;
|
|
int opcode, fold;
|
|
folded_t folded;
|
|
|
|
while (p->token != TOK_EOF)
|
|
{
|
|
if (MATCH(awk,p->token))
|
|
{
|
|
opcode = p->binop;
|
|
matched = 1;
|
|
break;
|
|
}
|
|
p++;
|
|
}
|
|
if (!matched) break;
|
|
|
|
do
|
|
{
|
|
if (get_token(awk) <= -1) goto oops;
|
|
}
|
|
while (skipnl && MATCH(awk,TOK_NEWLINE));
|
|
|
|
rloc = awk->tok.loc;
|
|
right = next_level_func (awk, &rloc);
|
|
if (right == QSE_NULL) goto oops;
|
|
|
|
fold = fold_constants_for_binop (awk, left, right, opcode, &folded);
|
|
switch (fold)
|
|
{
|
|
case QSE_AWK_NDE_INT:
|
|
if (fold == left->type)
|
|
{
|
|
qse_awk_clrpt (awk, right);
|
|
right = QSE_NULL;
|
|
update_int_node (awk, (qse_awk_nde_int_t*)left, folded.l);
|
|
}
|
|
else if (fold == right->type)
|
|
{
|
|
qse_awk_clrpt (awk, left);
|
|
update_int_node (awk, (qse_awk_nde_int_t*)right, folded.l);
|
|
left = right;
|
|
right = QSE_NULL;
|
|
}
|
|
else
|
|
{
|
|
qse_awk_clrpt (awk, right); right = QSE_NULL;
|
|
qse_awk_clrpt (awk, left); left = QSE_NULL;
|
|
|
|
left = new_int_node (awk, folded.l, xloc);
|
|
if (left == QSE_NULL) goto oops;
|
|
}
|
|
|
|
break;
|
|
|
|
case QSE_AWK_NDE_FLT:
|
|
if (fold == left->type)
|
|
{
|
|
qse_awk_clrpt (awk, right);
|
|
right = QSE_NULL;
|
|
update_flt_node (awk, (qse_awk_nde_flt_t*)left, folded.r);
|
|
}
|
|
else if (fold == right->type)
|
|
{
|
|
qse_awk_clrpt (awk, left);
|
|
update_flt_node (awk, (qse_awk_nde_flt_t*)right, folded.r);
|
|
left = right;
|
|
right = QSE_NULL;
|
|
}
|
|
else
|
|
{
|
|
qse_awk_clrpt (awk, right); right = QSE_NULL;
|
|
qse_awk_clrpt (awk, left); left = QSE_NULL;
|
|
|
|
left = new_flt_node (awk, folded.r, xloc);
|
|
if (left == QSE_NULL) goto oops;
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
{
|
|
qse_awk_nde_t* tmp;
|
|
|
|
tmp = new_exp_bin_node (awk, xloc, opcode, left, right);
|
|
if (tmp == QSE_NULL) goto oops;
|
|
left = tmp; right = QSE_NULL;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
while (1);
|
|
|
|
return left;
|
|
|
|
oops:
|
|
if (right) qse_awk_clrpt (awk, right);
|
|
if (left) qse_awk_clrpt (awk, left);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
/*
 * parse_logical_or() parses the logical OR level. Its operands are
 * parsed by parse_logical_and(). Newlines following the operator are
 * skipped.
 */
static qse_awk_nde_t* parse_logical_or (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t lor_ops[] =
	{
		{ TOK_LOR, QSE_AWK_BINOP_LOR },
		{ TOK_EOF, 0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 1, lor_ops, parse_logical_and);
}
|
|
|
|
/*
 * parse_logical_and() parses the logical AND level. Its operands are
 * parsed by parse_in(). Newlines following the operator are skipped.
 */
static qse_awk_nde_t* parse_logical_and (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t land_ops[] =
	{
		{ TOK_LAND, QSE_AWK_BINOP_LAND },
		{ TOK_EOF,  0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 1, land_ops, parse_in);
}
|
|
|
|
/*
 * parse_in() parses a chain of 'in' operators. The operands are parsed by
 * parse_regex_match().
 *
 * parse_binary() is not used for this level because the right operand
 * needs an extra check. It must satisfy is_plain_var(), or
 * QSE_AWK_ENOTVAR is set at the location of the operand.
 *
 * It returns the resulting tree, or QSE_NULL on failure after releasing
 * the operands held at the moment.
 */
static qse_awk_nde_t* parse_in (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* left = QSE_NULL;
	qse_awk_nde_t* right = QSE_NULL;
	qse_awk_loc_t rloc;

	left = parse_regex_match (awk, xloc);
	if (left == QSE_NULL) goto oops;

	do
	{
		qse_awk_nde_t* tmp;

		if (!MATCH(awk,TOK_IN)) break;

		if (get_token(awk) <= -1) goto oops;

		rloc = awk->tok.loc;
		right = parse_regex_match (awk, &rloc);
		if (right == QSE_NULL) goto oops;

		if (!is_plain_var(right))
		{
			SETERR_LOC (awk, QSE_AWK_ENOTVAR, &rloc);
			goto oops;
		}

		tmp = new_exp_bin_node (
			awk, xloc, QSE_AWK_BINOP_IN, left, right);
		/* check the new node, not the left operand. both operands
		 * are still owned here if the allocation has failed and
		 * they are released under the oops label. */
		if (tmp == QSE_NULL) goto oops;

		left = tmp;
		right = QSE_NULL;
	}
	while (1);

	return left;

oops:
	if (right) qse_awk_clrpt (awk, right);
	if (left) qse_awk_clrpt (awk, left);
	return QSE_NULL;
}
|
|
|
|
/*
 * parse_regex_match() parses the match(~) and not-match(!~) level.
 * Its operands are parsed by parse_bitwise_or().
 */
static qse_awk_nde_t* parse_regex_match (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t match_ops[] =
	{
		{ TOK_MA,  QSE_AWK_BINOP_MA },
		{ TOK_NM,  QSE_AWK_BINOP_NM },
		{ TOK_EOF, 0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 0, match_ops, parse_bitwise_or);
}
|
|
|
|
/*
 * parse_bitwise_or() parses the bitwise OR level. Its operands are
 * parsed by parse_bitwise_xor().
 */
static qse_awk_nde_t* parse_bitwise_or (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t bor_ops[] =
	{
		{ TOK_BOR, QSE_AWK_BINOP_BOR },
		{ TOK_EOF, 0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 0, bor_ops, parse_bitwise_xor);
}
|
|
|
|
/*
 * parse_bitwise_xor() parses the bitwise XOR level. Its operands are
 * parsed by parse_bitwise_and().
 */
static qse_awk_nde_t* parse_bitwise_xor (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t bxor_ops[] =
	{
		{ TOK_BXOR, QSE_AWK_BINOP_BXOR },
		{ TOK_EOF,  0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 0, bxor_ops, parse_bitwise_and);
}
|
|
|
|
/*
 * parse_bitwise_and() parses the bitwise AND level. Its operands are
 * parsed by parse_equality().
 */
static qse_awk_nde_t* parse_bitwise_and (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t band_ops[] =
	{
		{ TOK_BAND, QSE_AWK_BINOP_BAND },
		{ TOK_EOF,  0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 0, band_ops, parse_equality);
}
|
|
|
|
/*
 * parse_equality() parses the equality level covering the tokens
 * TOK_TEQ, TOK_TNE, TOK_EQ and TOK_NE. Its operands are parsed by
 * parse_relational().
 */
static qse_awk_nde_t* parse_equality (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t eq_ops[] =
	{
		{ TOK_TEQ, QSE_AWK_BINOP_TEQ },
		{ TOK_TNE, QSE_AWK_BINOP_TNE },
		{ TOK_EQ,  QSE_AWK_BINOP_EQ },
		{ TOK_NE,  QSE_AWK_BINOP_NE },
		{ TOK_EOF, 0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 0, eq_ops, parse_relational);
}
|
|
|
|
/*
 * parse_relational() parses the relational level covering the tokens
 * TOK_GT, TOK_GE, TOK_LT and TOK_LE. Its operands are parsed by
 * parse_shift().
 */
static qse_awk_nde_t* parse_relational (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	static const binmap_t rel_ops[] =
	{
		{ TOK_GT,  QSE_AWK_BINOP_GT },
		{ TOK_GE,  QSE_AWK_BINOP_GE },
		{ TOK_LT,  QSE_AWK_BINOP_LT },
		{ TOK_LE,  QSE_AWK_BINOP_LE },
		{ TOK_EOF, 0 }  /* terminator */
	};

	return parse_binary (awk, xloc, 0, rel_ops, parse_shift);
}
|
|
|
|
/* parse the shift level of an expression.
 * operands are produced by parse_concat() and combined with
 * the TOK_LS and TOK_RS operators by parse_binary(). */
static qse_awk_nde_t* parse_shift (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	/* token type to binary opcode table handed to parse_binary() */
	static binmap_t optab[] =
	{
		{ TOK_LS,  QSE_AWK_BINOP_LS },
		{ TOK_RS,  QSE_AWK_BINOP_RS },
		{ TOK_EOF, 0 }
	};

	return parse_binary (awk, xloc, 0, optab, parse_concat);
}
|
|
|
|
/* parse a chain of concatenated additive expressions.
 *
 * two operands are concatenated when they are separated by the
 * explicit TOK_CONCAT operator or, if QSE_AWK_BLANKCONCAT is on, when
 * the token following an operand can introduce another operand.
 * the chain is folded to the left into QSE_AWK_BINOP_CONCAT nodes.
 *
 * returns the resulting node, or QSE_NULL on failure after clearing
 * the nodes built so far. */
static qse_awk_nde_t* parse_concat (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* lhs;
	qse_awk_nde_t* rhs = QSE_NULL;
	qse_awk_loc_t rhs_loc;

	lhs = parse_additive (awk, xloc);
	if (lhs == QSE_NULL) goto oops;

	for (;;)
	{
		qse_awk_nde_t* joined;

		if (MATCH(awk,TOK_CONCAT))
		{
			/* explicit concatenation operator. skip over it */
			if (get_token(awk) <= -1) goto oops;
		}
		else
		{
			/* without the explicit operator, concatenation is
			 * possible only if blanks can concatenate */
			if (!(awk->opt.trait & QSE_AWK_BLANKCONCAT)) break;

			/* stop unless the current token can begin the
			 * right-hand operand */
			if (!(MATCH(awk,TOK_LPAREN) || MATCH(awk,TOK_DOLLAR) ||
			      /* unary operators */
			      MATCH(awk,TOK_PLUS) || MATCH(awk,TOK_MINUS) ||
			      MATCH(awk,TOK_LNOT) || MATCH(awk,TOK_BNOT) ||
			      MATCH(awk,TOK_BQUOTE) ||
			      /* increment operators */
			      MATCH(awk,TOK_PLUSPLUS) || MATCH(awk,TOK_MINUSMINUS) ||
			      /* print and printf count in the tolerant mode only */
			      ((awk->opt.trait & QSE_AWK_TOLERANT) &&
			       (awk->tok.type == TOK_PRINT || awk->tok.type == TOK_PRINTF)) ||
			      awk->tok.type >= TOK_GETLINE)) break;
		}

		rhs_loc = awk->tok.loc;
		rhs = parse_additive (awk, &rhs_loc);
		if (rhs == QSE_NULL) goto oops;

		joined = new_exp_bin_node (awk, xloc, QSE_AWK_BINOP_CONCAT, lhs, rhs);
		if (joined == QSE_NULL) goto oops;

		/* the new node owns both operands from now on */
		lhs = joined;
		rhs = QSE_NULL;
	}

	return lhs;

oops:
	if (rhs) qse_awk_clrpt (awk, rhs);
	if (lhs) qse_awk_clrpt (awk, lhs);
	return QSE_NULL;
}
|
|
|
|
/* parse the additive level of an expression.
 * operands are produced by parse_multiplicative() and combined with
 * the TOK_PLUS and TOK_MINUS operators by parse_binary(). */
static qse_awk_nde_t* parse_additive (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	/* token type to binary opcode table handed to parse_binary() */
	static binmap_t optab[] =
	{
		{ TOK_PLUS,  QSE_AWK_BINOP_PLUS },
		{ TOK_MINUS, QSE_AWK_BINOP_MINUS },
		{ TOK_EOF,   0 }
	};

	return parse_binary (awk, xloc, 0, optab, parse_multiplicative);
}
|
|
|
|
/* parse the multiplicative level of an expression.
 * operands are produced by parse_unary() and combined with the
 * TOK_MUL, TOK_DIV, TOK_IDIV and TOK_MOD operators by parse_binary().
 * TOK_EXP is not listed here. parse_exponent() maps it instead. */
static qse_awk_nde_t* parse_multiplicative (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	/* token type to binary opcode table handed to parse_binary() */
	static binmap_t optab[] =
	{
		{ TOK_MUL,  QSE_AWK_BINOP_MUL },
		{ TOK_DIV,  QSE_AWK_BINOP_DIV },
		{ TOK_IDIV, QSE_AWK_BINOP_IDIV },
		{ TOK_MOD,  QSE_AWK_BINOP_MOD },
		{ TOK_EOF,  0 }
	};

	return parse_binary (awk, xloc, 0, optab, parse_unary);
}
|
|
|
|
/* parse a unary expression.
 *
 * if the current token is none of TOK_PLUS, TOK_MINUS, TOK_LNOT,
 * TOK_BNOT and TOK_BQUOTE, the work is handed over to parse_exponent().
 * otherwise the operator is consumed and the operand is parsed by a
 * recursive call guarded by the expression nesting limit
 * (awk->opt.depth.s.expr_parse, checked only when it is positive).
 *
 * an operator applied to an integer or floating-point literal node is
 * folded into a literal node at parse time. anything else yields
 * a QSE_AWK_NDE_EXP_UNR node.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_unary (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* operand;
	qse_awk_loc_t operand_loc;
	qse_awk_nde_exp_t* unr;
	int opcode;
	int fold = -1; /* node type of the folded value. -1 for no folding */
	folded_t folded;

	if (MATCH(awk,TOK_PLUS)) opcode = QSE_AWK_UNROP_PLUS;
	else if (MATCH(awk,TOK_MINUS)) opcode = QSE_AWK_UNROP_MINUS;
	else if (MATCH(awk,TOK_LNOT)) opcode = QSE_AWK_UNROP_LNOT;
	else if (MATCH(awk,TOK_BNOT)) opcode = QSE_AWK_UNROP_BNOT;
	else if (MATCH(awk,TOK_BQUOTE)) opcode = QSE_AWK_UNROP_DEF;
	else opcode = -1;

	/* not a unary operator. let the next level handle it */
	if (opcode <= -1) return parse_exponent (awk, xloc);

	if (awk->opt.depth.s.expr_parse > 0 &&
	    awk->parse.depth.expr >= awk->opt.depth.s.expr_parse)
	{
		/* nested too deeply */
		SETERR_LOC (awk, QSE_AWK_EEXPRNST, xloc);
		return QSE_NULL;
	}

	/* consume the operator */
	if (get_token(awk) <= -1) return QSE_NULL;

	awk->parse.depth.expr++;
	operand_loc = awk->tok.loc;
	operand = parse_unary (awk, &operand_loc);
	awk->parse.depth.expr--;
	if (operand == QSE_NULL) return QSE_NULL;

	/* try to fold the operator into a literal operand */
	if (operand->type == QSE_AWK_NDE_INT)
	{
		qse_awk_nde_int_t* iv = (qse_awk_nde_int_t*)operand;

		fold = QSE_AWK_NDE_INT;
		if (opcode == QSE_AWK_UNROP_PLUS) folded.l = iv->val;
		else if (opcode == QSE_AWK_UNROP_MINUS) folded.l = -iv->val;
		else if (opcode == QSE_AWK_UNROP_LNOT) folded.l = !iv->val;
		else if (opcode == QSE_AWK_UNROP_BNOT) folded.l = ~iv->val;
		else fold = -1; /* the operator can't be folded */
	}
	else if (operand->type == QSE_AWK_NDE_FLT)
	{
		qse_awk_nde_flt_t* fv = (qse_awk_nde_flt_t*)operand;

		fold = QSE_AWK_NDE_FLT;
		if (opcode == QSE_AWK_UNROP_PLUS) folded.r = fv->val;
		else if (opcode == QSE_AWK_UNROP_MINUS) folded.r = -fv->val;
		else if (opcode == QSE_AWK_UNROP_LNOT) folded.r = !fv->val;
		else if (opcode == QSE_AWK_UNROP_BNOT)
		{
			/* bitwise negation of a floating-point literal
			 * results in an integer */
			folded.l = ~((qse_awk_int_t)fv->val);
			fold = QSE_AWK_NDE_INT;
		}
		else fold = -1; /* the operator can't be folded */
	}

	if (fold == QSE_AWK_NDE_INT)
	{
		if (operand->type == fold)
		{
			/* reuse the operand node for the folded value */
			update_int_node (awk, (qse_awk_nde_int_t*)operand, folded.l);
			return operand;
		}

		/* the node type changes. replace the operand node */
		QSE_ASSERT (operand->type == QSE_AWK_NDE_FLT);
		qse_awk_clrpt (awk, operand);
		return new_int_node (awk, folded.l, xloc);
	}

	if (fold == QSE_AWK_NDE_FLT)
	{
		if (operand->type == fold)
		{
			/* reuse the operand node for the folded value */
			update_flt_node (awk, (qse_awk_nde_flt_t*)operand, folded.r);
			return operand;
		}

		/* the node type changes. replace the operand node */
		QSE_ASSERT (operand->type == QSE_AWK_NDE_INT);
		qse_awk_clrpt (awk, operand);
		return new_flt_node (awk, folded.r, xloc);
	}

	/* no folding. build a unary expression node */
	unr = (qse_awk_nde_exp_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*unr));
	if (unr == QSE_NULL)
	{
		qse_awk_clrpt (awk, operand);
		SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
		return QSE_NULL;
	}

	/* the right field is not assigned here */
	unr->type = QSE_AWK_NDE_EXP_UNR;
	unr->loc = *xloc;
	unr->opcode = opcode;
	unr->left = operand;

	return (qse_awk_nde_t*)unr;
}
|
|
|
|
/* parse the exponentiation level of an expression.
 * operands are produced by parse_unary_exp() and combined with
 * the TOK_EXP operator by parse_binary(). */
static qse_awk_nde_t* parse_exponent (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	/* token type to binary opcode table handed to parse_binary() */
	static binmap_t optab[] =
	{
		{ TOK_EXP, QSE_AWK_BINOP_EXP },
		{ TOK_EOF, 0 }
	};

	return parse_binary (awk, xloc, 0, optab, parse_unary_exp);
}
|
|
|
|
/* parse a unary expression appearing as an operand of exponentiation.
 *
 * if the current token is none of TOK_PLUS, TOK_MINUS, TOK_LNOT,
 * TOK_BNOT and TOK_BQUOTE, the work is handed over to
 * parse_increment(). otherwise the operator is consumed, the operand
 * is parsed with parse_unary() under the expression nesting limit, and
 * a QSE_AWK_NDE_EXP_UNR node is created. unlike parse_unary(), no
 * constant folding is performed here.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_unary_exp (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_exp_t* unr;
	qse_awk_nde_t* operand;
	qse_awk_loc_t operand_loc;
	int opcode;

	if (MATCH(awk,TOK_PLUS)) opcode = QSE_AWK_UNROP_PLUS;
	else if (MATCH(awk,TOK_MINUS)) opcode = QSE_AWK_UNROP_MINUS;
	else if (MATCH(awk,TOK_LNOT)) opcode = QSE_AWK_UNROP_LNOT;
	else if (MATCH(awk,TOK_BNOT)) opcode = QSE_AWK_UNROP_BNOT;
	else if (MATCH(awk,TOK_BQUOTE)) opcode = QSE_AWK_UNROP_DEF;
	else opcode = -1;

	/* not a unary operator. let the next level handle it */
	if (opcode <= -1) return parse_increment (awk, xloc);

	if (awk->opt.depth.s.expr_parse > 0 &&
	    awk->parse.depth.expr >= awk->opt.depth.s.expr_parse)
	{
		/* nested too deeply */
		SETERR_LOC (awk, QSE_AWK_EEXPRNST, xloc);
		return QSE_NULL;
	}

	/* consume the operator */
	if (get_token(awk) <= -1) return QSE_NULL;

	awk->parse.depth.expr++;
	operand_loc = awk->tok.loc;
	operand = parse_unary (awk, &operand_loc);
	awk->parse.depth.expr--;
	if (operand == QSE_NULL) return QSE_NULL;

	unr = (qse_awk_nde_exp_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*unr));
	if (unr == QSE_NULL)
	{
		qse_awk_clrpt (awk, operand);
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	unr->type = QSE_AWK_NDE_EXP_UNR;
	unr->loc = *xloc;
	unr->opcode = opcode;
	unr->left = operand;
	unr->right = QSE_NULL;

	return (qse_awk_nde_t*)unr;
}
|
|
|
|
/* parse a primary expression optionally decorated with a prefix or
 * postfix ++ or -- operator.
 *
 * - a prefix operator is consumed before the primary is parsed.
 * - a postfix operator is consumed only when the primary is a variable
 *   or a positional node. otherwise it is left in the token stream and
 *   the primary is returned as it is (e.g. 1 ++y).
 * - having both operators is an error (QSE_AWK_EPREPST) unless
 *   QSE_AWK_BLANKCONCAT is on, in which case the prefix operator wins
 *   and the postfix candidate is left untouched.
 * - a prefix operator on something that is neither a variable nor
 *   a positional node is an error (QSE_AWK_EINCDECOPR).
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_increment (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_exp_t* inc;
	qse_awk_nde_t* operand;
	qse_awk_loc_t operand_loc;
	int type, opcode;
	int pre_op, post_op;

	/* look for a prefix increment operator */
	if (MATCH(awk,TOK_PLUSPLUS)) pre_op = QSE_AWK_INCOP_PLUS;
	else if (MATCH(awk,TOK_MINUSMINUS)) pre_op = QSE_AWK_INCOP_MINUS;
	else pre_op = -1;

	/* consume the prefix operator if there is one */
	if (pre_op != -1 && get_token(awk) <= -1) return QSE_NULL;

	operand_loc = awk->tok.loc;
	operand = parse_primary (awk, &operand_loc);
	if (operand == QSE_NULL) return QSE_NULL;

	/* look for a postfix increment operator. it is not consumed yet */
	if (MATCH(awk,TOK_PLUSPLUS)) post_op = QSE_AWK_INCOP_PLUS;
	else if (MATCH(awk,TOK_MINUSMINUS)) post_op = QSE_AWK_INCOP_MINUS;
	else post_op = -1;

	if (!(awk->opt.trait & QSE_AWK_BLANKCONCAT) &&
	    pre_op != -1 && post_op != -1)
	{
		/* both prefix and postfix operators are not allowed */
		qse_awk_clrpt (awk, operand);
		SETERR_LOC (awk, QSE_AWK_EPREPST, xloc);
		return QSE_NULL;
	}

	if (pre_op != -1)
	{
		/* the prefix operator takes precedence over
		 * a potential postfix operator */
		type = QSE_AWK_NDE_EXP_INCPRE;
		opcode = pre_op;
	}
	else if (post_op != -1)
	{
		type = QSE_AWK_NDE_EXP_INCPST;
		opcode = post_op;
	}
	else
	{
		/* no increment operators at all */
		return operand;
	}

	if (!is_var(operand) && operand->type != QSE_AWK_NDE_POS)
	{
		/* for an expression like 1 ++y, the operand is 1.
		 * the ++ is left behind for y */
		if (type == QSE_AWK_NDE_EXP_INCPST) return operand;

		qse_awk_clrpt (awk, operand);
		SETERR_LOC (awk, QSE_AWK_EINCDECOPR, xloc);
		return QSE_NULL;
	}

	if (type == QSE_AWK_NDE_EXP_INCPST)
	{
		/* now it is safe to consume the postfix operator */
		if (get_token(awk) <= -1)
		{
			qse_awk_clrpt (awk, operand);
			return QSE_NULL;
		}
	}

	inc = (qse_awk_nde_exp_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*inc));
	if (inc == QSE_NULL)
	{
		qse_awk_clrpt (awk, operand);
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	inc->type = type;
	inc->loc = *xloc;
	inc->opcode = opcode;
	inc->left = operand;
	inc->right = QSE_NULL;

	return (qse_awk_nde_t*)inc;
}
|
|
|
|
/* classification of a name as returned by isfunname() and isfnname().
 * FNTYPE_UNKNOWN - not found by any lookup performed by those functions.
 * FNTYPE_FNC     - found by qse_awk_findfnc(). returned by isfnname() only.
 * FNTYPE_FUN     - the function currently being parsed, or a name found in
 *                  awk->tree.funs or awk->parse.funs. */
#define FNTYPE_UNKNOWN 0
|
|
#define FNTYPE_FNC 1
|
|
#define FNTYPE_FUN 2
|
|
|
|
/* tell whether a name refers to an awk function.
 *
 * returns FNTYPE_FUN if the name is the function being parsed
 * currently (awk->tree.cur_fun), is found in awk->tree.funs, or is
 * found in awk->parse.funs. returns FNTYPE_UNKNOWN otherwise. */
static QSE_INLINE int isfunname (qse_awk_t* awk, const qse_cstr_t* name)
{
	/* is it the function being parsed at the moment? */
	if (awk->tree.cur_fun.ptr &&
	    qse_strxncmp (
	        awk->tree.cur_fun.ptr, awk->tree.cur_fun.len,
	        name->ptr, name->len) == 0)
	{
		return FNTYPE_FUN;
	}

	/* is it one of the functions defined previously, or one of
	 * the function calls not resolved so far? */
	if (qse_htb_search (awk->tree.funs, name->ptr, name->len) != QSE_NULL ||
	    qse_htb_search (awk->parse.funs, name->ptr, name->len) != QSE_NULL)
	{
		return FNTYPE_FUN;
	}

	return FNTYPE_UNKNOWN;
}
|
|
|
|
/* tell whether a name refers to any kind of function.
 *
 * returns FNTYPE_FNC if qse_awk_findfnc() finds the name.
 * otherwise, returns the result of isfunname(). */
static QSE_INLINE int isfnname (qse_awk_t* awk, const qse_cstr_t* name)
{
	return (qse_awk_findfnc (awk, name) != QSE_NULL)?
		FNTYPE_FNC: isfunname (awk, name);
}
|
|
|
|
|
|
/* build an integer literal node from the current token.
 *
 * the token text is converted with qse_awk_strxtoint() and is also
 * duplicated into the node so that the literal is remembered in the
 * original form. the next token is fetched on success.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary_int (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_int_t* lit;

	lit = (qse_awk_nde_int_t*)new_int_node (
		awk,
		qse_awk_strxtoint (
			awk,
			QSE_STR_PTR(awk->tok.name),
			QSE_STR_LEN(awk->tok.name),
			0, QSE_NULL
		),
		xloc
	);
	if (lit == QSE_NULL) return QSE_NULL;

	/* the token text is not expected to embed a null character */
	QSE_ASSERT (
		QSE_STR_LEN(awk->tok.name) ==
		qse_strlen(QSE_STR_PTR(awk->tok.name)));

	/* keep the textual form of the literal */
	lit->len = QSE_STR_LEN(awk->tok.name);
	lit->str = qse_awk_cstrdup (awk, QSE_STR_XSTR(awk->tok.name));
	if (lit->str == QSE_NULL) goto oops;

	/* move on to the next token */
	if (get_token(awk) <= -1) goto oops;

	return (qse_awk_nde_t*)lit;

oops:
	QSE_ASSERT (lit != QSE_NULL);
	if (lit->str) QSE_AWK_FREE (awk, lit->str);
	QSE_AWK_FREE (awk, lit);
	return QSE_NULL;
}
|
|
|
|
/* build a floating-point literal node from the current token.
 *
 * the token text is converted with qse_awk_strxtoflt() and is also
 * duplicated into the node so that the literal is remembered in the
 * original form. the next token is fetched on success.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary_flt (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_flt_t* lit;

	lit = (qse_awk_nde_flt_t*) new_flt_node (
		awk,
		qse_awk_strxtoflt (
			awk,
			QSE_STR_PTR(awk->tok.name),
			QSE_STR_LEN(awk->tok.name),
			QSE_NULL
		),
		xloc
	);
	if (lit == QSE_NULL) return QSE_NULL;

	/* the token text is not expected to embed a null character */
	QSE_ASSERT (
		QSE_STR_LEN(awk->tok.name) ==
		qse_strlen(QSE_STR_PTR(awk->tok.name)));

	/* keep the textual form of the literal */
	lit->len = QSE_STR_LEN(awk->tok.name);
	lit->str = qse_awk_cstrdup (awk, QSE_STR_XSTR(awk->tok.name));
	if (lit->str == QSE_NULL) goto oops;

	/* move on to the next token */
	if (get_token(awk) <= -1) goto oops;

	return (qse_awk_nde_t*)lit;

oops:
	QSE_ASSERT (lit != QSE_NULL);
	if (lit->str) QSE_AWK_FREE (awk, lit->str);
	QSE_AWK_FREE (awk, lit);
	return QSE_NULL;
}
|
|
|
|
/* build a string literal node from the current token.
 *
 * the node receives a duplicate of the token text along with its
 * length. the next token is fetched on success.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary_str (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_str_t* lit;

	lit = (qse_awk_nde_str_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*lit));
	if (lit == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	lit->type = QSE_AWK_NDE_STR;
	lit->loc = *xloc;
	lit->len = QSE_STR_LEN(awk->tok.name);
	lit->ptr = qse_awk_cstrdup (awk, QSE_STR_XSTR(awk->tok.name));
	if (lit->ptr == QSE_NULL) goto oops;

	/* move on to the next token */
	if (get_token(awk) <= -1) goto oops;

	return (qse_awk_nde_t*)lit;

oops:
	QSE_ASSERT (lit != QSE_NULL);
	if (lit->ptr) QSE_AWK_FREE (awk, lit->ptr);
	QSE_AWK_FREE (awk, lit);
	return QSE_NULL;
}
|
|
|
|
/* build a regular expression literal node.
 *
 * the regular expression is tokenized here, not in the tokenizer,
 * because the slash symbol is context-sensitive. when TOK_DIV or
 * TOK_DIV_ASSN shows up where a primary is expected, the text that
 * follows is read with get_rexstr() and compiled with
 * qse_awk_buildrex(). the node keeps both a copy of the text and the
 * compiled code. the next token is fetched on success.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_rex_t* rex;
	qse_awk_errnum_t rex_errnum;

	/* start over with an empty token buffer */
	qse_str_clear (awk->tok.name);

	if (MATCH(awk,TOK_DIV_ASSN))
	{
		/* the token seen is /=. put = into the token buffer
		 * ahead of the rest of the text (presumably because
		 * the = is the first character of the expression) */
		if (qse_str_ccat (awk->tok.name, QSE_T('=')) == (qse_size_t)-1)
		{
			SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
			return QSE_NULL;
		}
	}

	SET_TOKEN_TYPE (awk, &awk->tok, TOK_REX);
	if (get_rexstr (awk, &awk->tok) <= -1) return QSE_NULL;

	QSE_ASSERT (MATCH(awk,TOK_REX));

	rex = (qse_awk_nde_rex_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*rex));
	if (rex == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	rex->type = QSE_AWK_NDE_REX;
	rex->loc = *xloc;

	/* keep the text of the regular expression */
	rex->str.len = QSE_STR_LEN(awk->tok.name);
	rex->str.ptr = qse_awk_cstrdup (awk, QSE_STR_XSTR(awk->tok.name));
	if (rex->str.ptr == QSE_NULL) goto oops;

	/* compile it into rex->code[0] and rex->code[1] */
	if (qse_awk_buildrex (
		awk,
		QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name),
		&rex_errnum, &rex->code[0], &rex->code[1]) <= -1)
	{
		SETERR_LOC (awk, rex_errnum, xloc);
		goto oops;
	}

	/* move on to the next token */
	if (get_token(awk) <= -1) goto oops;

	return (qse_awk_nde_t*)rex;

oops:
	QSE_ASSERT (rex != QSE_NULL);
	if (rex->code[0]) qse_awk_freerex (awk, rex->code[0], rex->code[1]);
	if (rex->str.ptr) QSE_AWK_FREE (awk, rex->str.ptr);
	QSE_AWK_FREE (awk, rex);
	return QSE_NULL;
}
|
|
|
|
/* build a positional variable node ($ followed by a primary).
 *
 * the current token is expected to be TOK_DOLLAR. it is consumed and
 * the primary expression that follows becomes the position value of
 * the node.
 *
 * returns the node, or QSE_NULL on failure. every failure after the
 * node has been allocated goes through the oops label, so the node
 * and its value are always released. */
static qse_awk_nde_t* parse_primary_positional (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_pos_t* nde;
	qse_awk_loc_t ploc;

	nde = (qse_awk_nde_pos_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nde));
	if (nde == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	nde->type = QSE_AWK_NDE_POS;
	nde->loc = *xloc;

	/* consume the dollar sign. the failure check uses <= -1 like
	 * all the other get_token() call sites and jumps to oops so
	 * that the node allocated above is not leaked. */
	if (get_token(awk) <= -1) goto oops;

	ploc = awk->tok.loc;
	nde->val = parse_primary (awk, &ploc);
	if (nde->val == QSE_NULL) goto oops;

	return (qse_awk_nde_t*)nde;

oops:
	if (nde)
	{
		if (nde->val) qse_awk_clrpt (awk, nde->val);
		QSE_AWK_FREE (awk, nde);
	}
	return QSE_NULL;
}
|
|
|
|
/* parse a parenthesized expression or expression group.
 *
 * ( expr ) yields the node of expr itself. ( expr1, expr2, ... )
 * yields a QSE_AWK_NDE_GRP node whose body is the chain of the
 * expressions. a group is accepted without any further check only in
 * a print/printf statement at the expression depth of 1. elsewhere it
 * must be followed by TOK_IN unless QSE_AWK_TOLERANT is on.
 *
 * the sequence number of the opening parenthesis is stored into
 * awk->parse.lparen_last_closed once the closing one is seen.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary_lparen (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* head;
	qse_awk_nde_t* tail;
	qse_awk_loc_t eloc;
	qse_size_t my_lparen_seq;

	/* take a sequence number for this left parenthesis */
	my_lparen_seq = awk->parse.lparen_seq++;

	/* consume the left parenthesis */
	if (get_token(awk) <= -1) return QSE_NULL;

	/* the first expression inside the parentheses */
	eloc = awk->tok.loc;
	head = parse_expr_withdc (awk, &eloc);
	if (head == QSE_NULL) return QSE_NULL;

	tail = head;
	QSE_ASSERT (tail->next == QSE_NULL);

	/* more expressions separated by a comma, if any */
	while (MATCH(awk,TOK_COMMA))
	{
		qse_awk_nde_t* elem;

		/* skip the comma and any new lines following it */
		if (get_token(awk) <= -1) goto oops;
		while (MATCH(awk,TOK_NEWLINE))
		{
			if (get_token(awk) <= -1) goto oops;
		}

		eloc = awk->tok.loc;
		elem = parse_expr_withdc (awk, &eloc);
		if (elem == QSE_NULL) goto oops;

		/* append the new expression to the chain */
		QSE_ASSERT (elem->next == QSE_NULL);
		tail->next = elem;
		tail = elem;
	}

	/* the closing parenthesis must follow */
	if (!MATCH(awk,TOK_RPAREN))
	{
		SETERR_TOK (awk, QSE_AWK_ERPAREN);
		goto oops;
	}

	/* remember which left parenthesis has just been closed */
	awk->parse.lparen_last_closed = my_lparen_seq;

	if (get_token(awk) <= -1) goto oops;

	if (head->next)
	{
		/* more than one expression - (expr1, expr2, expr3).
		 * it is an expression group */
		qse_awk_nde_grp_t* grp;
		int in_print_top;

		in_print_top =
			(awk->parse.id.stmt == TOK_PRINT ||
			 awk->parse.id.stmt == TOK_PRINTF) &&
			awk->parse.depth.expr == 1;

		if (!in_print_top &&
		    !(awk->opt.trait & QSE_AWK_TOLERANT) &&
		    !MATCH(awk,TOK_IN))
		{
			/* the group must be followed by 'in' here */
			SETERR_TOK (awk, QSE_AWK_EKWIN);
			goto oops;
		}

		grp = (qse_awk_nde_grp_t*) qse_awk_callocmem (
			awk, QSE_SIZEOF(qse_awk_nde_grp_t));
		if (grp == QSE_NULL)
		{
			ADJERR_LOC (awk, xloc);
			goto oops;
		}

		grp->type = QSE_AWK_NDE_GRP;
		grp->loc = *xloc;
		grp->body = head;

		head = (qse_awk_nde_t*)grp;
	}

	return head;

oops:
	if (head) qse_awk_clrpt (awk, head);
	return QSE_NULL;
}
|
|
|
|
/* parse the statement-level getline.
 *
 *   getline
 *   getline var
 *   getline $XXX
 *   getline [var] < file
 *
 * getline after the pipe symbols(|,||) is parsed by parse_primary().
 *
 * the input type of the node is QSE_AWK_IN_CONSOLE unless the < part
 * is present, in which case it is QSE_AWK_IN_FILE.
 *
 * returns the node, or QSE_NULL on failure after releasing the node
 * and the subtrees attached to it so far. */
static qse_awk_nde_t* parse_primary_getline (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_getline_t* nde;
	qse_awk_loc_t ploc;

	nde = (qse_awk_nde_getline_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nde));
	if (nde == QSE_NULL)
	{
		/* adjust the error location like the other callers of
		 * qse_awk_callocmem() in this file do */
		ADJERR_LOC (awk, xloc);
		goto oops;
	}

	nde->type = QSE_AWK_NDE_GETLINE;
	nde->loc = *xloc;
	nde->in_type = QSE_AWK_IN_CONSOLE;

	/* consume 'getline' */
	if (get_token(awk) <= -1) goto oops;

	if (MATCH(awk,TOK_IDENT) || MATCH(awk,TOK_DOLLAR))
	{
		/* getline var
		 * getline $XXX */
		int is_target = 1;

		if ((awk->opt.trait & QSE_AWK_BLANKCONCAT) && MATCH(awk,TOK_IDENT))
		{
			/* some precheck is needed on if the identifier
			 * is really a variable */
			if (preget_token(awk) <= -1) goto oops;

			if (awk->ntok.type == TOK_DBLCOLON)
			{
				/* a segmented name */
				is_target = 0;
			}
			else if (awk->ntok.type == TOK_LPAREN &&
			         awk->ntok.loc.line == awk->tok.loc.line &&
			         awk->ntok.loc.colm == awk->tok.loc.colm + QSE_STR_LEN(awk->tok.name))
			{
				/* it's in the form of a function call since
				 * there are no spaces between the identifier
				 * and the left parenthesis. */
				is_target = 0;
			}
			else if (isfnname (awk, QSE_STR_XSTR(awk->tok.name)) != FNTYPE_UNKNOWN)
			{
				/* a known function name */
				is_target = 0;
			}
		}

		if (is_target)
		{
			ploc = awk->tok.loc;
			nde->var = parse_primary (awk, &ploc);
			if (nde->var == QSE_NULL) goto oops;

			if (!is_var(nde->var) && nde->var->type != QSE_AWK_NDE_POS)
			{
				/* this is 'getline' followed by an expression
				 * probably.
				 *    getline a()
				 *    getline sys::WNOHANG
				 */
				SETERR_LOC (awk, QSE_AWK_EBADARG, &ploc);
				goto oops;
			}
		}
	}

	if (MATCH(awk, TOK_LT))
	{
		/* getline [var] < file */
		if (get_token(awk) <= -1) goto oops;

		ploc = awk->tok.loc;
		/* TODO: is this correct?
		 *       parse_expr_withdc() was tried here before. */
		nde->in = parse_primary (awk, &ploc);
		if (nde->in == QSE_NULL) goto oops;

		nde->in_type = QSE_AWK_IN_FILE;
	}

	return (qse_awk_nde_t*)nde;

oops:
	if (nde)
	{
		if (nde->in) qse_awk_clrpt (awk, nde->in);
		if (nde->var) qse_awk_clrpt (awk, nde->var);
		QSE_AWK_FREE (awk, nde);
	}
	return QSE_NULL;
}
|
|
|
|
/* parse a primary expression without the piping part.
 *
 * the current token type selects the specialized parser. in the
 * tolerant mode, print and printf are accepted as well and handed
 * over to parse_print(). any other token results in the
 * QSE_AWK_EEXPRNR error.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary_nopipe (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	switch (awk->tok.type)
	{
		case TOK_IDENT:   return parse_primary_ident (awk, xloc);
		case TOK_INT:     return parse_primary_int (awk, xloc);
		case TOK_FLT:     return parse_primary_flt (awk, xloc);
		case TOK_STR:     return parse_primary_str (awk, xloc);

		/* a slash seen here begins a regular expression */
		case TOK_DIV:
		case TOK_DIV_ASSN:
			return parse_primary_rex (awk, xloc);

		case TOK_DOLLAR:  return parse_primary_positional (awk, xloc);
		case TOK_LPAREN:  return parse_primary_lparen (awk, xloc);
		case TOK_GETLINE: return parse_primary_getline (awk, xloc);

		default:
			break;
	}

	/* in the tolerant mode, print and printf are treated
	 * as a function like getline */
	if ((awk->opt.trait & QSE_AWK_TOLERANT) &&
	    (MATCH(awk,TOK_PRINT) || MATCH(awk,TOK_PRINTF)))
	{
		if (get_token(awk) <= -1) return QSE_NULL;
		return parse_print (awk, xloc);
	}

	/* a valid expression introducer is expected */
	if (!MATCH(awk,TOK_NEWLINE))
	{
		SETERR_TOK (awk, QSE_AWK_EEXPRNR);
	}
	else
	{
		/* for a new line, the error is reported against
		 * the previous token */
		SETERR_ARG_LOC (
			awk, QSE_AWK_EEXPRNR,
			QSE_STR_PTR(awk->ptok.name),
			QSE_STR_LEN(awk->ptok.name),
			&awk->ptok.loc
		);
	}

	return QSE_NULL;
}
|
|
|
|
/* parse a primary expression including the piping part.
 *
 * a primary parsed by parse_primary_nopipe() may be followed by
 * any number of
 *
 *   | getline [var]      (QSE_AWK_IN_PIPE)
 *   || getline [var]     (QSE_AWK_IN_RWPIPE, with QSE_AWK_RWPIPE on)
 *
 * provided that QSE_AWK_RIO is on. each occurrence wraps the node
 * built so far into a QSE_AWK_NDE_GETLINE node as its input.
 * a pipe symbol not followed by getline is left in the token stream.
 *
 * returns the node, or QSE_NULL on failure. */
static qse_awk_nde_t* parse_primary (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_t* source;
	qse_awk_nde_t* target = QSE_NULL;
	qse_awk_nde_getline_t* gl;
	qse_awk_loc_t target_loc;

	source = parse_primary_nopipe (awk, xloc);
	if (source == QSE_NULL) goto oops;

	/* handle the piping part */
	for (;;)
	{
		int intype = -1;

		if (awk->opt.trait & QSE_AWK_RIO)
		{
			if (MATCH(awk,TOK_BOR))
			{
				intype = QSE_AWK_IN_PIPE;
			}
			else if (MATCH(awk,TOK_LOR) &&
			         (awk->opt.trait & QSE_AWK_RWPIPE))
			{
				intype = QSE_AWK_IN_RWPIPE;
			}
		}

		/* no pipe symbol */
		if (intype == -1) break;

		/* peek at the token after the pipe symbol */
		if (preget_token(awk) <= -1) goto oops;
		if (awk->ntok.type != TOK_GETLINE) break;

		/* consume ntok('getline'). the return value is not checked
		 * here (presumably it can't fail for a token that has been
		 * read ahead already - to be confirmed) */
		get_token (awk);

		/* get the next token */
		if (get_token(awk) <= -1) goto oops;

		/* TODO: is this correct? */
		if (MATCH(awk,TOK_IDENT) || MATCH(awk,TOK_DOLLAR))
		{
			/* command | getline var
			 * command || getline var */
			int is_target = 1;

			if ((awk->opt.trait & QSE_AWK_BLANKCONCAT) && MATCH(awk,TOK_IDENT))
			{
				/* some precheck is needed on if the
				 * identifier is really a variable */
				if (preget_token(awk) <= -1) goto oops;

				if (awk->ntok.type == TOK_DBLCOLON)
				{
					/* a segmented name */
					is_target = 0;
				}
				else if (awk->ntok.type == TOK_LPAREN &&
				         awk->ntok.loc.line == awk->tok.loc.line &&
				         awk->ntok.loc.colm == awk->tok.loc.colm + QSE_STR_LEN(awk->tok.name))
				{
					/* it's in the form of a function call
					 * since there are no spaces between the
					 * identifier and the left parenthesis. */
					is_target = 0;
				}
				else if (isfnname (awk, QSE_STR_XSTR(awk->tok.name)) != FNTYPE_UNKNOWN)
				{
					/* a known function name */
					is_target = 0;
				}
			}

			if (is_target)
			{
				target_loc = awk->tok.loc;
				target = parse_primary (awk, &target_loc);
				if (target == QSE_NULL) goto oops;

				if (!is_var(target) && target->type != QSE_AWK_NDE_POS)
				{
					/* function a() {}
					 * print ("ls -laF" | getline a()) */
					SETERR_LOC (awk, QSE_AWK_EBADARG, &target_loc);
					goto oops;
				}
			}
		}

		gl = (qse_awk_nde_getline_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*gl));
		if (gl == QSE_NULL)
		{
			ADJERR_LOC (awk, xloc);
			goto oops;
		}

		gl->type = QSE_AWK_NDE_GETLINE;
		gl->loc = *xloc;
		gl->var = target;
		gl->in_type = intype;
		gl->in = source;

		/* the getline node becomes the input of the next
		 * piping part if any */
		source = (qse_awk_nde_t*)gl;
		target = QSE_NULL;
	}

	return source;

oops:
	if (target) qse_awk_clrpt (awk, target);
	qse_awk_clrpt (awk, source);
	return QSE_NULL;
}
|
|
|
|
static qse_awk_nde_t* parse_variable (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, qse_awk_nde_type_t type,
|
|
const qse_cstr_t* name, qse_size_t idxa)
|
|
{
|
|
qse_awk_nde_var_t* nde;
|
|
|
|
if (!(awk->opt.trait & QSE_AWK_BLANKCONCAT))
|
|
{
|
|
/* if concatenation by blanks is not allowed, the explicit
|
|
* concatenation operator(%%) must be used. so it is obvious
|
|
* that it is a function call, which is illegal for a variable.
|
|
* if implicit, "var_xxx (1)" may be concatenation of
|
|
* the value of var_xxx and 1.
|
|
*/
|
|
if (MATCH(awk,TOK_LPAREN))
|
|
{
|
|
/* a variable is not a function */
|
|
SETERR_ARG_LOC (
|
|
awk, QSE_AWK_EFUNNAM,
|
|
name->ptr, name->len, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
}
|
|
|
|
nde = (qse_awk_nde_var_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nde));
|
|
if (nde == QSE_NULL)
|
|
{
|
|
ADJERR_LOC (awk, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde->type = type;
|
|
nde->loc = *xloc;
|
|
/*nde->id.name.ptr = QSE_NULL;*/
|
|
nde->id.name.ptr = name->ptr;
|
|
nde->id.name.len = name->len;
|
|
nde->id.idxa = idxa;
|
|
nde->idx = QSE_NULL;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
static int dup_ident_and_get_next (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, qse_cstr_t* name, int max)
|
|
{
|
|
int nsegs = 0;
|
|
|
|
QSE_ASSERT (MATCH(awk,TOK_IDENT));
|
|
|
|
do
|
|
{
|
|
name[nsegs].ptr = QSE_STR_PTR(awk->tok.name);
|
|
name[nsegs].len = QSE_STR_LEN(awk->tok.name);
|
|
|
|
/* duplicate the identifier */
|
|
name[nsegs].ptr = qse_strxdup (name[nsegs].ptr, name[nsegs].len, awk->mmgr);
|
|
if (name[nsegs].ptr == QSE_NULL)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
goto oops;
|
|
}
|
|
|
|
nsegs++;
|
|
|
|
if (get_token(awk) <= -1) goto oops;
|
|
|
|
if (!MATCH(awk,TOK_DBLCOLON)) break;
|
|
|
|
if (get_token(awk) <= -1) goto oops;
|
|
|
|
/* the identifier after ::
|
|
* allow reserved words as well since i view the whole name(mod::ident)
|
|
* as one segment. however, i don't want the identifier part to begin
|
|
* with @. some extended keywords begin with @ like @include.
|
|
* TOK_XGLOBAL to TOK_XRESET are excuded from the check for that reason. */
|
|
if (!MATCH(awk, TOK_IDENT) && !(MATCH_RANGE(awk, TOK_BEGIN, TOK_GETLINE)))
|
|
{
|
|
SETERR_TOK (awk, QSE_AWK_EIDENT);
|
|
goto oops;
|
|
}
|
|
|
|
if (nsegs >= max)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ESEGTM, xloc);
|
|
goto oops;
|
|
}
|
|
}
|
|
while (1);
|
|
|
|
return nsegs;
|
|
|
|
oops:
|
|
while (nsegs > 0) QSE_AWK_FREE (awk, name[--nsegs].ptr);
|
|
return -1;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_primary_ident_noseg (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, const qse_cstr_t* name)
|
|
{
|
|
qse_awk_fnc_t* fnc;
|
|
qse_size_t idxa;
|
|
qse_awk_nde_t* nde = QSE_NULL;
|
|
|
|
/* check if name is an intrinsic function name */
|
|
fnc = qse_awk_findfnc (awk, name);
|
|
if (fnc)
|
|
{
|
|
if (MATCH(awk,TOK_LPAREN) || fnc->dfl0)
|
|
{
|
|
if (fnc->spec.arg.min > fnc->spec.arg.max)
|
|
{
|
|
/* this intrinsic function is located in the specificed module.
|
|
* convert the function call to a module call. i do this to
|
|
* exclude some instrinsic functions from the main engine.
|
|
* e.g) sin -> math::sin
|
|
* cos -> math::cos
|
|
*/
|
|
qse_cstr_t segs[2];
|
|
|
|
QSE_ASSERT (fnc->spec.arg.spec != QSE_NULL);
|
|
|
|
segs[0].ptr = (qse_char_t*)fnc->spec.arg.spec;
|
|
segs[0].len = qse_strlen(fnc->spec.arg.spec);
|
|
segs[1] = *name;
|
|
|
|
return parse_primary_ident_segs (awk, xloc, name, segs, 2);
|
|
}
|
|
|
|
/* fnc->dfl0 means that the function can be called without ().
|
|
* i.e. length */
|
|
nde = parse_fncall (awk, name, fnc, xloc, (fnc->dfl0? 1: 0));
|
|
}
|
|
else
|
|
{
|
|
/* an intrinsic function should be in the form
|
|
* of the function call */
|
|
SETERR_TOK (awk, QSE_AWK_ELPAREN);
|
|
}
|
|
}
|
|
/* now we know that name is a normal identifier. */
|
|
else if (MATCH(awk,TOK_LBRACK))
|
|
{
|
|
nde = parse_hashidx (awk, name, xloc);
|
|
}
|
|
else if ((idxa = qse_lda_rsearch (awk->parse.lcls, QSE_LDA_SIZE(awk->parse.lcls), name->ptr, name->len)) != QSE_LDA_NIL)
|
|
{
|
|
/* local variable */
|
|
nde = parse_variable (awk, xloc, QSE_AWK_NDE_LCL, name, idxa);
|
|
}
|
|
else if ((idxa = qse_lda_search (awk->parse.params, 0, name->ptr, name->len)) != QSE_LDA_NIL)
|
|
{
|
|
/* parameter */
|
|
nde = parse_variable (awk, xloc, QSE_AWK_NDE_ARG, name, idxa);
|
|
}
|
|
else if ((idxa = get_global (awk, name)) != QSE_LDA_NIL)
|
|
{
|
|
/* global variable */
|
|
nde = parse_variable (awk, xloc, QSE_AWK_NDE_GBL, name, idxa);
|
|
}
|
|
else
|
|
{
|
|
int fntype;
|
|
|
|
fntype = isfunname (awk, name);
|
|
|
|
if (fntype)
|
|
{
|
|
QSE_ASSERT (fntype == FNTYPE_FUN);
|
|
|
|
if (MATCH(awk,TOK_LPAREN))
|
|
{
|
|
/* must be a function name */
|
|
QSE_ASSERT (qse_htb_search (
|
|
awk->parse.named, name->ptr, name->len) == QSE_NULL);
|
|
|
|
nde = parse_fncall (awk, name, QSE_NULL, xloc, 0);
|
|
}
|
|
else
|
|
{
|
|
/* function name appeared without () */
|
|
SETERR_ARG_LOC (
|
|
awk, QSE_AWK_EFUNRED,
|
|
name->ptr, name->len, xloc
|
|
);
|
|
}
|
|
}
|
|
else if (awk->opt.trait & QSE_AWK_IMPLICIT)
|
|
{
|
|
/* if the name is followed by ( without spaces,
|
|
* it's considered a function call though the name
|
|
* has not been seen/resolved.
|
|
*
|
|
* it is a function call so long as it's followed
|
|
* by a left parenthesis if concatenation by blanks
|
|
* is not allowed.
|
|
*/
|
|
|
|
if (MATCH(awk,TOK_LPAREN) &&
|
|
(!(awk->opt.trait & QSE_AWK_BLANKCONCAT) ||
|
|
(awk->tok.loc.line == xloc->line &&
|
|
awk->tok.loc.colm == xloc->colm + name->len)))
|
|
{
|
|
/* it is a function call to an undefined function yet */
|
|
|
|
if (qse_htb_search (
|
|
awk->parse.named, name->ptr, name->len) != QSE_NULL)
|
|
{
|
|
/* the function call conflicts with a named variable */
|
|
SETERR_ARG_LOC (
|
|
awk, QSE_AWK_EVARRED,
|
|
name->ptr, name->len, xloc
|
|
);
|
|
}
|
|
else
|
|
{
|
|
nde = parse_fncall (awk, name, QSE_NULL, xloc, 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
qse_awk_nde_var_t* tmp;
|
|
|
|
/* if there is a space between the name and the left parenthesis
|
|
* while the name is not resolved to anything, we treat the space
|
|
* as concatention by blanks. so we handle the name as a named
|
|
* variable. */
|
|
tmp = (qse_awk_nde_var_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*tmp));
|
|
if (tmp == QSE_NULL) ADJERR_LOC (awk, xloc);
|
|
else
|
|
{
|
|
/* collect unique instances of a named variable
|
|
* for reference */
|
|
if (qse_htb_upsert (
|
|
awk->parse.named,
|
|
name->ptr, name->len, QSE_NULL, 0) == QSE_NULL)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
QSE_AWK_FREE (awk, tmp);
|
|
}
|
|
else
|
|
{
|
|
tmp->type = QSE_AWK_NDE_NAMED;
|
|
tmp->loc = *xloc;
|
|
tmp->id.name.ptr = name->ptr;
|
|
tmp->id.name.len = name->len;
|
|
tmp->id.idxa = (qse_size_t)-1;
|
|
tmp->idx = QSE_NULL;
|
|
|
|
nde = (qse_awk_nde_t*)tmp;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (MATCH(awk,TOK_LPAREN))
|
|
{
|
|
/* it is a function call as the name is followed
|
|
* by ( and implicit variables are disabled. */
|
|
nde = parse_fncall (awk, name, QSE_NULL, xloc, 0);
|
|
}
|
|
else
|
|
{
|
|
/* undefined variable */
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EUNDEF, name->ptr, name->len, xloc);
|
|
}
|
|
}
|
|
}
|
|
|
|
return nde;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_primary_ident_segs (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc, const qse_cstr_t* full,
|
|
const qse_cstr_t segs[], int nsegs)
|
|
{
|
|
/* parse xxx::yyy */
|
|
|
|
qse_awk_nde_t* nde = QSE_NULL;
|
|
qse_awk_mod_t* mod;
|
|
qse_awk_mod_sym_t sym;
|
|
qse_awk_fnc_t fnc;
|
|
|
|
CLRERR (awk);
|
|
mod = query_module (awk, segs, nsegs, &sym);
|
|
if (mod == QSE_NULL)
|
|
{
|
|
if (ISNOERR(awk)) SETERR_LOC (awk, QSE_AWK_ENOSUP, xloc);
|
|
else ADJERR_LOC (awk, xloc);
|
|
}
|
|
else
|
|
{
|
|
switch (sym.type)
|
|
{
|
|
case QSE_AWK_MOD_FNC:
|
|
if ((awk->opt.trait & sym.u.fnc.trait) != sym.u.fnc.trait)
|
|
{
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EUNDEF, full->ptr, full->len, xloc);
|
|
break;
|
|
}
|
|
|
|
if (MATCH(awk,TOK_LPAREN))
|
|
{
|
|
QSE_MEMSET (&fnc, 0, QSE_SIZEOF(fnc));
|
|
fnc.name.ptr = full->ptr;
|
|
fnc.name.len = full->len;
|
|
fnc.spec = sym.u.fnc;
|
|
fnc.mod = mod;
|
|
|
|
nde = parse_fncall (awk, full, &fnc, xloc, 0);
|
|
}
|
|
else
|
|
{
|
|
SETERR_TOK (awk, QSE_AWK_ELPAREN);
|
|
}
|
|
break;
|
|
|
|
case QSE_AWK_MOD_INT:
|
|
nde = new_int_node (awk, sym.u.in.val, xloc);
|
|
/* i don't remember the symbol in the original form */
|
|
break;
|
|
|
|
case QSE_AWK_MOD_FLT:
|
|
nde = new_flt_node (awk, sym.u.flt.val, xloc);
|
|
/* i don't remember the symbol in the original form */
|
|
break;
|
|
|
|
default:
|
|
/* TODO: support MOD_VAR */
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EUNDEF, full->ptr, full->len, xloc);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return nde;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_primary_ident (
|
|
qse_awk_t* awk, const qse_awk_loc_t* xloc)
|
|
{
|
|
qse_awk_nde_t* nde = QSE_NULL;
|
|
qse_cstr_t name[2]; /* TODO: support more than 2 segments??? */
|
|
int nsegs;
|
|
|
|
QSE_ASSERT (MATCH(awk,TOK_IDENT));
|
|
|
|
nsegs = dup_ident_and_get_next (awk, xloc, name, QSE_COUNTOF(name));
|
|
if (nsegs <= -1) return QSE_NULL;
|
|
|
|
if (nsegs <= 1)
|
|
{
|
|
nde = parse_primary_ident_noseg (awk, xloc, &name[0]);
|
|
if (!nde) QSE_AWK_FREE (awk, name[0].ptr);
|
|
}
|
|
else
|
|
{
|
|
qse_cstr_t full; /* full name including :: */
|
|
qse_size_t capa;
|
|
int i;
|
|
|
|
for (capa = 0, i = 0; i < nsegs; i++) capa += name[i].len + 2; /* +2 for :: */
|
|
full.ptr = qse_awk_allocmem (awk, QSE_SIZEOF(*full.ptr) * (capa + 1));
|
|
if (full.ptr)
|
|
{
|
|
capa = qse_strncpy (&full.ptr[0], name[0].ptr, name[0].len);
|
|
for (i = 1; i < nsegs; i++)
|
|
{
|
|
capa += qse_strcpy (&full.ptr[capa], QSE_T("::"));
|
|
capa += qse_strncpy (&full.ptr[capa], name[i].ptr, name[i].len);
|
|
}
|
|
full.ptr[capa] = QSE_T('\0');
|
|
full.len = capa;
|
|
|
|
nde = parse_primary_ident_segs (awk, xloc, &full, name, nsegs);
|
|
if (!nde || nde->type != QSE_AWK_NDE_FNC)
|
|
{
|
|
/* the FNC node takes the full name but other
|
|
* nodes don't. so i need to free it. i know it's ugly. */
|
|
QSE_MMGR_FREE (awk->mmgr, full.ptr);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* error number is set in qse_awk_allocmem */
|
|
ADJERR_LOC (awk, xloc);
|
|
}
|
|
|
|
/* i don't need the name segments */
|
|
while (nsegs > 0) QSE_AWK_FREE (awk, name[--nsegs].ptr);
|
|
}
|
|
|
|
return nde;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_hashidx (
|
|
qse_awk_t* awk, const qse_cstr_t* name, const qse_awk_loc_t* xloc)
|
|
{
|
|
qse_awk_nde_t* idx, * tmp, * last;
|
|
qse_awk_nde_var_t* nde;
|
|
qse_size_t idxa;
|
|
|
|
idx = QSE_NULL;
|
|
last = QSE_NULL;
|
|
|
|
do
|
|
{
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
if (idx != QSE_NULL) qse_awk_clrpt (awk, idx);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
{
|
|
qse_awk_loc_t eloc = awk->tok.loc;
|
|
tmp = parse_expr_withdc (awk, &eloc);
|
|
}
|
|
if (tmp == QSE_NULL)
|
|
{
|
|
if (idx != QSE_NULL) qse_awk_clrpt (awk, idx);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
if (idx == QSE_NULL)
|
|
{
|
|
QSE_ASSERT (last == QSE_NULL);
|
|
idx = tmp; last = tmp;
|
|
}
|
|
else
|
|
{
|
|
last->next = tmp;
|
|
last = tmp;
|
|
}
|
|
}
|
|
while (MATCH(awk,TOK_COMMA));
|
|
|
|
QSE_ASSERT (idx != QSE_NULL);
|
|
|
|
if (!MATCH(awk,TOK_RBRACK))
|
|
{
|
|
qse_awk_clrpt (awk, idx);
|
|
SETERR_TOK (awk, QSE_AWK_ERBRACK);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
if (get_token(awk) <= -1)
|
|
{
|
|
qse_awk_clrpt (awk, idx);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
nde = (qse_awk_nde_var_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*nde));
|
|
if (nde == QSE_NULL)
|
|
{
|
|
qse_awk_clrpt (awk, idx);
|
|
ADJERR_LOC (awk, xloc);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
/* search the local variable list */
|
|
idxa = qse_lda_rsearch (
|
|
awk->parse.lcls,
|
|
QSE_LDA_SIZE(awk->parse.lcls),
|
|
name->ptr, name->len
|
|
);
|
|
if (idxa != QSE_LDA_NIL)
|
|
{
|
|
nde->type = QSE_AWK_NDE_LCLIDX;
|
|
nde->loc = *xloc;
|
|
/*nde->id.name = QSE_NULL; */
|
|
nde->id.name.ptr = name->ptr;
|
|
nde->id.name.len = name->len;
|
|
nde->id.idxa = idxa;
|
|
nde->idx = idx;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
/* search the parameter name list */
|
|
idxa = qse_lda_search (awk->parse.params, 0, name->ptr, name->len);
|
|
if (idxa != QSE_LDA_NIL)
|
|
{
|
|
nde->type = QSE_AWK_NDE_ARGIDX;
|
|
nde->loc = *xloc;
|
|
/*nde->id.name = QSE_NULL; */
|
|
nde->id.name.ptr = name->ptr;
|
|
nde->id.name.len = name->len;
|
|
nde->id.idxa = idxa;
|
|
nde->idx = idx;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
/* gets the global variable index */
|
|
idxa = get_global (awk, name);
|
|
if (idxa != QSE_LDA_NIL)
|
|
{
|
|
nde->type = QSE_AWK_NDE_GBLIDX;
|
|
nde->loc = *xloc;
|
|
/*nde->id.name = QSE_NULL;*/
|
|
nde->id.name.ptr = name->ptr;
|
|
nde->id.name.len = name->len;
|
|
nde->id.idxa = idxa;
|
|
nde->idx = idx;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
if (awk->opt.trait & QSE_AWK_IMPLICIT)
|
|
{
|
|
int fnname = isfnname (awk, name);
|
|
switch (fnname)
|
|
{
|
|
case FNTYPE_FNC:
|
|
SETERR_ARG_LOC (
|
|
awk, QSE_AWK_EFNCRED, name->ptr, name->len, xloc);
|
|
goto exit_func;
|
|
|
|
case FNTYPE_FUN:
|
|
SETERR_ARG_LOC (
|
|
awk, QSE_AWK_EFUNRED, name->ptr, name->len, xloc);
|
|
goto exit_func;
|
|
}
|
|
|
|
QSE_ASSERT (fnname == 0);
|
|
|
|
nde->type = QSE_AWK_NDE_NAMEDIDX;
|
|
nde->loc = *xloc;
|
|
nde->id.name.ptr = name->ptr;
|
|
nde->id.name.len = name->len;
|
|
nde->id.idxa = (qse_size_t)-1;
|
|
nde->idx = idx;
|
|
|
|
return (qse_awk_nde_t*)nde;
|
|
}
|
|
|
|
/* undefined variable */
|
|
SETERR_ARG_LOC (awk, QSE_AWK_EUNDEF, name->ptr, name->len, xloc);
|
|
|
|
exit_func:
|
|
qse_awk_clrpt (awk, idx);
|
|
QSE_AWK_FREE (awk, nde);
|
|
|
|
return QSE_NULL;
|
|
}
|
|
|
|
static qse_awk_nde_t* parse_fncall (
|
|
qse_awk_t* awk, const qse_cstr_t* name,
|
|
qse_awk_fnc_t* fnc, const qse_awk_loc_t* xloc, int noarg)
|
|
{
|
|
qse_awk_nde_t* head, * curr, * nde;
|
|
qse_awk_nde_fncall_t* call;
|
|
qse_size_t nargs;
|
|
qse_awk_loc_t eloc;
|
|
|
|
head = curr = QSE_NULL;
|
|
call = QSE_NULL;
|
|
nargs = 0;
|
|
|
|
if (noarg) goto make_node;
|
|
if (get_token(awk) <= -1) goto oops;
|
|
|
|
if (MATCH(awk,TOK_RPAREN))
|
|
{
|
|
/* no parameters to the function call */
|
|
if (get_token(awk) <= -1) goto oops;
|
|
}
|
|
else
|
|
{
|
|
/* parse function parameters */
|
|
|
|
while (1)
|
|
{
|
|
eloc = awk->tok.loc;
|
|
nde = parse_expr_withdc (awk, &eloc);
|
|
if (nde == QSE_NULL) goto oops;
|
|
|
|
if (head == QSE_NULL) head = nde;
|
|
else curr->next = nde;
|
|
curr = nde;
|
|
|
|
nargs++;
|
|
|
|
if (MATCH(awk,TOK_RPAREN))
|
|
{
|
|
if (get_token(awk) <= -1) goto oops;
|
|
break;
|
|
}
|
|
|
|
if (!MATCH(awk,TOK_COMMA))
|
|
{
|
|
SETERR_TOK (awk, QSE_AWK_ECOMMA);
|
|
goto oops;
|
|
}
|
|
|
|
do
|
|
{
|
|
if (get_token(awk) <= -1) goto oops;
|
|
}
|
|
while (MATCH(awk,TOK_NEWLINE));
|
|
}
|
|
|
|
}
|
|
|
|
make_node:
|
|
call = (qse_awk_nde_fncall_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*call));
|
|
if (call == QSE_NULL)
|
|
{
|
|
ADJERR_LOC (awk, xloc);
|
|
goto oops;
|
|
}
|
|
|
|
if (fnc)
|
|
{
|
|
call->type = QSE_AWK_NDE_FNC;
|
|
call->loc = *xloc;
|
|
|
|
call->u.fnc.info.name.ptr = name->ptr;
|
|
call->u.fnc.info.name.len = name->len;
|
|
call->u.fnc.info.mod = fnc->mod;
|
|
call->u.fnc.spec = fnc->spec;
|
|
|
|
call->args = head;
|
|
call->nargs = nargs;
|
|
|
|
if (nargs > call->u.fnc.spec.arg.max)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_EARGTM, xloc);
|
|
goto oops;
|
|
}
|
|
else if (nargs < call->u.fnc.spec.arg.min)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_EARGTF, xloc);
|
|
goto oops;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
call->type = QSE_AWK_NDE_FUN;
|
|
call->loc = *xloc;
|
|
call->u.fun.name.ptr = name->ptr;
|
|
call->u.fun.name.len = name->len;
|
|
call->args = head;
|
|
call->nargs = nargs;
|
|
|
|
/* store a non-builtin function call into the awk->parse.funs
|
|
* table */
|
|
if (qse_htb_upsert (
|
|
awk->parse.funs, name->ptr, name->len, call, 0) == QSE_NULL)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
|
goto oops;
|
|
}
|
|
}
|
|
|
|
return (qse_awk_nde_t*)call;
|
|
|
|
oops:
|
|
if (call) QSE_AWK_FREE (awk, call);
|
|
if (head) qse_awk_clrpt (awk, head);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
/*
 * read a numeric literal into 'tok', beginning with the character held
 * in awk->sio.last.c.
 *
 * the token type is set to TOK_INT first and changed to TOK_FLT if a
 * fractional part or an exponent is found. the forms recognized are
 *   0x... / 0X...  hexadecimal
 *   0b... / 0B...  binary
 *   0...           octal. 8 or 9 following octal digits is an error
 *   digits, optionally followed by .digits and/or e[+-]digits
 *
 * the name of 'tok' must be empty on entry.
 * it returns 0 on success and -1 on failure.
 */
static int get_number (qse_awk_t* awk, qse_awk_tok_t* tok)
{
	qse_cint_t c;

	QSE_ASSERT (QSE_STR_LEN(tok->name) == 0);
	SET_TOKEN_TYPE (awk, tok, TOK_INT);

	c = awk->sio.last.c;

	if (c == QSE_T('0'))
	{
		ADD_TOKEN_CHAR (awk, tok, c);
		GET_CHAR_TO (awk, c);

		if (c == QSE_T('x') || c == QSE_T('X'))
		{
			/* hexadecimal number. the first round adds the
			 * x itself and the later rounds add the digits */
			do
			{
				ADD_TOKEN_CHAR (awk, tok, c);
				GET_CHAR_TO (awk, c);
			}
			while (QSE_AWK_ISXDIGIT (awk, c));

			return 0;
		}
		else if (c == QSE_T('b') || c == QSE_T('B'))
		{
			/* binary number. the first round adds the b itself */
			do
			{
				ADD_TOKEN_CHAR (awk, tok, c);
				GET_CHAR_TO (awk, c);
			}
			while (c == QSE_T('0') || c == QSE_T('1'));

			return 0;
		}
		else if (c != QSE_T('.'))
		{
			/* octal number */
			while (c >= QSE_T('0') && c <= QSE_T('7'))
			{
				ADD_TOKEN_CHAR (awk, tok, c);
				GET_CHAR_TO (awk, c);
			}

			if (c == QSE_T('8') || c == QSE_T('9'))
			{
				qse_char_t cc = (qse_char_t)c;
				/* report the error at the token being read,
				 * which is not necessarily awk->tok */
				SETERR_ARG_LOC (
					awk, QSE_AWK_ELXDIG,
					&cc, 1, &tok->loc);
				return -1;
			}

			return 0;
		}

		/* 0 followed by a period falls through to the
		 * floating-point handling below */
	}

	/* integral part */
	while (QSE_AWK_ISDIGIT (awk, c))
	{
		ADD_TOKEN_CHAR (awk, tok, c);
		GET_CHAR_TO (awk, c);
	}

	if (c == QSE_T('.'))
	{
		/* floating-point number */
		SET_TOKEN_TYPE (awk, tok, TOK_FLT);

		ADD_TOKEN_CHAR (awk, tok, c);
		GET_CHAR_TO (awk, c);

		while (QSE_AWK_ISDIGIT (awk, c))
		{
			ADD_TOKEN_CHAR (awk, tok, c);
			GET_CHAR_TO (awk, c);
		}
	}

	if (c == QSE_T('E') || c == QSE_T('e'))
	{
		/* exponent with an optional sign */
		SET_TOKEN_TYPE (awk, tok, TOK_FLT);

		ADD_TOKEN_CHAR (awk, tok, c);
		GET_CHAR_TO (awk, c);

		if (c == QSE_T('+') || c == QSE_T('-'))
		{
			ADD_TOKEN_CHAR (awk, tok, c);
			GET_CHAR_TO (awk, c);
		}

		while (QSE_AWK_ISDIGIT (awk, c))
		{
			ADD_TOKEN_CHAR (awk, tok, c);
			GET_CHAR_TO (awk, c);
		}
	}

	return 0;
}
|
|
|
|
static int get_string (
|
|
qse_awk_t* awk, qse_char_t end_char,
|
|
qse_char_t esc_char, int keep_esc_char,
|
|
qse_size_t preescaped, qse_awk_tok_t* tok)
|
|
{
|
|
qse_cint_t c;
|
|
qse_size_t escaped = preescaped;
|
|
qse_size_t digit_count = 0;
|
|
qse_cint_t c_acc = 0;
|
|
|
|
while (1)
|
|
{
|
|
GET_CHAR_TO (awk, c);
|
|
|
|
if (c == QSE_CHAR_EOF)
|
|
{
|
|
SETERR_LOC (awk, QSE_AWK_ESTRNC, &awk->tok.loc);
|
|
return -1;
|
|
}
|
|
|
|
if (escaped == 3)
|
|
{
|
|
if (c >= QSE_T('0') && c <= QSE_T('7'))
|
|
{
|
|
c_acc = c_acc * 8 + c - QSE_T('0');
|
|
digit_count++;
|
|
if (digit_count >= escaped)
|
|
{
|
|
/* should i limit the max to 0xFF/0377?
|
|
* if (c_acc > 0377) c_acc = 0377;*/
|
|
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
|
escaped = 0;
|
|
}
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
|
escaped = 0;
|
|
}
|
|
}
|
|
else if (escaped == QSE_TYPE_MAX(qse_size_t) || escaped == 4 || escaped == 8)
|
|
{
|
|
if (c >= QSE_T('0') && c <= QSE_T('9'))
|
|
{
|
|
c_acc = c_acc * 16 + c - QSE_T('0');
|
|
digit_count++;
|
|
if (digit_count >= escaped)
|
|
{
|
|
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
|
escaped = 0;
|
|
}
|
|
continue;
|
|
}
|
|
else if (c >= QSE_T('A') && c <= QSE_T('F'))
|
|
{
|
|
c_acc = c_acc * 16 + c - QSE_T('A') + 10;
|
|
digit_count++;
|
|
if (digit_count >= escaped)
|
|
{
|
|
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
|
escaped = 0;
|
|
}
|
|
continue;
|
|
}
|
|
else if (c >= QSE_T('a') && c <= QSE_T('f'))
|
|
{
|
|
c_acc = c_acc * 16 + c - QSE_T('a') + 10;
|
|
digit_count++;
|
|
if (digit_count >= escaped)
|
|
{
|
|
ADD_TOKEN_CHAR (awk, tok, c_acc);
|
|
escaped = 0;
|
|
}
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
qse_char_t rc;
|
|
|
|
rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'):
|
|
(escaped == 4)? QSE_T('u'): QSE_T('U');
|
|
if (digit_count == 0)
|
|
ADD_TOKEN_CHAR (awk, tok, rc);
|
|
else ADD_TOKEN_CHAR (awk, tok, c_acc);
|
|
|
|
escaped = 0;
|
|
}
|
|
}
|
|
|
|
if (escaped == 0 && c == end_char)
|
|
{
|
|
/* terminating quote */
|
|
/*GET_CHAR_TO (awk, c);*/
|
|
GET_CHAR (awk);
|
|
break;
|
|
}
|
|
|
|
if (escaped == 0 && c == esc_char)
|
|
{
|
|
escaped = 1;
|
|
continue;
|
|
}
|
|
|
|
if (escaped == 1)
|
|
{
|
|
if (c == QSE_T('n')) c = QSE_T('\n');
|
|
else if (c == QSE_T('r')) c = QSE_T('\r');
|
|
else if (c == QSE_T('t')) c = QSE_T('\t');
|
|
else if (c == QSE_T('f')) c = QSE_T('\f');
|
|
else if (c == QSE_T('b')) c = QSE_T('\b');
|
|
else if (c == QSE_T('v')) c = QSE_T('\v');
|
|
else if (c == QSE_T('a')) c = QSE_T('\a');
|
|
else if (c >= QSE_T('0') && c <= QSE_T('7') && end_char != QSE_T('/'))
|
|
{
|
|
/* i don't support the octal notation for a regular expression.
|
|
* it conflicts with the backreference notation between \1 and \7 inclusive. */
|
|
escaped = 3;
|
|
digit_count = 1;
|
|
c_acc = c - QSE_T('0');
|
|
continue;
|
|
}
|
|
else if (c == QSE_T('x'))
|
|
{
|
|
escaped = QSE_TYPE_MAX(qse_size_t);
|
|
digit_count = 0;
|
|
c_acc = 0;
|
|
continue;
|
|
}
|
|
#if defined(QSE_CHAR_IS_WCHAR)
|
|
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
|
|
{
|
|
escaped = 4;
|
|
digit_count = 0;
|
|
c_acc = 0;
|
|
continue;
|
|
}
|
|
else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4)
|
|
{
|
|
escaped = 8;
|
|
digit_count = 0;
|
|
c_acc = 0;
|
|
continue;
|
|
}
|
|
#endif
|
|
else if (keep_esc_char)
|
|
{
|
|
/* if the following character doesn't compose a proper
|
|
* escape sequence, keep the escape character.
|
|
* an unhandled escape sequence can be handled
|
|
* outside this function since the escape character
|
|
* is preserved.*/
|
|
ADD_TOKEN_CHAR (awk, tok, esc_char);
|
|
}
|
|
|
|
escaped = 0;
|
|
}
|
|
|
|
ADD_TOKEN_CHAR (awk, tok, c);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * read the body of a regular expression literal into 'tok'.
 *
 * the opening slash has been tokenized to TOK_DIV already and the
 * character following it is held in awk->sio.last.c on entry.
 * see parse_primary_rex().
 *
 * it returns 0 on success and -1 on failure.
 */
static int get_rexstr (qse_awk_t* awk, qse_awk_tok_t* tok)
{
	qse_size_t preescaped;

	if (awk->sio.last.c == QSE_T('/'))
	{
		/* an empty regular expression written as //.
		 * as the token buffer has been cleared by the caller and
		 * the token type is set to TOK_REX, all that is left to
		 * do is to read the next character. */
		GET_CHAR (awk);
		return 0;
	}

	if (awk->sio.last.c == QSE_T('\\'))
	{
		/* input like /\//. the initial escape character is added
		 * when the second character is handled in get_string() */
		preescaped = 1;
	}
	else
	{
		/* get_string() begins with reading the next character.
		 * the character already at hand must be added here */
		preescaped = 0;
		ADD_TOKEN_CHAR (awk, tok, awk->sio.last.c);
	}

	return get_string (awk, QSE_T('/'), QSE_T('\\'), 1, preescaped, tok);
}
|
|
|
|
/*
 * skip white spaces beginning with the character in awk->sio.last.c.
 *
 * if QSE_AWK_NEWLINE is off, every space character is skipped.
 * if it is on, a new line is not skipped. however, a backslash
 * immediately followed by a new line, optionally with a carriage return
 * in between, is skipped as a line continuation.
 *
 * it returns 0 on success and -1 if reading a character has failed.
 */
static int skip_spaces (qse_awk_t* awk)
{
	qse_cint_t c = awk->sio.last.c;

	if (!(awk->opt.trait & QSE_AWK_NEWLINE))
	{
		while (QSE_AWK_ISSPACE (awk, c)) GET_CHAR_TO (awk, c);
		return 0;
	}

	for (;;)
	{
		qse_awk_sio_lxc_t backslash;
		qse_awk_sio_lxc_t cr;
		int seen_cr;

		while (c != QSE_T('\n') && QSE_AWK_ISSPACE(awk,c))
			GET_CHAR_TO (awk, c);

		if (c != QSE_T('\\')) break;

		/* look ahead to see if the backslash begins a line
		 * continuation */
		backslash = awk->sio.last;
		seen_cr = 0;

		GET_CHAR_TO (awk, c);
		if (c == QSE_T('\r'))
		{
			seen_cr = 1;
			cr = awk->sio.last;
			GET_CHAR_TO (awk, c);
		}

		if (c != QSE_T('\n'))
		{
			/* not a line continuation. undo the look-ahead.
			 * the last character is pushed back first and the
			 * CR after it so that the CR is read again first */
			unget_char (awk, &awk->sio.last);
			if (seen_cr) unget_char (awk, &cr);
			/* restore the original backslash */
			awk->sio.last = backslash;
			break;
		}

		/* a line continuation. resume after the new line */
		GET_CHAR_TO (awk, c);
	}

	return 0;
}
|
|
|
|
/*
 * skip a comment beginning at the character in awk->sio.last.c.
 *
 * a comment introduced by # runs up to the new line or the end of input.
 * the new line itself is consumed only if QSE_AWK_NEWLINE is off.
 * a c-style comment runs up to the closing star-slash pair.
 *
 * it returns 1 if a comment has been skipped, 0 if the input doesn't
 * begin a comment, in which case the input position is left untouched,
 * and -1 on failure including a c-style comment that is not closed.
 */
static int skip_comment (qse_awk_t* awk)
{
	qse_cint_t c = awk->sio.last.c;
	qse_awk_sio_lxc_t lc;
	int after_star;

	if (c == QSE_T('#'))
	{
		/* skip up to \n */
		do { GET_CHAR_TO (awk, c); }
		while (c != QSE_T('\n') && c != QSE_CHAR_EOF);

		if (!(awk->opt.trait & QSE_AWK_NEWLINE)) GET_CHAR (awk);
		return 1; /* comment by # */
	}

	/* handle c-style comment */
	if (c != QSE_T('/')) return 0; /* not a comment */

	/* save the last character */
	lc = awk->sio.last;
	/* read a new character */
	GET_CHAR_TO (awk, c);

	if (c != QSE_T('*'))
	{
		/* a slash alone. unget the character read ahead */
		unget_char (awk, &awk->sio.last);
		/* restore the previous state */
		awk->sio.last = lc;
		return 0;
	}

	/* 'after_star' tells whether the previous character inside the
	 * comment was a star. tracking it character by character lets
	 * a closer preceded by a run of stars be recognized no matter
	 * how many stars there are. the star of the opener doesn't count. */
	after_star = 0;
	for (;;)
	{
		GET_CHAR_TO (awk, c);

		if (c == QSE_CHAR_EOF)
		{
			/* the comment is not closed */
			qse_awk_loc_t loc;
			loc.line = awk->sio.inp->line;
			loc.colm = awk->sio.inp->colm;
			loc.file = awk->sio.inp->name;
			SETERR_LOC (awk, QSE_AWK_ECMTNC, &loc);
			return -1;
		}

		if (after_star && c == QSE_T('/'))
		{
			/* move past the closing slash */
			GET_CHAR (awk);
			break;
		}

		after_star = (c == QSE_T('*'));
	}

	return 1; /* c-style comment */
}
|
|
|
|
/*
 * read an operator or a punctuation symbol into 'tok'.
 *
 * 'c' is the first character of the candidate symbol. more characters
 * are read from the input as long as they extend a match.
 *
 * it returns 1 if a symbol has been read, 0 if 'c' doesn't begin any
 * known symbol, and -1 on failure.
 */
static int get_symbols (qse_awk_t* awk, qse_cint_t c, qse_awk_tok_t* tok)
{
	struct ops_t
	{
		const qse_char_t* str;
		qse_size_t len;
		int tid;
		int trait; /* 0 or the traits enabling the symbol */
	};

	/* symbols sharing the leading characters must be adjacent
	 * with a longer one placed before a shorter one */
	static const struct ops_t ops[] =
	{
		{ QSE_T("==="), 3, TOK_TEQ,         0 },
		{ QSE_T("=="),  2, TOK_EQ,          0 },
		{ QSE_T("="),   1, TOK_ASSN,        0 },
		{ QSE_T("!=="), 3, TOK_TNE,         0 },
		{ QSE_T("!="),  2, TOK_NE,          0 },
		{ QSE_T("!~"),  2, TOK_NM,          0 },
		{ QSE_T("!"),   1, TOK_LNOT,        0 },
		{ QSE_T(">>="), 3, TOK_RS_ASSN,     0 },
		{ QSE_T(">>"),  2, TOK_RS,          0 },
		{ QSE_T(">="),  2, TOK_GE,          0 },
		{ QSE_T(">"),   1, TOK_GT,          0 },
		{ QSE_T("<<="), 3, TOK_LS_ASSN,     0 },
		{ QSE_T("<<"),  2, TOK_LS,          0 },
		{ QSE_T("<="),  2, TOK_LE,          0 },
		{ QSE_T("<"),   1, TOK_LT,          0 },
		{ QSE_T("||"),  2, TOK_LOR,         0 },
		{ QSE_T("|="),  2, TOK_BOR_ASSN,    0 },
		{ QSE_T("|"),   1, TOK_BOR,         0 },
		{ QSE_T("&&"),  2, TOK_LAND,        0 },
		{ QSE_T("&="),  2, TOK_BAND_ASSN,   0 },
		{ QSE_T("&"),   1, TOK_BAND,        0 },
		{ QSE_T("^^="), 3, TOK_BXOR_ASSN,   0 },
		{ QSE_T("^^"),  2, TOK_BXOR,        0 },
		{ QSE_T("^="),  2, TOK_EXP_ASSN,    0 },
		{ QSE_T("^"),   1, TOK_EXP,         0 },
		{ QSE_T("++"),  2, TOK_PLUSPLUS,    0 },
		{ QSE_T("+="),  2, TOK_PLUS_ASSN,   0 },
		{ QSE_T("+"),   1, TOK_PLUS,        0 },
		{ QSE_T("--"),  2, TOK_MINUSMINUS,  0 },
		{ QSE_T("-="),  2, TOK_MINUS_ASSN,  0 },
		{ QSE_T("-"),   1, TOK_MINUS,       0 },
		{ QSE_T("**="), 3, TOK_EXP_ASSN,    0 },
		{ QSE_T("**"),  2, TOK_EXP,         0 },
		{ QSE_T("*="),  2, TOK_MUL_ASSN,    0 },
		{ QSE_T("*"),   1, TOK_MUL,         0 },
		{ QSE_T("/="),  2, TOK_DIV_ASSN,    0 },
		{ QSE_T("/"),   1, TOK_DIV,         0 },
		{ QSE_T("\\="), 2, TOK_IDIV_ASSN,   0 },
		{ QSE_T("\\"),  1, TOK_IDIV,        0 },
		{ QSE_T("%%="), 3, TOK_CONCAT_ASSN, 0 },
		{ QSE_T("%%"),  2, TOK_CONCAT,      0 },
		{ QSE_T("%="),  2, TOK_MOD_ASSN,    0 },
		{ QSE_T("%"),   1, TOK_MOD,         0 },
		{ QSE_T("~~"),  2, TOK_BNOT,        0 },
		{ QSE_T("~"),   1, TOK_MA,          0 },
		{ QSE_T("("),   1, TOK_LPAREN,      0 },
		{ QSE_T(")"),   1, TOK_RPAREN,      0 },
		{ QSE_T("{"),   1, TOK_LBRACE,      0 },
		{ QSE_T("}"),   1, TOK_RBRACE,      0 },
		{ QSE_T("["),   1, TOK_LBRACK,      0 },
		{ QSE_T("]"),   1, TOK_RBRACK,      0 },
		{ QSE_T("$"),   1, TOK_DOLLAR,      0 },
		{ QSE_T(","),   1, TOK_COMMA,       0 },
		{ QSE_T(";"),   1, TOK_SEMICOLON,   0 },
		{ QSE_T("::"),  2, TOK_DBLCOLON,    0 },
		{ QSE_T(":"),   1, TOK_COLON,       0 },
		{ QSE_T("?"),   1, TOK_QUEST,       0 },
		{ QSE_T("`"),   1, TOK_BQUOTE,      0 },
		{ QSE_NULL,     0, 0,               0 }
	};

	const struct ops_t* p;
	int idx = 0; /* number of characters matched so far */

	/* note that the loop below is not generic enough.
	 * you must keep the operator strings in a particular order.
	 *
	 * 'idx' is never reset while 'p' moves down the table. once some
	 * characters have matched, the entries that follow are compared
	 * from the same position onwards. */
	for (p = ops; p->str != QSE_NULL; )
	{
		if (p->trait == 0 || (awk->opt.trait & p->trait))
		{
			if (p->str[idx] == QSE_T('\0'))
			{
				/* the whole symbol has matched */
				ADD_TOKEN_STR (awk, tok, p->str, p->len);
				SET_TOKEN_TYPE (awk, tok, p->tid);
				return 1;
			}

			if (c == p->str[idx])
			{
				/* one more character has matched. stay on
				 * the same entry and check the next one */
				idx++;
				GET_CHAR_TO (awk, c);
				continue;
			}
		}

		p++;
	}

	return 0;
}
|
|
|
|
/*
 * read the next token from the source input into 'tok'.
 *
 * spaces and comments are skipped first. the name of 'tok' is cleared
 * and its location is set to the position of the first character of the
 * token. the name and the type are filled according to the kind of the
 * token found.
 *
 * when the end of an included file is reached, reading resumes in the
 * including file and a semicolon following the @include directive
 * is skipped.
 *
 * all errors are reported against 'tok' rather than awk->tok as this
 * function is used to fill awk->ntok as well. see preget_token().
 *
 * it returns 0 on success and -1 on failure.
 */
static int get_token_into (qse_awk_t* awk, qse_awk_tok_t* tok)
{
	qse_cint_t c;
	int n;
	int skip_semicolon_after_include = 0;

retry:
	do
	{
		if (skip_spaces(awk) <= -1) return -1;
		if ((n = skip_comment(awk)) <= -1) return -1;
	}
	while (n >= 1);

	qse_str_clear (tok->name);
	tok->loc.file = awk->sio.last.file;
	tok->loc.line = awk->sio.last.line;
	tok->loc.colm = awk->sio.last.colm;

	c = awk->sio.last.c;

	if (c == QSE_CHAR_EOF)
	{
		n = end_include (awk);
		if (n <= -1) return -1;
		if (n >= 1)
		{
			/* mark that i'm retrying after end of an included file */
			skip_semicolon_after_include = 1;
			goto retry;
		}

		ADD_TOKEN_STR (awk, tok, QSE_T("<EOF>"), 5);
		SET_TOKEN_TYPE (awk, tok, TOK_EOF);
	}
	else if (c == QSE_T('\n'))
	{
		ADD_TOKEN_STR (awk, tok, QSE_T("<NL>"), 4);
		SET_TOKEN_TYPE (awk, tok, TOK_NEWLINE);
		GET_CHAR (awk);
	}
	else if (QSE_AWK_ISDIGIT (awk, c))
	{
		if (get_number (awk, tok) <= -1) return -1;
	}
	else if (c == QSE_T('.'))
	{
		qse_awk_sio_lxc_t lc;

		/* peek at the character after the period and put it back */
		lc = awk->sio.last;
		GET_CHAR_TO (awk, c);

		unget_char (awk, &awk->sio.last);
		awk->sio.last = lc;

		if (QSE_AWK_ISDIGIT (awk, c))
		{
			/* for a token such as .123 */
			if (get_number (awk, tok) <= -1) return -1;
		}
		else
		{
			c = QSE_T('.');
			goto try_get_symbols;
		}
	}
	else if (c == QSE_T('@'))
	{
		int type;

		ADD_TOKEN_CHAR (awk, tok, c);
		GET_CHAR_TO (awk, c);

		if (c != QSE_T('_') && !QSE_AWK_ISALPHA (awk, c))
		{
			/* this extended keyword is empty,
			 * not followed by a valid word */
			SETERR_LOC (awk, QSE_AWK_EXKWEM, &tok->loc);
			return -1;
		}

		/* expect normal identifier starting with an alphabet */
		do
		{
			ADD_TOKEN_CHAR (awk, tok, c);
			GET_CHAR_TO (awk, c);
		}
		while (c == QSE_T('_') ||
		       QSE_AWK_ISALPHA (awk, c) ||
		       QSE_AWK_ISDIGIT (awk, c));

		type = classify_ident (awk, QSE_STR_XSTR(tok->name));
		if (type == TOK_IDENT)
		{
			/* the word after @ is not a known keyword.
			 * the name and the location of the token being
			 * read are reported */
			qse_awk_seterror (awk, QSE_AWK_EXKWNR, QSE_STR_XSTR(tok->name), &tok->loc);
			return -1;
		}
		SET_TOKEN_TYPE (awk, tok, type);
	}
	else if (c == QSE_T('_') || QSE_AWK_ISALPHA (awk, c))
	{
		int type;

		/* identifier */
		do
		{
			ADD_TOKEN_CHAR (awk, tok, c);
			GET_CHAR_TO (awk, c);
		}
		while (c == QSE_T('_') ||
		       QSE_AWK_ISALPHA (awk, c) ||
		       QSE_AWK_ISDIGIT (awk, c));

		type = classify_ident (awk, QSE_STR_XSTR(tok->name));
		SET_TOKEN_TYPE (awk, tok, type);
	}
	else if (c == QSE_T('\"'))
	{
		/* double-quoted string */
		SET_TOKEN_TYPE (awk, tok, TOK_STR);
		if (get_string (awk, c, QSE_T('\\'), 0, 0, tok) <= -1) return -1;
	}
	else if (c == QSE_T('\''))
	{
		/* single-quoted string - no escaping */

		SET_TOKEN_TYPE (awk, tok, TOK_STR);

		while (1)
		{
			GET_CHAR_TO (awk, c);

			if (c == QSE_CHAR_EOF)
			{
				SETERR_LOC (awk, QSE_AWK_ESTRNC, &tok->loc);
				return -1;
			}

			if (c == QSE_T('\''))
			{
				/* terminating quote */
				GET_CHAR (awk);
				break;
			}

			ADD_TOKEN_CHAR (awk, tok, c);
		}
	}
	else
	{
	try_get_symbols:
		n = get_symbols (awk, c, tok);
		if (n <= -1) return -1;
		if (n == 0)
		{
			/* not handled yet */
			if (c == QSE_T('\0'))
			{
				SETERR_ARG_LOC (
					awk, QSE_AWK_ELXCHR,
					QSE_T("<NUL>"), 5, &tok->loc);
			}
			else
			{
				qse_char_t cc = (qse_char_t)c;
				SETERR_ARG_LOC (awk, QSE_AWK_ELXCHR, &cc, 1, &tok->loc);
			}
			return -1;
		}

		if (skip_semicolon_after_include && (tok->type == TOK_SEMICOLON || tok->type == TOK_NEWLINE))
		{
			/* this handles the optional semicolon after the
			 * included file named as in @include "file-name"; */
			skip_semicolon_after_include = 0;
			goto retry;
		}
	}

	if (skip_semicolon_after_include && !(awk->opt.trait & QSE_AWK_NEWLINE))
	{
		/* semicolon has not been skipped yet and the
		 * newline option is not set. */
		qse_awk_seterror (awk, QSE_AWK_ESCOLON, QSE_STR_XSTR(tok->name), &tok->loc);
		return -1;
	}

	return 0;
}
|
|
|
|
/*
 * advance to the next token.
 *
 * the current token in awk->tok is moved to awk->ptok. the next token is
 * then taken from awk->ntok if a token has been pre-read by
 * preget_token(), or read from the source input otherwise.
 *
 * it returns 0 on success and -1 on failure.
 */
static int get_token (qse_awk_t* awk)
{
	/* the current token becomes the previous token. the name buffers
	 * are exchanged instead of being copied */
	qse_str_swap (awk->ptok.name, awk->tok.name);
	awk->ptok.loc.colm = awk->tok.loc.colm;
	awk->ptok.loc.line = awk->tok.loc.line;
	awk->ptok.loc.file = awk->tok.loc.file;
	awk->ptok.type = awk->tok.type;

	/* an empty name in awk->ntok means that no token is pre-read */
	if (QSE_STR_LEN(awk->ntok.name) == 0)
	{
		return get_token_into (awk, &awk->tok);
	}

	/* the pre-read token becomes the current token */
	qse_str_swap (awk->tok.name, awk->ntok.name);
	awk->tok.loc.colm = awk->ntok.loc.colm;
	awk->tok.loc.line = awk->ntok.loc.line;
	awk->tok.loc.file = awk->ntok.loc.file;
	awk->tok.type = awk->ntok.type;

	/* mark that the pre-read token has been consumed */
	qse_str_clear (awk->ntok.name);

	return 0;
}
|
|
|
|
/*
 * read a token in advance into awk->ntok without touching awk->tok.
 *
 * LIMITATION: no more than one token can be pre-read in a row
 *             without consumption.
 *
 * it returns 0 on success and -1 on failure.
 */
static int preget_token (qse_awk_t* awk)
{
	if (QSE_STR_LEN(awk->ntok.name) == 0)
	{
		/* there is no token pre-read. get a new token and
		 * place it in awk->ntok. */
		return get_token_into (awk, &awk->ntok);
	}

	/* you can't read more than 1 token in advance.
	 *
	 * if there is a token already read in, it is just retained.
	 *
	 * parsing an expression like '$0 | a' causes this function
	 * to be called before get_token() consumes the pre-read token.
	 *
	 * because the expression like this
	 *    print $1 | getline x;
	 * must be parsed as
	 *    print $(1 | getline x);
	 * preget_token() is called from parse_primary().
	 *
	 * for the expression '$0 | $2',
	 *  1) parse_primary() calls parse_primary_positional() if $ is
	 *     encountered.
	 *  2) parse_primary_positional() calls parse_primary() recursively
	 *     for the positional part after $.
	 *  3) parse_primary() in #2 calls preget_token()
	 *  4) parse_primary() in #1 also calls preget_token().
	 *
	 * this point is reached because no token is consumed between
	 * #3 and #4.
	 *
	 * in short, it happens if getline doesn't follow | after the
	 * positional.
	 *    $1 | $2
	 *    $1 | abc + 20
	 */
	return 0;
}
|
|
|
|
/*
 * tell whether 'name' is a keyword or a plain identifier.
 *
 * kwtab is looked up by binary search. it returns the token type of the
 * keyword if 'name' is found and every trait required by the keyword is
 * turned on in awk->opt.trait. otherwise, it returns TOK_IDENT.
 */
static int classify_ident (qse_awk_t* awk, const qse_cstr_t* name)
{
	/* the int type is good enough for the bounds because kwtab is
	 * known to be small. being signed, 'hi' can go below 0 safely
	 * when the first entry is greater than the name. */
	int lo = 0;
	int hi = QSE_COUNTOF(kwtab) - 1;

	while (lo <= hi)
	{
		int mid = (lo + hi) / 2;
		kwent_t* ent = &kwtab[mid];
		int diff = qse_strxncmp (ent->name.ptr, ent->name.len, name->ptr, name->len);

		if (diff == 0)
		{
			/* a keyword whose traits are not all enabled
			 * is an ordinary identifier */
			if ((awk->opt.trait & ent->trait) != ent->trait) return TOK_IDENT;
			return ent->type;
		}

		if (diff > 0) hi = mid - 1;
		else lo = mid + 1;
	}

	return TOK_IDENT;
}
|
|
|
|
struct deparse_func_t
|
|
{
|
|
qse_awk_t* awk;
|
|
qse_char_t* tmp;
|
|
qse_size_t tmp_len;
|
|
int ret;
|
|
};
|
|
|
|
/*
 * write the parse tree back as source text through awk->sio.outf.
 *
 * the output stream is opened first. the text is then written in this
 * order - the declaration of the global variables added on top of
 * awk->tree.ngbls_base, the functions in awk->tree.funs, the BEGIN
 * blocks, the pattern-action chain, and the END blocks. finally, the
 * buffered output is flushed and the stream is closed.
 *
 * it returns 0 on success and -1 on failure.
 */
static int deparse (qse_awk_t* awk)
{
	qse_awk_nde_t* nde;
	qse_awk_chain_t* chain;
	qse_char_t tmp[QSE_SIZEOF(qse_size_t)*8 + 32];
	struct deparse_func_t df;
	int n = 0;
	qse_ssize_t op;
	qse_cstr_t kw;

	QSE_ASSERT (awk->sio.outf != QSE_NULL);

	QSE_MEMSET (&awk->sio.arg, 0, QSE_SIZEOF(awk->sio.arg));

	CLRERR (awk);
	op = awk->sio.outf (
		awk, QSE_AWK_SIO_OPEN, &awk->sio.arg, QSE_NULL, 0);
	if (op <= -1)
	{
		/* keep the error set by the handler if any */
		if (ISNOERR(awk))
			SETERR_ARG (awk, QSE_AWK_EOPEN, QSE_T("<SOUT>"), 6);
		return -1;
	}

#define EXIT_DEPARSE() do { n = -1; goto exit_deparse; } while(0)

	if (awk->tree.ngbls > awk->tree.ngbls_base)
	{
		qse_size_t i, len;

		QSE_ASSERT (awk->tree.ngbls > 0);

		qse_awk_getkwname (awk, QSE_AWK_KWID_XGLOBAL, &kw);
		if (qse_awk_putsrcstrn(awk,kw.ptr,kw.len) <= -1 ||
		    qse_awk_putsrcstr (awk, QSE_T(" ")) <= -1)
		{
			EXIT_DEPARSE ();
		}

		/* the names are separated by a comma and a space */
		for (i = awk->tree.ngbls_base; i < awk->tree.ngbls; i++)
		{
			if (i > awk->tree.ngbls_base)
			{
				if (qse_awk_putsrcstr (awk, QSE_T(", ")) <= -1)
					EXIT_DEPARSE ();
			}

			if (awk->opt.trait & QSE_AWK_IMPLICIT)
			{
				/* a name generated from the index */
				len = qse_awk_inttostr (
					awk, (qse_awk_int_t)i,
					10, QSE_T("__g"), tmp, QSE_COUNTOF(tmp));
				QSE_ASSERT (len != (qse_size_t)-1);
				if (qse_awk_putsrcstrn (awk, tmp, len) <= -1)
				{
					EXIT_DEPARSE ();
				}
			}
			else
			{
				/* use the actual name if no named variable
				 * is allowed */
				if (qse_awk_putsrcstrn (awk,
					QSE_LDA_DPTR(awk->parse.gbls,i),
					QSE_LDA_DLEN(awk->parse.gbls,i)) <= -1)
				{
					EXIT_DEPARSE ();
				}
			}
		}

		/* close the declaration and leave a blank line after it */
		if (awk->opt.trait & QSE_AWK_CRLF)
		{
			if (qse_awk_putsrcstr (awk, QSE_T(";\r\n\r\n")) <= -1)
			{
				EXIT_DEPARSE ();
			}
		}
		else
		{
			if (qse_awk_putsrcstr (awk, QSE_T(";\n\n")) <= -1)
			{
				EXIT_DEPARSE ();
			}
		}
	}

	/* write each function via deparse_func(). a failure inside the
	 * walk is handed back in df.ret */
	df.awk = awk;
	df.tmp = tmp;
	df.tmp_len = QSE_COUNTOF(tmp);
	df.ret = 0;

	qse_htb_walk (awk->tree.funs, deparse_func, &df);
	if (df.ret <= -1)
	{
		EXIT_DEPARSE ();
	}

	/* BEGIN blocks */
	for (nde = awk->tree.begin; nde != QSE_NULL; nde = nde->next)
	{
		qse_awk_getkwname (awk, QSE_AWK_KWID_BEGIN, &kw);

		if (qse_awk_putsrcstrn (awk, kw.ptr, kw.len) <= -1) EXIT_DEPARSE ();
		if (qse_awk_putsrcstr (awk, QSE_T(" ")) <= -1) EXIT_DEPARSE ();
		if (qse_awk_prnnde (awk, nde) <= -1) EXIT_DEPARSE ();

		if (awk->opt.trait & QSE_AWK_CRLF)
		{
			if (put_char (awk, QSE_T('\r')) <= -1) EXIT_DEPARSE ();
		}

		if (put_char (awk, QSE_T('\n')) <= -1) EXIT_DEPARSE ();
	}

	/* pattern-action chain */
	for (chain = awk->tree.chain; chain != QSE_NULL; chain = chain->next)
	{
		if (chain->pattern != QSE_NULL)
		{
			if (qse_awk_prnptnpt (awk, chain->pattern) <= -1)
				EXIT_DEPARSE ();
		}

		if (chain->action != QSE_NULL)
		{
			/* a space separates the pattern from the action */
			if (chain->pattern != QSE_NULL)
			{
				if (put_char (awk, QSE_T(' ')) <= -1)
					EXIT_DEPARSE ();
			}
			if (qse_awk_prnpt (awk, chain->action) <= -1)
				EXIT_DEPARSE ();
		}
		else
		{
			/* blockless pattern */
			if (awk->opt.trait & QSE_AWK_CRLF)
			{
				if (put_char (awk, QSE_T('\r')) <= -1)
					EXIT_DEPARSE ();
			}

			if (put_char (awk, QSE_T('\n')) <= -1)
				EXIT_DEPARSE ();
		}

		if (awk->opt.trait & QSE_AWK_CRLF)
		{
			if (put_char (awk, QSE_T('\r')) <= -1)
				EXIT_DEPARSE ();
		}

		if (put_char (awk, QSE_T('\n')) <= -1)
			EXIT_DEPARSE ();
	}

	/* END blocks. unlike a BEGIN block, no line break is written
	 * after an END block */
	for (nde = awk->tree.end; nde != QSE_NULL; nde = nde->next)
	{
		qse_awk_getkwname (awk, QSE_AWK_KWID_END, &kw);

		if (qse_awk_putsrcstrn (awk, kw.ptr, kw.len) <= -1) EXIT_DEPARSE ();
		if (qse_awk_putsrcstr (awk, QSE_T(" ")) <= -1) EXIT_DEPARSE ();
		if (qse_awk_prnnde (awk, nde) <= -1) EXIT_DEPARSE ();
	}

	if (flush_out (awk) <= -1) EXIT_DEPARSE ();

exit_deparse:
	if (n == 0) CLRERR (awk);

	/* the stream is closed whether or not writing has succeeded.
	 * a close failure is reported only if nothing has failed before */
	if (awk->sio.outf (
		awk, QSE_AWK_SIO_CLOSE, &awk->sio.arg, QSE_NULL, 0) != 0)
	{
		if (n == 0)
		{
			if (ISNOERR(awk))
				SETERR_ARG (awk, QSE_AWK_ECLOSE, QSE_T("<SOUT>"), 6);
			n = -1;
		}
	}

	return n;
}
|
|
|
|
static qse_htb_walk_t deparse_func (
|
|
qse_htb_t* map, qse_htb_pair_t* pair, void* arg)
|
|
{
|
|
struct deparse_func_t* df = (struct deparse_func_t*)arg;
|
|
qse_awk_fun_t* fun = (qse_awk_fun_t*)QSE_HTB_VPTR(pair);
|
|
qse_size_t i, n;
|
|
qse_cstr_t kw;
|
|
|
|
QSE_ASSERT (qse_strxncmp (QSE_HTB_KPTR(pair), QSE_HTB_KLEN(pair), fun->name.ptr, fun->name.len) == 0);
|
|
|
|
#define PUT_C(x,c) \
|
|
if (put_char(x->awk,c)==-1) { \
|
|
x->ret = -1; return QSE_HTB_WALK_STOP; \
|
|
}
|
|
|
|
#define PUT_S(x,str) \
|
|
if (qse_awk_putsrcstr(x->awk,str) <= -1) { \
|
|
x->ret = -1; return QSE_HTB_WALK_STOP; \
|
|
}
|
|
|
|
#define PUT_SX(x,str,len) \
|
|
if (qse_awk_putsrcstrn (x->awk, str, len) <= -1) { \
|
|
x->ret = -1; return QSE_HTB_WALK_STOP; \
|
|
}
|
|
|
|
qse_awk_getkwname (df->awk, QSE_AWK_KWID_FUNCTION, &kw);
|
|
PUT_SX (df, kw.ptr, kw.len);
|
|
|
|
PUT_C (df, QSE_T(' '));
|
|
PUT_SX (df, fun->name.ptr, fun->name.len);
|
|
PUT_S (df, QSE_T(" ("));
|
|
|
|
for (i = 0; i < fun->nargs; )
|
|
{
|
|
n = qse_awk_inttostr (
|
|
df->awk, i++, 10,
|
|
QSE_T("__p"), df->tmp, df->tmp_len);
|
|
QSE_ASSERT (n != (qse_size_t)-1);
|
|
PUT_SX (df, df->tmp, n);
|
|
|
|
if (i >= fun->nargs) break;
|
|
PUT_S (df, QSE_T(", "));
|
|
}
|
|
|
|
PUT_S (df, QSE_T(")"));
|
|
if (df->awk->opt.trait & QSE_AWK_CRLF) PUT_C (df, QSE_T('\r'));
|
|
|
|
PUT_C (df, QSE_T('\n'));
|
|
|
|
if (qse_awk_prnpt (df->awk, fun->body) <= -1) return -1;
|
|
if (df->awk->opt.trait & QSE_AWK_CRLF)
|
|
{
|
|
PUT_C (df, QSE_T('\r'));
|
|
}
|
|
PUT_C (df, QSE_T('\n'));
|
|
|
|
return QSE_HTB_WALK_FORWARD;
|
|
|
|
#undef PUT_C
|
|
#undef PUT_S
|
|
#undef PUT_SX
|
|
}
|
|
|
|
/*
 * put_char() appends a single character to the source output buffer
 * and flushes the buffer as soon as it becomes full.
 * it returns 0 on success and -1 if flushing has failed.
 */
static int put_char (qse_awk_t* awk, qse_char_t c)
{
	awk->sio.arg.b.buf[awk->sio.arg.b.len] = c;
	awk->sio.arg.b.len++;

	/* there is still room in the buffer. no flushing is needed yet */
	if (awk->sio.arg.b.len < QSE_COUNTOF(awk->sio.arg.b.buf)) return 0;

	return (flush_out (awk) <= -1)? -1: 0;
}
|
|
|
|
/*
 * flush_out() hands the pending part of the source output buffer
 * (from b.pos up to b.len) to the output handler, repeating the call
 * until everything has been consumed, and then resets the buffer.
 *
 * it returns 0 on success. if the handler returns 0 or a negative
 * number, it returns -1 after setting QSE_AWK_EWRITE unless the handler
 * has set an error already.
 */
static int flush_out (qse_awk_t* awk)
{
	for (;;)
	{
		qse_ssize_t written;

		if (awk->sio.arg.b.pos >= awk->sio.arg.b.len) break;

		/* clear the error so that it's possible to tell below
		 * whether the handler has set an error by itself */
		CLRERR (awk);
		written = awk->sio.outf (
			awk, QSE_AWK_SIO_WRITE, &awk->sio.arg,
			&awk->sio.arg.b.buf[awk->sio.arg.b.pos],
			awk->sio.arg.b.len - awk->sio.arg.b.pos
		);
		if (written <= 0)
		{
			if (ISNOERR(awk))
			{
				SETERR_ARG (awk, QSE_AWK_EWRITE, QSE_T("<SOUT>"), 6);
			}
			return -1;
		}

		awk->sio.arg.b.pos += written;
	}

	/* everything has been written. start over with an empty buffer */
	awk->sio.arg.b.len = 0;
	awk->sio.arg.b.pos = 0;
	return 0;
}
|
|
|
|
/*
 * qse_awk_putsrcstr() writes a null-terminated string to the source
 * output, one character at a time via put_char().
 * it returns 0 on success and -1 upon the first failure.
 */
int qse_awk_putsrcstr (qse_awk_t* awk, const qse_char_t* str)
{
	const qse_char_t* cur;

	for (cur = str; *cur != QSE_T('\0'); cur++)
	{
		if (put_char (awk, *cur) <= -1) return -1;
	}

	return 0;
}
|
|
|
|
int qse_awk_putsrcstrn (
|
|
qse_awk_t* awk, const qse_char_t* str, qse_size_t len)
|
|
{
|
|
const qse_char_t* end = str + len;
|
|
|
|
while (str < end)
|
|
{
|
|
if (put_char (awk, *str) <= -1) return -1;
|
|
str++;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if defined(QSE_ENABLE_STATIC_MODULE)

/* module information is hardcoded when modules are linked statically */
#include "mod-dir.h"
#include "mod-math.h"
#include "mod-str.h"
#include "mod-sys.h"
#include "mod-sed.h"
#if defined(HAVE_MPI)
#	include "mod-mpi.h"
#endif
#if defined(HAVE_UCI)
#	include "mod-uci.h"
#endif

/*
 * this table maps a module name to the entry point of the module.
 * it is consulted when modules are linked statically into the main
 * awk module. add an entry here whenever a new module is added.
 */
static struct
{
	qse_char_t* modname;
	int (*modload) (qse_awk_mod_t* mod, qse_awk_t* awk);
} static_modtab[] =
{
	{ QSE_T("dir"),  qse_awk_mod_dir },
	{ QSE_T("math"), qse_awk_mod_math },
#if defined(HAVE_MPI)
	{ QSE_T("mpi"),  qse_awk_mod_mpi },
#endif
	{ QSE_T("sed"),  qse_awk_mod_sed },
	{ QSE_T("str"),  qse_awk_mod_str },
	{ QSE_T("sys"),  qse_awk_mod_sys },
#if defined(HAVE_UCI)
	{ QSE_T("uci"),  qse_awk_mod_uci }
#endif
};

#endif
|
|
|
|
static qse_awk_mod_t* query_module (
|
|
qse_awk_t* awk, const qse_cstr_t segs[], int nsegs,
|
|
qse_awk_mod_sym_t* sym)
|
|
{
|
|
|
|
qse_rbt_pair_t* pair;
|
|
qse_awk_mod_data_t* mdp;
|
|
qse_cstr_t ea;
|
|
int n;
|
|
|
|
QSE_ASSERT (nsegs == 2);
|
|
|
|
|
|
pair = qse_rbt_search (awk->modtab, segs[0].ptr, segs[0].len);
|
|
if (pair)
|
|
{
|
|
mdp = (qse_awk_mod_data_t*)QSE_RBT_VPTR(pair);
|
|
}
|
|
else
|
|
{
|
|
qse_awk_mod_data_t md;
|
|
qse_awk_mod_load_t load;
|
|
qse_awk_mod_spec_t spec;
|
|
qse_size_t buflen;
|
|
/*qse_char_t buf[64 + 15] = QSE_T("_qse_awk_mod_");*/
|
|
qse_char_t buf[64 + 15] =
|
|
{
|
|
QSE_T('_'),
|
|
QSE_T('q'),
|
|
QSE_T('s'),
|
|
QSE_T('e'),
|
|
QSE_T('_'),
|
|
QSE_T('a'),
|
|
QSE_T('w'),
|
|
QSE_T('k'),
|
|
QSE_T('_'),
|
|
QSE_T('m'),
|
|
QSE_T('o'),
|
|
QSE_T('d'),
|
|
QSE_T('_')
|
|
/* the terminating null isn't needed */
|
|
};
|
|
|
|
if (segs[0].len > QSE_COUNTOF(buf) - 15)
|
|
{
|
|
/* module name too long */
|
|
ea.ptr = segs[0].ptr;
|
|
ea.len = segs[0].len;
|
|
qse_awk_seterror (awk, QSE_AWK_ESEGTL, &ea, QSE_NULL);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
#if defined(QSE_ENABLE_STATIC_MODULE)
|
|
/* TODO: binary search ... */
|
|
for (n = 0; n < QSE_COUNTOF(static_modtab); n++)
|
|
{
|
|
if (qse_strcmp (static_modtab[n].modname, segs[0].ptr) == 0)
|
|
{
|
|
load = static_modtab[n].modload;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (n >= QSE_COUNTOF(static_modtab))
|
|
{
|
|
ea.ptr = segs[0].ptr;
|
|
ea.len = segs[0].len;
|
|
qse_awk_seterror (awk, QSE_AWK_ENOENT, &ea, QSE_NULL);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
QSE_MEMSET (&md, 0, QSE_SIZEOF(md));
|
|
|
|
/* i copy-insert 'md' into the table before calling 'load'.
|
|
* to pass the same address to load(), query(), etc */
|
|
pair = qse_rbt_insert (awk->modtab, segs[0].ptr, segs[0].len, &md, QSE_SIZEOF(md));
|
|
if (pair == QSE_NULL)
|
|
{
|
|
qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
mdp = (qse_awk_mod_data_t*)QSE_RBT_VPTR(pair);
|
|
if (load (&mdp->mod, awk) <= -1)
|
|
{
|
|
qse_rbt_delete (awk->modtab, segs[0].ptr, segs[0].len);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
#else
|
|
|
|
QSE_MEMSET (&spec, 0, QSE_SIZEOF(spec));
|
|
|
|
if (awk->opt.mod[0].len > 0)
|
|
spec.prefix = awk->opt.mod[0].ptr;
|
|
else spec.prefix = QSE_T(QSE_AWK_DEFAULT_MODPREFIX);
|
|
|
|
if (awk->opt.mod[1].len > 0)
|
|
spec.postfix = awk->opt.mod[1].ptr;
|
|
else spec.postfix = QSE_T(QSE_AWK_DEFAULT_MODPOSTFIX);
|
|
|
|
QSE_MEMSET (&md, 0, QSE_SIZEOF(md));
|
|
if (awk->prm.modopen && awk->prm.modsym && awk->prm.modclose)
|
|
{
|
|
spec.name = segs[0].ptr;
|
|
md.handle = awk->prm.modopen (awk, &spec);
|
|
}
|
|
else md.handle = QSE_NULL;
|
|
|
|
if (md.handle == QSE_NULL)
|
|
{
|
|
ea.ptr = segs[0].ptr;
|
|
ea.len = segs[0].len;
|
|
qse_awk_seterror (awk, QSE_AWK_ENOENT, &ea, QSE_NULL);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
buflen = qse_strcpy (&buf[13], segs[0].ptr);
|
|
/* attempt qse_awk_mod_xxx */
|
|
load = awk->prm.modsym (awk, md.handle, &buf[1]);
|
|
if (!load)
|
|
{
|
|
/* attempt _qse_awk_mod_xxx */
|
|
load = awk->prm.modsym (awk, md.handle, &buf[0]);
|
|
if (!load)
|
|
{
|
|
/* attempt qse_awk_mod_xxx_ */
|
|
buf[13 + buflen] = QSE_T('_');
|
|
buf[13 + buflen + 1] = QSE_T('\0');
|
|
load = awk->prm.modsym (awk, md.handle, &buf[1]);
|
|
if (!load)
|
|
{
|
|
ea.ptr = &buf[1];
|
|
ea.len = 12 + buflen;
|
|
qse_awk_seterror (awk, QSE_AWK_ENOENT, &ea, QSE_NULL);
|
|
|
|
awk->prm.modclose (awk, md.handle);
|
|
return QSE_NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* i copy-insert 'md' into the table before calling 'load'.
|
|
* to pass the same address to load(), query(), etc */
|
|
pair = qse_rbt_insert (awk->modtab, segs[0].ptr, segs[0].len, &md, QSE_SIZEOF(md));
|
|
if (pair == QSE_NULL)
|
|
{
|
|
qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
|
|
awk->prm.modclose (awk, md.handle);
|
|
return QSE_NULL;
|
|
}
|
|
|
|
mdp = (qse_awk_mod_data_t*)QSE_RBT_VPTR(pair);
|
|
if (load (&mdp->mod, awk) <= -1)
|
|
{
|
|
qse_rbt_delete (awk->modtab, segs[0].ptr, segs[0].len);
|
|
awk->prm.modclose (awk, mdp->handle);
|
|
return QSE_NULL;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
n = mdp->mod.query (&mdp->mod, awk, segs[1].ptr, sym);
|
|
return (n <= -1)? QSE_NULL: &mdp->mod;
|
|
}
|
|
|