/* * $Id: parse.c,v 1.223 2006-12-16 14:43:50 bacon Exp $ */ #include enum { TOKEN_EOF, TOKEN_NEWLINE, /* TOKEN_XXX_ASSIGNs should in sync * with __assop in __assign_to_opcode */ TOKEN_ASSIGN, TOKEN_PLUS_ASSIGN, TOKEN_MINUS_ASSIGN, TOKEN_MUL_ASSIGN, TOKEN_DIV_ASSIGN, TOKEN_IDIV_ASSIGN, TOKEN_MOD_ASSIGN, TOKEN_EXP_ASSIGN, TOKEN_EQ, TOKEN_NE, TOKEN_LE, TOKEN_LT, TOKEN_GE, TOKEN_GT, TOKEN_NM, /* not match */ TOKEN_NOT, TOKEN_PLUS, TOKEN_PLUSPLUS, TOKEN_MINUS, TOKEN_MINUSMINUS, TOKEN_MUL, TOKEN_DIV, TOKEN_IDIV, TOKEN_MOD, TOKEN_LOR, TOKEN_LAND, TOKEN_BOR, TOKEN_BXOR, TOKEN_BAND, TOKEN_BORAND, TOKEN_TILDE, /* used for unary bitwise-not and regex match */ TOKEN_RSHIFT, TOKEN_LSHIFT, TOKEN_IN, TOKEN_EXP, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_LBRACE, TOKEN_RBRACE, TOKEN_LBRACK, TOKEN_RBRACK, TOKEN_DOLLAR, TOKEN_COMMA, TOKEN_PERIOD, TOKEN_SEMICOLON, TOKEN_COLON, TOKEN_QUEST, TOKEN_BEGIN, TOKEN_END, TOKEN_FUNCTION, TOKEN_LOCAL, TOKEN_GLOBAL, TOKEN_IF, TOKEN_ELSE, TOKEN_WHILE, TOKEN_FOR, TOKEN_DO, TOKEN_BREAK, TOKEN_CONTINUE, TOKEN_RETURN, TOKEN_EXIT, TOKEN_NEXT, TOKEN_NEXTFILE, TOKEN_NEXTINFILE, TOKEN_NEXTOFILE, TOKEN_DELETE, TOKEN_PRINT, TOKEN_PRINTF, TOKEN_GETLINE, TOKEN_IDENT, TOKEN_INT, TOKEN_REAL, TOKEN_STR, TOKEN_REX, __TOKEN_COUNT__ }; enum { PARSE_GLOBAL, PARSE_FUNCTION, PARSE_BEGIN, PARSE_END, PARSE_BEGIN_BLOCK, PARSE_END_BLOCK, PARSE_PATTERN, PARSE_ACTION_BLOCK }; enum { PARSE_LOOP_NONE, PARSE_LOOP_WHILE, PARSE_LOOP_FOR, PARSE_LOOP_DOWHILE }; typedef struct __binmap_t __binmap_t; struct __binmap_t { int token; int binop; }; static int __parse (ase_awk_t* awk); static ase_awk_t* __parse_progunit (ase_awk_t* awk); static ase_awk_t* __collect_globals (ase_awk_t* awk); static ase_awk_t* __add_builtin_globals (ase_awk_t* awk); static ase_awk_t* __add_global ( ase_awk_t* awk, const ase_char_t* name, ase_size_t len, int force); static ase_awk_t* __collect_locals (ase_awk_t* awk, ase_size_t nlocals); static ase_awk_nde_t* __parse_function (ase_awk_t* awk); static 
ase_awk_nde_t* __parse_begin (ase_awk_t* awk); static ase_awk_nde_t* __parse_end (ase_awk_t* awk); static ase_awk_chain_t* __parse_pattern_block ( ase_awk_t* awk, ase_awk_nde_t* ptn, ase_bool_t blockless); static ase_awk_nde_t* __parse_block (ase_awk_t* awk, ase_bool_t is_top); static ase_awk_nde_t* __parse_block_dc (ase_awk_t* awk, ase_bool_t is_top); static ase_awk_nde_t* __parse_statement (ase_awk_t* awk); static ase_awk_nde_t* __parse_statement_nb (ase_awk_t* awk); static ase_awk_nde_t* __parse_expression (ase_awk_t* awk); static ase_awk_nde_t* __parse_expression0 (ase_awk_t* awk); static ase_awk_nde_t* __parse_basic_expr (ase_awk_t* awk); static ase_awk_nde_t* __parse_binary_expr ( ase_awk_t* awk, const __binmap_t* binmap, ase_awk_nde_t*(*next_level_func)(ase_awk_t*)); static ase_awk_nde_t* __parse_logical_or (ase_awk_t* awk); static ase_awk_nde_t* __parse_logical_and (ase_awk_t* awk); static ase_awk_nde_t* __parse_in (ase_awk_t* awk); static ase_awk_nde_t* __parse_regex_match (ase_awk_t* awk); static ase_awk_nde_t* __parse_bitwise_or (ase_awk_t* awk); static ase_awk_nde_t* __parse_bitwise_or_with_extio (ase_awk_t* awk); static ase_awk_nde_t* __parse_bitwise_xor (ase_awk_t* awk); static ase_awk_nde_t* __parse_bitwise_and (ase_awk_t* awk); static ase_awk_nde_t* __parse_equality (ase_awk_t* awk); static ase_awk_nde_t* __parse_relational (ase_awk_t* awk); static ase_awk_nde_t* __parse_shift (ase_awk_t* awk); static ase_awk_nde_t* __parse_concat (ase_awk_t* awk); static ase_awk_nde_t* __parse_additive (ase_awk_t* awk); static ase_awk_nde_t* __parse_multiplicative (ase_awk_t* awk); static ase_awk_nde_t* __parse_unary (ase_awk_t* awk); static ase_awk_nde_t* __parse_exponent (ase_awk_t* awk); static ase_awk_nde_t* __parse_unary_exp (ase_awk_t* awk); static ase_awk_nde_t* __parse_increment (ase_awk_t* awk); static ase_awk_nde_t* __parse_primary (ase_awk_t* awk); static ase_awk_nde_t* __parse_primary_ident (ase_awk_t* awk); static ase_awk_nde_t* __parse_hashidx ( 
ase_awk_t* awk, ase_char_t* name, ase_size_t name_len); static ase_awk_nde_t* __parse_fncall ( ase_awk_t* awk, ase_char_t* name, ase_size_t name_len, ase_awk_bfn_t* bfn); static ase_awk_nde_t* __parse_if (ase_awk_t* awk); static ase_awk_nde_t* __parse_while (ase_awk_t* awk); static ase_awk_nde_t* __parse_for (ase_awk_t* awk); static ase_awk_nde_t* __parse_dowhile (ase_awk_t* awk); static ase_awk_nde_t* __parse_break (ase_awk_t* awk); static ase_awk_nde_t* __parse_continue (ase_awk_t* awk); static ase_awk_nde_t* __parse_return (ase_awk_t* awk); static ase_awk_nde_t* __parse_exit (ase_awk_t* awk); static ase_awk_nde_t* __parse_next (ase_awk_t* awk); static ase_awk_nde_t* __parse_nextfile (ase_awk_t* awk, int out); static ase_awk_nde_t* __parse_delete (ase_awk_t* awk); static ase_awk_nde_t* __parse_print (ase_awk_t* awk, int type); static int __get_token (ase_awk_t* awk); static int __get_number (ase_awk_t* awk); static int __get_charstr (ase_awk_t* awk); static int __get_rexstr (ase_awk_t* awk); static int __get_string ( ase_awk_t* awk, ase_char_t end_char, ase_char_t esc_char, ase_bool_t keep_esc_char); static int __get_char (ase_awk_t* awk); static int __unget_char (ase_awk_t* awk, ase_cint_t c); static int __skip_spaces (ase_awk_t* awk); static int __skip_comment (ase_awk_t* awk); static int __classify_ident ( ase_awk_t* awk, const ase_char_t* name, ase_size_t len); static int __assign_to_opcode (ase_awk_t* awk); static int __is_plain_var (ase_awk_nde_t* nde); static int __is_var (ase_awk_nde_t* nde); static int __deparse (ase_awk_t* awk); static int __deparse_func (ase_awk_pair_t* pair, void* arg); static int __put_char (ase_awk_t* awk, ase_char_t c); static int __flush (ase_awk_t* awk); struct __kwent { const ase_char_t* name; ase_size_t name_len; int type; int valid; /* the entry is valid when this option is set */ }; static struct __kwent __kwtab[] = { /* operators */ { ASE_T("in"), 2, TOKEN_IN, 0 }, /* top-level block starters */ { ASE_T("BEGIN"), 5, 
TOKEN_BEGIN, 0 }, { ASE_T("END"), 3, TOKEN_END, 0 }, { ASE_T("function"), 8, TOKEN_FUNCTION, 0 }, { ASE_T("func"), 4, TOKEN_FUNCTION, 0 }, /* keywords for variable declaration */ { ASE_T("local"), 5, TOKEN_LOCAL, ASE_AWK_EXPLICIT }, { ASE_T("global"), 6, TOKEN_GLOBAL, ASE_AWK_EXPLICIT }, /* keywords that start statements excluding expression statements */ { ASE_T("if"), 2, TOKEN_IF, 0 }, { ASE_T("else"), 4, TOKEN_ELSE, 0 }, { ASE_T("while"), 5, TOKEN_WHILE, 0 }, { ASE_T("for"), 3, TOKEN_FOR, 0 }, { ASE_T("do"), 2, TOKEN_DO, 0 }, { ASE_T("break"), 5, TOKEN_BREAK, 0 }, { ASE_T("continue"), 8, TOKEN_CONTINUE, 0 }, { ASE_T("return"), 6, TOKEN_RETURN, 0 }, { ASE_T("exit"), 4, TOKEN_EXIT, 0 }, { ASE_T("next"), 4, TOKEN_NEXT, 0 }, { ASE_T("nextfile"), 8, TOKEN_NEXTFILE, 0 }, { ASE_T("nextofile"), 9, TOKEN_NEXTOFILE, ASE_AWK_NEXTOFILE }, { ASE_T("delete"), 6, TOKEN_DELETE, 0 }, { ASE_T("print"), 5, TOKEN_PRINT, ASE_AWK_EXTIO }, { ASE_T("printf"), 6, TOKEN_PRINTF, ASE_AWK_EXTIO }, /* keywords that can start an expression */ { ASE_T("getline"), 7, TOKEN_GETLINE, ASE_AWK_EXTIO }, { ASE_NULL, 0, 0, 0 } }; struct __bvent { const ase_char_t* name; ase_size_t name_len; int valid; }; static struct __bvent __bvtab[] = { { ASE_T("ARGC"), 4, 0 }, { ASE_T("ARGV"), 4, 0 }, { ASE_T("CONVFMT"), 7, 0 }, { ASE_T("ENVIRON"), 7, 0 }, { ASE_T("ERRNO"), 5, 0 }, { ASE_T("FILENAME"), 8, 0 }, { ASE_T("FNR"), 3, 0 }, { ASE_T("FS"), 2, 0 }, { ASE_T("IGNORECASE"), 10, 0 }, { ASE_T("NF"), 2, 0 }, { ASE_T("NR"), 2, 0 }, { ASE_T("OFILENAME"), 9, ASE_AWK_NEXTOFILE }, { ASE_T("OFMT"), 4, 0 }, { ASE_T("OFS"), 3, 0 }, { ASE_T("ORS"), 3, 0 }, { ASE_T("RLENGTH"), 7, 0 }, { ASE_T("RS"), 2, 0 }, { ASE_T("RSTART"), 6, 0 }, { ASE_T("SUBSEP"), 6, 0 }, { ASE_NULL, 0, 0 } }; #define GET_CHAR(awk) \ do { if (__get_char (awk) == -1) return -1; } while(0) #define GET_CHAR_TO(awk,c) \ do { \ if (__get_char (awk) == -1) return -1; \ c = (awk)->src.lex.curc; \ } while(0) /* #define SET_TOKEN_TYPE(awk,code) \ do { \ 
(awk)->token.prev.type = (awk)->token.type; \ (awk)->token.type = (code); \ } while (0) */ #define SET_TOKEN_TYPE(awk,code) \ do { (awk)->token.type = (code); } while (0) #define ADD_TOKEN_CHAR(awk,c) \ do { \ if (ase_awk_str_ccat(&(awk)->token.name,(c)) == (ase_size_t)-1) { \ (awk)->errnum = ASE_AWK_ENOMEM; return -1; \ } \ } while (0) #define ADD_TOKEN_STR(awk,str) \ do { \ if (ase_awk_str_cat(&(awk)->token.name,(str)) == (ase_size_t)-1) { \ (awk)->errnum = ASE_AWK_ENOMEM; return -1; \ } \ } while (0) #define MATCH(awk,token_type) ((awk)->token.type == (token_type)) #define PANIC(awk,code) \ do { (awk)->errnum = (code); return ASE_NULL; } while (0) void ase_awk_setmaxparsedepth (ase_awk_t* awk, int types, ase_size_t depth) { if (types & ASE_AWK_DEPTH_BLOCK) { awk->parse.depth.max.block = depth; if (depth <= 0) awk->parse.parse_block = __parse_block; else awk->parse.parse_block = __parse_block_dc; } if (types & ASE_AWK_DEPTH_EXPR) { awk->parse.depth.max.expr = depth; } } int ase_awk_parse (ase_awk_t* awk, ase_awk_srcios_t* srcios) { int n; ASE_AWK_ASSERTX (awk, srcios != ASE_NULL && srcios->in != ASE_NULL, "the source code input stream must be provided at least"); ASE_AWK_ASSERT (awk, awk->parse.depth.cur.loop == 0); ASE_AWK_ASSERT (awk, awk->parse.depth.cur.expr == 0); ase_awk_clear (awk); ASE_AWK_MEMCPY (awk, &awk->src.ios, srcios, ASE_SIZEOF(awk->src.ios)); n = __parse (awk); ASE_AWK_ASSERT (awk, awk->parse.depth.cur.loop == 0); ASE_AWK_ASSERT (awk, awk->parse.depth.cur.expr == 0); return n; } static int __parse (ase_awk_t* awk) { int n = 0, op; ASE_AWK_ASSERT (awk, awk->src.ios.in != ASE_NULL); op = awk->src.ios.in ( ASE_AWK_IO_OPEN, awk->src.ios.custom_data, ASE_NULL, 0); if (op <= -1) { /* cannot open the source file. 
* it doesn't even have to call CLOSE */ awk->errnum = ASE_AWK_ESRCINOPEN; return -1; } if (__add_builtin_globals (awk) == ASE_NULL) { n = -1; goto exit_parse; } /* the user io handler for the source code input returns 0 when * it doesn't have any files to open. this is the same condition * as the source code file is empty. so it will perform the parsing * when op is positive, which means there are something to parse */ if (op > 0) { /* get the first character */ if (__get_char(awk) == -1) { n = -1; goto exit_parse; } /* get the first token */ if (__get_token(awk) == -1) { n = -1; goto exit_parse; } while (1) { if (MATCH(awk,TOKEN_EOF)) break; if (MATCH(awk,TOKEN_NEWLINE)) continue; if (__parse_progunit (awk) == ASE_NULL) { n = -1; goto exit_parse; } } } awk->tree.nglobals = ase_awk_tab_getsize(&awk->parse.globals); if (awk->src.ios.out != ASE_NULL) { if (__deparse (awk) == -1) { n = -1; goto exit_parse; } } exit_parse: if (awk->src.ios.in ( ASE_AWK_IO_CLOSE, awk->src.ios.custom_data, ASE_NULL, 0) != 0) { if (n == 0) { /* this is to keep the earlier error above * that might be more critical than this */ awk->errnum = ASE_AWK_ESRCINCLOSE; n = -1; } } if (n == -1) ase_awk_clear (awk); else awk->tree.ok = 1; return n; } static ase_awk_t* __parse_progunit (ase_awk_t* awk) { /* pattern { action } function name (parameter-list) { statement } */ ASE_AWK_ASSERT (awk, awk->parse.depth.cur.loop == 0); if ((awk->option & ASE_AWK_EXPLICIT) && MATCH(awk,TOKEN_GLOBAL)) { ase_size_t nglobals; awk->parse.id.block = PARSE_GLOBAL; if (__get_token(awk) == -1) return ASE_NULL; nglobals = ase_awk_tab_getsize(&awk->parse.globals); if (__collect_globals (awk) == ASE_NULL) { ase_awk_tab_remove ( &awk->parse.globals, nglobals, ase_awk_tab_getsize(&awk->parse.globals) - nglobals); return ASE_NULL; } } else if (MATCH(awk,TOKEN_FUNCTION)) { awk->parse.id.block = PARSE_FUNCTION; if (__parse_function (awk) == ASE_NULL) return ASE_NULL; } else if (MATCH(awk,TOKEN_BEGIN)) { awk->parse.id.block = 
PARSE_BEGIN; if (__get_token(awk) == -1) return ASE_NULL; if ((awk->option & ASE_AWK_BLOCKLESS) && (MATCH(awk,TOKEN_NEWLINE) || MATCH(awk,TOKEN_EOF))) { /* when the blockless pattern is supported * BEGIN and { should be located on the same line */ PANIC (awk, ASE_AWK_EBEGINBLOCK); } if (!MATCH(awk,TOKEN_LBRACE)) PANIC (awk, ASE_AWK_ELBRACE); awk->parse.id.block = PARSE_BEGIN_BLOCK; if (__parse_begin (awk) == ASE_NULL) return ASE_NULL; } else if (MATCH(awk,TOKEN_END)) { awk->parse.id.block = PARSE_END; if (__get_token(awk) == -1) return ASE_NULL; if ((awk->option & ASE_AWK_BLOCKLESS) && (MATCH(awk,TOKEN_NEWLINE) || MATCH(awk,TOKEN_EOF))) { /* when the blockless pattern is supported * END and { should be located on the same line */ PANIC (awk, ASE_AWK_EENDBLOCK); } if (!MATCH(awk,TOKEN_LBRACE)) PANIC (awk, ASE_AWK_ELBRACE); awk->parse.id.block = PARSE_END_BLOCK; if (__parse_end (awk) == ASE_NULL) return ASE_NULL; } else if (MATCH(awk,TOKEN_LBRACE)) { /* patternless block */ awk->parse.id.block = PARSE_ACTION_BLOCK; if (__parse_pattern_block ( awk, ASE_NULL, ase_false) == ASE_NULL) return ASE_NULL; } else { /* expressions /regular expression/ pattern && pattern pattern || pattern !pattern (pattern) pattern, pattern */ ase_awk_nde_t* ptn; awk->parse.id.block = PARSE_PATTERN; ptn = __parse_expression (awk); if (ptn == ASE_NULL) return ASE_NULL; ASE_AWK_ASSERT (awk, ptn->next == ASE_NULL); if (MATCH(awk,TOKEN_COMMA)) { if (__get_token (awk) == -1) { ase_awk_clrpt (awk, ptn); return ASE_NULL; } ptn->next = __parse_expression (awk); if (ptn->next == ASE_NULL) { ase_awk_clrpt (awk, ptn); return ASE_NULL; } } if ((awk->option & ASE_AWK_BLOCKLESS) && (MATCH(awk,TOKEN_NEWLINE) || MATCH(awk,TOKEN_EOF))) { /* blockless pattern */ ase_bool_t newline = MATCH(awk,TOKEN_NEWLINE); awk->parse.id.block = PARSE_ACTION_BLOCK; if (__parse_pattern_block ( awk, ptn, ase_true) == ASE_NULL) { ase_awk_clrpt (awk, ptn); return ASE_NULL; } if (newline) { if (__get_token (awk) == -1) { 
ase_awk_clrpt (awk, ptn); return ASE_NULL; } } } else { /* parse the action block */ if (!MATCH(awk,TOKEN_LBRACE)) { ase_awk_clrpt (awk, ptn); PANIC (awk, ASE_AWK_ELBRACE); } awk->parse.id.block = PARSE_ACTION_BLOCK; if (__parse_pattern_block ( awk, ptn, ase_false) == ASE_NULL) { ase_awk_clrpt (awk, ptn); return ASE_NULL; } } } return awk; } static ase_awk_nde_t* __parse_function (ase_awk_t* awk) { ase_char_t* name; ase_char_t* name_dup; ase_size_t name_len; ase_awk_nde_t* body; ase_awk_afn_t* afn; ase_size_t nargs; ase_awk_pair_t* pair; int n; /* eat up the keyword 'function' and get the next token */ ASE_AWK_ASSERT (awk, MATCH(awk,TOKEN_FUNCTION)); if (__get_token(awk) == -1) return ASE_NULL; /* match a function name */ if (!MATCH(awk,TOKEN_IDENT)) { /* cannot find a valid identifier for a function name */ PANIC (awk, ASE_AWK_EIDENT); } name = ASE_AWK_STR_BUF(&awk->token.name); name_len = ASE_AWK_STR_LEN(&awk->token.name); /* check if it is a builtin function */ if (ase_awk_getbfn (awk, name, name_len) != ASE_NULL) { PANIC (awk, ASE_AWK_EFNREDEFBFN); } if (ase_awk_map_get(&awk->tree.afns, name, name_len) != ASE_NULL) { /* the function is defined previously */ PANIC (awk, ASE_AWK_EFNREDEFAFN); } if (awk->option & ASE_AWK_UNIQUEAFN) { /* check if it coincides to be a global variable name */ ase_size_t g; g = ase_awk_tab_find (&awk->parse.globals, 0, name, name_len); if (g != (ase_size_t)-1) { PANIC (awk, ASE_AWK_EFNREDEFGLOBAL); } } /* clone the function name before it is overwritten */ name_dup = ase_awk_strxdup (awk, name, name_len); if (name_dup == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); /* get the next token */ if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); return ASE_NULL; } /* match a left parenthesis */ if (!MATCH(awk,TOKEN_LPAREN)) { /* a function name is not followed by a left parenthesis */ ASE_AWK_FREE (awk, name_dup); PANIC (awk, ASE_AWK_ELPAREN); } /* get the next token */ if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); return 
ASE_NULL; } /* make sure that parameter table is empty */ ASE_AWK_ASSERT (awk, ase_awk_tab_getsize(&awk->parse.params) == 0); /* read parameter list */ if (MATCH(awk,TOKEN_RPAREN)) { /* no function parameter found. get the next token */ if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); return ASE_NULL; } } else { while (1) { ase_char_t* param; ase_size_t param_len; if (!MATCH(awk,TOKEN_IDENT)) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_EIDENT); } param = ASE_AWK_STR_BUF(&awk->token.name); param_len = ASE_AWK_STR_LEN(&awk->token.name); if (awk->option & ASE_AWK_UNIQUEAFN) { /* check if a parameter conflicts with a function */ if (ase_awk_strxncmp (name_dup, name_len, param, param_len) == 0 || ase_awk_map_get (&awk->tree.afns, param, param_len) != ASE_NULL) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_EPARREDEFAFN); } /* NOTE: the following is not a conflict * global x; * function f (x) { print x; } * x in print x is a parameter */ } /* check if a parameter conflicts with other parameters */ if (ase_awk_tab_find ( &awk->parse.params, 0, param, param_len) != (ase_size_t)-1) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_EDUPPARAM); } /* push the parameter to the parameter list */ if (ase_awk_tab_getsize ( &awk->parse.params) >= ASE_AWK_MAX_PARAMS) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_ETOOMANYPARAMS); } if (ase_awk_tab_add ( &awk->parse.params, param, param_len) == (ase_size_t)-1) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_ENOMEM); } if (__get_token (awk) == -1) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); return ASE_NULL; } if (MATCH(awk,TOKEN_RPAREN)) break; if (!MATCH(awk,TOKEN_COMMA)) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_ECOMMA); } if 
(__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); return ASE_NULL; } } if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); return ASE_NULL; } } /* check if the function body starts with a left brace */ if (!MATCH(awk,TOKEN_LBRACE)) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); PANIC (awk, ASE_AWK_ELBRACE); } if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); return ASE_NULL; } /* actual function body */ body = awk->parse.parse_block (awk, ase_true); if (body == ASE_NULL) { ASE_AWK_FREE (awk, name_dup); ase_awk_tab_clear (&awk->parse.params); return ASE_NULL; } /* TODO: study furthur if the parameter names should be saved * for some reasons */ nargs = ase_awk_tab_getsize (&awk->parse.params); /* parameter names are not required anymore. clear them */ ase_awk_tab_clear (&awk->parse.params); afn = (ase_awk_afn_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_afn_t)); if (afn == ASE_NULL) { ASE_AWK_FREE (awk, name_dup); ase_awk_clrpt (awk, body); return ASE_NULL; } afn->name = ASE_NULL; /* function name set below */ afn->name_len = 0; afn->nargs = nargs; afn->body = body; n = ase_awk_map_putx (&awk->tree.afns, name_dup, name_len, afn, &pair); if (n < 0) { ASE_AWK_FREE (awk, name_dup); ase_awk_clrpt (awk, body); ASE_AWK_FREE (awk, afn); PANIC (awk, ASE_AWK_ENOMEM); } /* duplicate functions should have been detected previously */ ASE_AWK_ASSERT (awk, n != 0); afn->name = pair->key; /* do some trick to save a string. 
*/ afn->name_len = pair->key_len; ASE_AWK_FREE (awk, name_dup); return body; } static ase_awk_nde_t* __parse_begin (ase_awk_t* awk) { ase_awk_nde_t* nde; ASE_AWK_ASSERT (awk, MATCH(awk,TOKEN_LBRACE)); if (__get_token(awk) == -1) return ASE_NULL; nde = awk->parse.parse_block (awk, ase_true); if (nde == ASE_NULL) return ASE_NULL; awk->tree.begin = nde; return nde; } static ase_awk_nde_t* __parse_end (ase_awk_t* awk) { ase_awk_nde_t* nde; ASE_AWK_ASSERT (awk, MATCH(awk,TOKEN_LBRACE)); if (__get_token(awk) == -1) return ASE_NULL; nde = awk->parse.parse_block (awk, ase_true); if (nde == ASE_NULL) return ASE_NULL; awk->tree.end = nde; return nde; } static ase_awk_chain_t* __parse_pattern_block ( ase_awk_t* awk, ase_awk_nde_t* ptn, ase_bool_t blockless) { ase_awk_nde_t* nde; ase_awk_chain_t* chain; if (blockless) nde = ASE_NULL; else { ASE_AWK_ASSERT (awk, MATCH(awk,TOKEN_LBRACE)); if (__get_token(awk) == -1) return ASE_NULL; nde = awk->parse.parse_block (awk, ase_true); if (nde == ASE_NULL) return ASE_NULL; } chain = (ase_awk_chain_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_chain_t)); if (chain == ASE_NULL) { ase_awk_clrpt (awk, nde); PANIC (awk, ASE_AWK_ENOMEM); } chain->pattern = ptn; chain->action = nde; chain->next = ASE_NULL; if (awk->tree.chain == ASE_NULL) { awk->tree.chain = chain; awk->tree.chain_tail = chain; awk->tree.chain_size++; } else { awk->tree.chain_tail->next = chain; awk->tree.chain_tail = chain; awk->tree.chain_size++; } return chain; } static ase_awk_nde_t* __parse_block (ase_awk_t* awk, ase_bool_t is_top) { ase_awk_nde_t* head, * curr, * nde; ase_awk_nde_blk_t* block; ase_size_t nlocals, nlocals_max, tmp; nlocals = ase_awk_tab_getsize(&awk->parse.locals); nlocals_max = awk->parse.nlocals_max; /* local variable declarations */ if (awk->option & ASE_AWK_EXPLICIT) { while (1) { if (!MATCH(awk,TOKEN_LOCAL)) break; if (__get_token(awk) == -1) { ase_awk_tab_remove ( &awk->parse.locals, nlocals, ase_awk_tab_getsize(&awk->parse.locals) - nlocals); return 
ASE_NULL; } if (__collect_locals(awk, nlocals) == ASE_NULL) { ase_awk_tab_remove ( &awk->parse.locals, nlocals, ase_awk_tab_getsize(&awk->parse.locals) - nlocals); return ASE_NULL; } } } /* block body */ head = ASE_NULL; curr = ASE_NULL; while (1) { if (MATCH(awk,TOKEN_EOF)) { ase_awk_tab_remove ( &awk->parse.locals, nlocals, ase_awk_tab_getsize(&awk->parse.locals) - nlocals); if (head != ASE_NULL) ase_awk_clrpt (awk, head); PANIC (awk, ASE_AWK_EENDSRC); } if (MATCH(awk,TOKEN_RBRACE)) { if (__get_token(awk) == -1) { ase_awk_tab_remove ( &awk->parse.locals, nlocals, ase_awk_tab_getsize(&awk->parse.locals) - nlocals); if (head != ASE_NULL) ase_awk_clrpt (awk, head); return ASE_NULL; } break; } nde = __parse_statement (awk); if (nde == ASE_NULL) { ase_awk_tab_remove ( &awk->parse.locals, nlocals, ase_awk_tab_getsize(&awk->parse.locals) - nlocals); if (head != ASE_NULL) ase_awk_clrpt (awk, head); return ASE_NULL; } /* remove unnecessary statements */ if (nde->type == ASE_AWK_NDE_NULL || (nde->type == ASE_AWK_NDE_BLK && ((ase_awk_nde_blk_t*)nde)->body == ASE_NULL)) continue; if (curr == ASE_NULL) head = nde; else curr->next = nde; curr = nde; } block = (ase_awk_nde_blk_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_blk_t)); if (block == ASE_NULL) { ase_awk_tab_remove ( &awk->parse.locals, nlocals, ase_awk_tab_getsize(&awk->parse.locals) - nlocals); ase_awk_clrpt (awk, head); PANIC (awk, ASE_AWK_ENOMEM); } tmp = ase_awk_tab_getsize(&awk->parse.locals); if (tmp > awk->parse.nlocals_max) awk->parse.nlocals_max = tmp; ase_awk_tab_remove ( &awk->parse.locals, nlocals, tmp - nlocals); /* adjust the number of locals for a block without any statements */ /* if (head == ASE_NULL) tmp = 0; */ block->type = ASE_AWK_NDE_BLK; //block->line = block->next = ASE_NULL; block->body = head; /* TODO: not only local variables but also nested blocks, unless it is part of other constructs such as if, can be promoted and merged to top-level block */ /* migrate all block-local variables to a 
top-level block */ if (is_top) { block->nlocals = awk->parse.nlocals_max - nlocals; awk->parse.nlocals_max = nlocals_max; } else { /*block->nlocals = tmp - nlocals;*/ block->nlocals = 0; } return (ase_awk_nde_t*)block; } static ase_awk_nde_t* __parse_block_dc (ase_awk_t* awk, ase_bool_t is_top) { ase_awk_nde_t* nde; ASE_AWK_ASSERT (awk, awk->parse.depth.max.block > 0); if (awk->parse.depth.cur.block >= awk->parse.depth.max.block) { awk->errnum = ASE_AWK_ERECURSION; return ASE_NULL; } awk->parse.depth.cur.block++; nde = __parse_block (awk, is_top); awk->parse.depth.cur.block--; return nde; } static ase_awk_t* __add_builtin_globals (ase_awk_t* awk) { struct __bvent* p = __bvtab; ase_awk_t* tmp; awk->tree.nbglobals = 0; while (p->name != ASE_NULL) { if (p->valid != 0 && (awk->option & p->valid) == 0) { /* an invalid global variable are still added * to the global variable table with an empty name. * this is to prevent the run-time from looking up * the variable */ tmp =__add_global (awk, ASE_T(""), 0, 1); } else { tmp =__add_global (awk, p->name, p->name_len, 0); } if (tmp == ASE_NULL) return ASE_NULL; awk->tree.nbglobals++; p++; } return awk; } static ase_awk_t* __add_global ( ase_awk_t* awk, const ase_char_t* name, ase_size_t len, int force) { if (!force) { if (awk->option & ASE_AWK_UNIQUEAFN) { /* check if it conflict with a function name */ if (ase_awk_map_get ( &awk->tree.afns, name, len) != ASE_NULL) { PANIC (awk, ASE_AWK_EDUPNAME); } } /* check if it conflicts with other global variable names */ if (ase_awk_tab_find ( &awk->parse.globals, 0, name, len) != (ase_size_t)-1) { PANIC (awk, ASE_AWK_EDUPVAR); } } if (ase_awk_tab_getsize(&awk->parse.globals) >= ASE_AWK_MAX_GLOBALS) { PANIC (awk, ASE_AWK_ETOOMANYGLOBALS); } if (ase_awk_tab_add (&awk->parse.globals, name, len) == (ase_size_t)-1) { PANIC (awk, ASE_AWK_ENOMEM); } return awk; } static ase_awk_t* __collect_globals (ase_awk_t* awk) { while (1) { if (!MATCH(awk,TOKEN_IDENT)) { PANIC (awk, ASE_AWK_EIDENT); } if 
(__add_global ( awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name), 0) == ASE_NULL) return ASE_NULL; if (__get_token(awk) == -1) return ASE_NULL; if (MATCH(awk,TOKEN_SEMICOLON)) break; if (!MATCH(awk,TOKEN_COMMA)) { PANIC (awk, ASE_AWK_ECOMMA); } if (__get_token(awk) == -1) return ASE_NULL; } /* skip a semicolon */ if (__get_token(awk) == -1) return ASE_NULL; return awk; } static ase_awk_t* __collect_locals (ase_awk_t* awk, ase_size_t nlocals) { ase_char_t* local; ase_size_t local_len; while (1) { if (!MATCH(awk,TOKEN_IDENT)) { PANIC (awk, ASE_AWK_EIDENT); } local = ASE_AWK_STR_BUF(&awk->token.name); local_len = ASE_AWK_STR_LEN(&awk->token.name); /* NOTE: it is not checked againt globals names */ if (awk->option & ASE_AWK_UNIQUEAFN) { /* check if it conflict with a function name */ if (ase_awk_map_get ( &awk->tree.afns, local, local_len) != ASE_NULL) { PANIC (awk, ASE_AWK_EDUPNAME); } } /* check if it conflicts with a paremeter name */ if (ase_awk_tab_find (&awk->parse.params, 0, local, local_len) != (ase_size_t)-1) { PANIC (awk, ASE_AWK_EDUPNAME); } /* check if it conflicts with other local variable names */ if (ase_awk_tab_find (&awk->parse.locals, ((awk->option & ASE_AWK_SHADING)? 
nlocals: 0), local, local_len) != (ase_size_t)-1) { PANIC (awk, ASE_AWK_EDUPVAR); } if (ase_awk_tab_getsize(&awk->parse.locals) >= ASE_AWK_MAX_LOCALS) { PANIC (awk, ASE_AWK_ETOOMANYLOCALS); } if (ase_awk_tab_add ( &awk->parse.locals, local, local_len) == (ase_size_t)-1) { PANIC (awk, ASE_AWK_ENOMEM); } if (__get_token(awk) == -1) return ASE_NULL; if (MATCH(awk,TOKEN_SEMICOLON)) break; if (!MATCH(awk,TOKEN_COMMA)) PANIC (awk, ASE_AWK_ECOMMA); if (__get_token(awk) == -1) return ASE_NULL; } /* skip a semicolon */ if (__get_token(awk) == -1) return ASE_NULL; return awk; } static ase_awk_nde_t* __parse_statement (ase_awk_t* awk) { ase_awk_nde_t* nde; if (MATCH(awk,TOKEN_SEMICOLON)) { /* null statement */ nde = (ase_awk_nde_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_NULL; nde->next = ASE_NULL; if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, nde); return ASE_NULL; } } else if (MATCH(awk,TOKEN_LBRACE)) { if (__get_token(awk) == -1) return ASE_NULL; nde = awk->parse.parse_block (awk, ase_false); } else { /* the statement id held in awk->parse.id.stmnt denotes * the token id of the statement currently being parsed. * the current statement id is saved here because the * statement id can be changed in __parse_statement_nb. * it will, in turn, call __parse_statement which will * eventually change the statement id. 
*/ int old_id = awk->parse.id.stmnt; /* set the current statement id */ awk->parse.id.stmnt = awk->token.type; /* proceed parsing the statement */ nde = __parse_statement_nb (awk); /* restore the statement id saved previously */ awk->parse.id.stmnt = old_id; } return nde; } static ase_awk_nde_t* __parse_statement_nb (ase_awk_t* awk) { ase_awk_nde_t* nde; /* keywords that don't require any terminating semicolon */ if (MATCH(awk,TOKEN_IF)) { if (__get_token(awk) == -1) return ASE_NULL; return __parse_if (awk); } else if (MATCH(awk,TOKEN_WHILE)) { if (__get_token(awk) == -1) return ASE_NULL; awk->parse.depth.cur.loop++; nde = __parse_while (awk); awk->parse.depth.cur.loop--; return nde; } else if (MATCH(awk,TOKEN_FOR)) { if (__get_token(awk) == -1) return ASE_NULL; awk->parse.depth.cur.loop++; nde = __parse_for (awk); awk->parse.depth.cur.loop--; return nde; } /* keywords that require a terminating semicolon */ if (MATCH(awk,TOKEN_DO)) { if (__get_token(awk) == -1) return ASE_NULL; awk->parse.depth.cur.loop++; nde = __parse_dowhile (awk); awk->parse.depth.cur.loop--; return nde; } else if (MATCH(awk,TOKEN_BREAK)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_break (awk); } else if (MATCH(awk,TOKEN_CONTINUE)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_continue (awk); } else if (MATCH(awk,TOKEN_RETURN)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_return (awk); } else if (MATCH(awk,TOKEN_EXIT)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_exit (awk); } else if (MATCH(awk,TOKEN_NEXT)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_next (awk); } else if (MATCH(awk,TOKEN_NEXTFILE)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_nextfile (awk, 0); } else if (MATCH(awk,TOKEN_NEXTOFILE)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_nextfile (awk, 1); } else if (MATCH(awk,TOKEN_DELETE)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_delete (awk); } else if 
(MATCH(awk,TOKEN_PRINT)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_print (awk, ASE_AWK_NDE_PRINT); } else if (MATCH(awk,TOKEN_PRINTF)) { if (__get_token(awk) == -1) return ASE_NULL; nde = __parse_print (awk, ASE_AWK_NDE_PRINTF); } else { nde = __parse_expression(awk); } if (nde == ASE_NULL) return ASE_NULL; /* check if a statement ends with a semicolon */ if (!MATCH(awk,TOKEN_SEMICOLON)) { if (nde != ASE_NULL) ase_awk_clrpt (awk, nde); PANIC (awk, ASE_AWK_ESEMICOLON); } /* eat up the semicolon and read in the next token */ if (__get_token(awk) == -1) { if (nde != ASE_NULL) ase_awk_clrpt (awk, nde); return ASE_NULL; } return nde; } static ase_awk_nde_t* __parse_expression (ase_awk_t* awk) { ase_awk_nde_t* nde; if (awk->parse.depth.max.expr > 0 && awk->parse.depth.cur.expr >= awk->parse.depth.max.expr) { awk->errnum = ASE_AWK_ERECURSION; return ASE_NULL; } awk->parse.depth.cur.expr++; nde = __parse_expression0 (awk); awk->parse.depth.cur.expr--; return nde; } static ase_awk_nde_t* __parse_expression0 (ase_awk_t* awk) { ase_awk_nde_t* x, * y; ase_awk_nde_ass_t* nde; int opcode; x = __parse_basic_expr (awk); if (x == ASE_NULL) return ASE_NULL; opcode = __assign_to_opcode (awk); if (opcode == -1) { /* no assignment operator found. 
*/ return x; } ASE_AWK_ASSERT (awk, x->next == ASE_NULL); if (!__is_var(x) && x->type != ASE_AWK_NDE_POS) { ase_awk_clrpt (awk, x); PANIC (awk, ASE_AWK_EASSIGNMENT); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, x); return ASE_NULL; } /*y = __parse_basic_expr (awk);*/ y = __parse_expression (awk); if (y == ASE_NULL) { ase_awk_clrpt (awk, x); return ASE_NULL; } nde = (ase_awk_nde_ass_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_ass_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, x); ase_awk_clrpt (awk, y); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_ASS; nde->next = ASE_NULL; nde->opcode = opcode; nde->left = x; nde->right = y; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_basic_expr (ase_awk_t* awk) { ase_awk_nde_t* nde, * n1, * n2; nde = __parse_logical_or (awk); if (nde == ASE_NULL) return ASE_NULL; if (MATCH(awk,TOKEN_QUEST)) { ase_awk_nde_cnd_t* tmp; if (__get_token(awk) == -1) return ASE_NULL; /*n1 = __parse_basic_expr (awk);*/ n1 = __parse_expression (awk); if (n1 == ASE_NULL) { ase_awk_clrpt (awk, nde); return ASE_NULL; } if (!MATCH(awk,TOKEN_COLON)) PANIC (awk, ASE_AWK_ECOLON); if (__get_token(awk) == -1) return ASE_NULL; /*n2 = __parse_basic_expr (awk);*/ n2 = __parse_expression (awk); if (n2 == ASE_NULL) { ase_awk_clrpt (awk, nde); ase_awk_clrpt (awk, n1); return ASE_NULL; } tmp = (ase_awk_nde_cnd_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_cnd_t)); if (tmp == ASE_NULL) { ase_awk_clrpt (awk, nde); ase_awk_clrpt (awk, n1); ase_awk_clrpt (awk, n2); return ASE_NULL; } tmp->type = ASE_AWK_NDE_CND; tmp->next = ASE_NULL; tmp->test = nde; tmp->left = n1; tmp->right = n2; nde = (ase_awk_nde_t*)tmp; } return nde; } static ase_awk_nde_t* __parse_binary_expr ( ase_awk_t* awk, const __binmap_t* binmap, ase_awk_nde_t*(*next_level_func)(ase_awk_t*)) { ase_awk_nde_exp_t* nde; ase_awk_nde_t* left, * right; int opcode; left = next_level_func (awk); if (left == ASE_NULL) return ASE_NULL; while (1) { const __binmap_t* p = binmap; 
ase_bool_t matched = ase_false;

		/* look the current token up in the operator table;
		 * the table is terminated by a TOKEN_EOF entry */
		while (p->token != TOKEN_EOF)
		{
			if (MATCH(awk,p->token))
			{
				opcode = p->binop;
				matched = ase_true;
				break;
			}
			p++;
		}
		/* the current token is not an operator of this level */
		if (!matched) break;

		if (__get_token(awk) == -1)
		{
			ase_awk_clrpt (awk, left);
			return ASE_NULL;
		}

		right = next_level_func (awk);
		if (right == ASE_NULL)
		{
			ase_awk_clrpt (awk, left);
			return ASE_NULL;
		}

		nde = (ase_awk_nde_exp_t*) ASE_AWK_MALLOC (
			awk, ASE_SIZEOF(ase_awk_nde_exp_t));
		if (nde == ASE_NULL)
		{
			ase_awk_clrpt (awk, right);
			ase_awk_clrpt (awk, left);
			PANIC (awk, ASE_AWK_ENOMEM);
		}

		nde->type = ASE_AWK_NDE_EXP_BIN;
		nde->next = ASE_NULL;
		nde->opcode = opcode;
		nde->left = left;
		nde->right = right;

		/* the new node becomes the left operand of the next
		 * operator at this level */
		left = (ase_awk_nde_t*)nde;
	}

	return left;
}

/*
 * Parses operands produced by __parse_logical_and joined by TOKEN_LOR,
 * delegating the loop to __parse_binary_expr.
 */
static ase_awk_nde_t* __parse_logical_or (ase_awk_t* awk)
{
	static __binmap_t map[] =
	{
		{ TOKEN_LOR, ASE_AWK_BINOP_LOR },
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_logical_and);
}

/*
 * Parses operands produced by __parse_in joined by TOKEN_LAND,
 * delegating the loop to __parse_binary_expr.
 */
static ase_awk_nde_t* __parse_logical_and (ase_awk_t* awk)
{
	static __binmap_t map[] =
	{
		{ TOKEN_LAND, ASE_AWK_BINOP_LAND },
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_in);
}

/*
 * Parses "left in right" chains over __parse_regex_match operands.
 * Unlike the table-driven levels, the right operand must satisfy
 * __is_plain_var; otherwise ASE_AWK_ENOTVAR is raised.
 */
static ase_awk_nde_t* __parse_in (ase_awk_t* awk)
{
	/*
	 * table-driven form kept for reference; it cannot perform the
	 * __is_plain_var check done below.
	 *
	static __binmap_t map[] =
	{
		{ TOKEN_IN, ASE_AWK_BINOP_IN },
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_regex_match);
	*/

	ase_awk_nde_exp_t* nde;
	ase_awk_nde_t* left, * right;

	left = __parse_regex_match (awk);
	if (left == ASE_NULL) return ASE_NULL;

	while (1)
	{
		if (!MATCH(awk,TOKEN_IN)) break;

		if (__get_token(awk) == -1)
		{
			ase_awk_clrpt (awk, left);
			return ASE_NULL;
		}

		right = __parse_regex_match (awk);
		if (right == ASE_NULL)
		{
			ase_awk_clrpt (awk, left);
			return ASE_NULL;
		}

		if (!__is_plain_var(right))
		{
			ase_awk_clrpt (awk, right);
			ase_awk_clrpt (awk, left);
			PANIC (awk, ASE_AWK_ENOTVAR);
		}

		nde = (ase_awk_nde_exp_t*) ASE_AWK_MALLOC (
			awk, ASE_SIZEOF(ase_awk_nde_exp_t));
		if (nde == ASE_NULL)
		{
			ase_awk_clrpt (awk, right);
			ase_awk_clrpt (awk, left);
			PANIC (awk, ASE_AWK_ENOMEM);
		}

		nde->type = ASE_AWK_NDE_EXP_BIN;
		nde->next
= ASE_NULL; nde->opcode = ASE_AWK_BINOP_IN; nde->left = left; nde->right = right; left = (ase_awk_nde_t*)nde; } return left; } static ase_awk_nde_t* __parse_regex_match (ase_awk_t* awk) { static __binmap_t map[] = { { TOKEN_TILDE, ASE_AWK_BINOP_MA }, { TOKEN_NM, ASE_AWK_BINOP_NM }, { TOKEN_EOF, 0 }, }; return __parse_binary_expr (awk, map, __parse_bitwise_or); } static ase_awk_nde_t* __parse_bitwise_or (ase_awk_t* awk) { if (awk->option & ASE_AWK_EXTIO) { return __parse_bitwise_or_with_extio (awk); } else { static __binmap_t map[] = { { TOKEN_BOR, ASE_AWK_BINOP_BOR }, { TOKEN_EOF, 0 } }; return __parse_binary_expr (awk, map, __parse_bitwise_xor); } } static ase_awk_nde_t* __parse_bitwise_or_with_extio (ase_awk_t* awk) { ase_awk_nde_t* left, * right; left = __parse_bitwise_xor (awk); if (left == ASE_NULL) return ASE_NULL; while (1) { int in_type; if (MATCH(awk,TOKEN_BOR)) in_type = ASE_AWK_IN_PIPE; else if (MATCH(awk,TOKEN_BORAND)) in_type = ASE_AWK_IN_COPROC; else break; if (__get_token(awk) == -1) { ase_awk_clrpt (awk, left); return ASE_NULL; } if (MATCH(awk,TOKEN_GETLINE)) { ase_awk_nde_getline_t* nde; ase_awk_nde_t* var = ASE_NULL; /* piped getline */ if (__get_token(awk) == -1) { ase_awk_clrpt (awk, left); return ASE_NULL; } /* TODO: is this correct? 
*/ if (MATCH(awk,TOKEN_IDENT)) { /* command | getline var */ var = __parse_primary (awk); if (var == ASE_NULL) { ase_awk_clrpt (awk, left); return ASE_NULL; } } nde = (ase_awk_nde_getline_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_getline_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, left); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_GETLINE; nde->next = ASE_NULL; nde->var = var; nde->in_type = in_type; nde->in = left; left = (ase_awk_nde_t*)nde; } else { ase_awk_nde_exp_t* nde; if (in_type == ASE_AWK_IN_COPROC) { ase_awk_clrpt (awk, left); PANIC (awk, ASE_AWK_EGETLINE); } right = __parse_bitwise_xor (awk); if (right == ASE_NULL) { ase_awk_clrpt (awk, left); return ASE_NULL; } /* TODO: do constant folding */ nde = (ase_awk_nde_exp_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_exp_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, right); ase_awk_clrpt (awk, left); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_EXP_BIN; nde->next = ASE_NULL; nde->opcode = ASE_AWK_BINOP_BOR; nde->left = left; nde->right = right; left = (ase_awk_nde_t*)nde; } } return left; } static ase_awk_nde_t* __parse_bitwise_xor (ase_awk_t* awk) { static __binmap_t map[] = { { TOKEN_BXOR, ASE_AWK_BINOP_BXOR }, { TOKEN_EOF, 0 } }; return __parse_binary_expr (awk, map, __parse_bitwise_and); } static ase_awk_nde_t* __parse_bitwise_and (ase_awk_t* awk) { static __binmap_t map[] = { { TOKEN_BAND, ASE_AWK_BINOP_BAND }, { TOKEN_EOF, 0 } }; return __parse_binary_expr (awk, map, __parse_equality); } static ase_awk_nde_t* __parse_equality (ase_awk_t* awk) { static __binmap_t map[] = { { TOKEN_EQ, ASE_AWK_BINOP_EQ }, { TOKEN_NE, ASE_AWK_BINOP_NE }, { TOKEN_EOF, 0 } }; return __parse_binary_expr (awk, map, __parse_relational); } static ase_awk_nde_t* __parse_relational (ase_awk_t* awk) { static __binmap_t map[] = { { TOKEN_GT, ASE_AWK_BINOP_GT }, { TOKEN_GE, ASE_AWK_BINOP_GE }, { TOKEN_LT, ASE_AWK_BINOP_LT }, { TOKEN_LE, ASE_AWK_BINOP_LE }, { TOKEN_EOF, 0 } }; return 
__parse_binary_expr (awk, map, __parse_shift);
}

/* Parses __parse_concat operands joined by TOKEN_LSHIFT or TOKEN_RSHIFT. */
static ase_awk_nde_t* __parse_shift (ase_awk_t* awk)
{
	static __binmap_t map[] =
	{
		{ TOKEN_LSHIFT, ASE_AWK_BINOP_LSHIFT },
		{ TOKEN_RSHIFT, ASE_AWK_BINOP_RSHIFT },
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_concat);
}

/*
 * Parses concatenation, which has no operator token of its own.
 * After an additive expression, another additive expression is parsed
 * and joined with ASE_AWK_BINOP_CONCAT for as long as the current token
 * is TOKEN_LPAREN, TOKEN_DOLLAR, or any token whose enumerator value is
 * at or above TOKEN_GETLINE.
 */
static ase_awk_nde_t* __parse_concat (ase_awk_t* awk)
{
	ase_awk_nde_exp_t* nde;
	ase_awk_nde_t* left, * right;

	left = __parse_additive (awk);
	if (left == ASE_NULL) return ASE_NULL;

	/* TODO: write a better code to do this....
	 *       first of all, is the following check sufficient? */
	while (MATCH(awk,TOKEN_LPAREN) ||
	       MATCH(awk,TOKEN_DOLLAR) ||
	       awk->token.type >= TOKEN_GETLINE)
	{
		right = __parse_additive (awk);
		if (right == ASE_NULL)
		{
			ase_awk_clrpt (awk, left);
			return ASE_NULL;
		}

		nde = (ase_awk_nde_exp_t*) ASE_AWK_MALLOC (
			awk, ASE_SIZEOF(ase_awk_nde_exp_t));
		if (nde == ASE_NULL)
		{
			ase_awk_clrpt (awk, left);
			ase_awk_clrpt (awk, right);
			PANIC (awk, ASE_AWK_ENOMEM);
		}

		nde->type = ASE_AWK_NDE_EXP_BIN;
		nde->next = ASE_NULL;
		nde->opcode = ASE_AWK_BINOP_CONCAT;
		nde->left = left;
		nde->right = right;

		left = (ase_awk_nde_t*)nde;
	}

	return left;
}

/* Parses __parse_multiplicative operands joined by TOKEN_PLUS or TOKEN_MINUS. */
static ase_awk_nde_t* __parse_additive (ase_awk_t* awk)
{
	static __binmap_t map[] =
	{
		{ TOKEN_PLUS, ASE_AWK_BINOP_PLUS },
		{ TOKEN_MINUS, ASE_AWK_BINOP_MINUS },
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_multiplicative);
}

/*
 * Parses __parse_unary operands joined by TOKEN_MUL, TOKEN_DIV,
 * TOKEN_IDIV or TOKEN_MOD. TOKEN_EXP is handled by __parse_exponent.
 */
static ase_awk_nde_t* __parse_multiplicative (ase_awk_t* awk)
{
	static __binmap_t map[] =
	{
		{ TOKEN_MUL, ASE_AWK_BINOP_MUL },
		{ TOKEN_DIV, ASE_AWK_BINOP_DIV },
		{ TOKEN_IDIV, ASE_AWK_BINOP_IDIV },
		{ TOKEN_MOD, ASE_AWK_BINOP_MOD },
		/* { TOKEN_EXP, ASE_AWK_BINOP_EXP }, */
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_unary);
}

/*
 * Parses a prefix unary operator (TOKEN_PLUS, TOKEN_MINUS, TOKEN_NOT,
 * TOKEN_TILDE) applied to another unary expression, producing an
 * ASE_AWK_NDE_EXP_UNR node. Without such an operator the call falls
 * through to __parse_exponent. The recursive call is subject to the
 * same expression depth limit used by __parse_expression.
 */
static ase_awk_nde_t* __parse_unary (ase_awk_t* awk)
{
	ase_awk_nde_exp_t* nde;
	ase_awk_nde_t* left;
	int opcode;

	opcode = (MATCH(awk,TOKEN_PLUS))? ASE_AWK_UNROP_PLUS:
	         (MATCH(awk,TOKEN_MINUS))? ASE_AWK_UNROP_MINUS:
	         (MATCH(awk,TOKEN_NOT))? ASE_AWK_UNROP_NOT:
	         (MATCH(awk,TOKEN_TILDE))? ASE_AWK_UNROP_BNOT: -1;

	/*if (opcode == -1) return __parse_increment (awk);*/
	if (opcode == -1) return __parse_exponent (awk);

	if (__get_token(awk) == -1) return ASE_NULL;

	if (awk->parse.depth.max.expr > 0 &&
	    awk->parse.depth.cur.expr >= awk->parse.depth.max.expr)
	{
		awk->errnum = ASE_AWK_ERECURSION;
		return ASE_NULL;
	}
	awk->parse.depth.cur.expr++;
	left = __parse_unary (awk);
	awk->parse.depth.cur.expr--;
	if (left == ASE_NULL) return ASE_NULL;

	nde = (ase_awk_nde_exp_t*)
		ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_exp_t));
	if (nde == ASE_NULL)
	{
		ase_awk_clrpt (awk, left);
		PANIC (awk, ASE_AWK_ENOMEM);
	}

	nde->type = ASE_AWK_NDE_EXP_UNR;
	nde->next = ASE_NULL;
	nde->opcode = opcode;
	nde->left = left;
	nde->right = ASE_NULL;

	return (ase_awk_nde_t*)nde;
}

/* Parses __parse_unary_exp operands joined by TOKEN_EXP. */
static ase_awk_nde_t* __parse_exponent (ase_awk_t* awk)
{
	static __binmap_t map[] =
	{
		{ TOKEN_EXP, ASE_AWK_BINOP_EXP },
		{ TOKEN_EOF, 0 }
	};

	return __parse_binary_expr (awk, map, __parse_unary_exp);
}

static ase_awk_nde_t* __parse_unary_exp (ase_awk_t* awk)
{
	ase_awk_nde_exp_t* nde;
	ase_awk_nde_t* left;
	int opcode;

	opcode = (MATCH(awk,TOKEN_PLUS))? ASE_AWK_UNROP_PLUS:
	         (MATCH(awk,TOKEN_MINUS))? ASE_AWK_UNROP_MINUS:
	         (MATCH(awk,TOKEN_NOT))? ASE_AWK_UNROP_NOT:
	         (MATCH(awk,TOKEN_TILDE))?
ASE_AWK_UNROP_BNOT: -1; if (opcode == -1) return __parse_increment (awk); if (__get_token(awk) == -1) return ASE_NULL; if (awk->parse.depth.max.expr > 0 && awk->parse.depth.cur.expr >= awk->parse.depth.max.expr) { awk->errnum = ASE_AWK_ERECURSION; return ASE_NULL; } awk->parse.depth.cur.expr++; left = __parse_unary (awk); awk->parse.depth.cur.expr--; if (left == ASE_NULL) return ASE_NULL; nde = (ase_awk_nde_exp_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_exp_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, left); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_EXP_UNR; nde->next = ASE_NULL; nde->opcode = opcode; nde->left = left; nde->right = ASE_NULL; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_increment (ase_awk_t* awk) { ase_awk_nde_exp_t* nde; ase_awk_nde_t* left; int type, opcode, opcode1, opcode2; opcode1 = MATCH(awk,TOKEN_PLUSPLUS)? ASE_AWK_INCOP_PLUS: MATCH(awk,TOKEN_MINUSMINUS)? ASE_AWK_INCOP_MINUS: -1; if (opcode1 != -1) { if (__get_token(awk) == -1) return ASE_NULL; } left = __parse_primary (awk); if (left == ASE_NULL) return ASE_NULL; opcode2 = MATCH(awk,TOKEN_PLUSPLUS)? ASE_AWK_INCOP_PLUS: MATCH(awk,TOKEN_MINUSMINUS)? 
ASE_AWK_INCOP_MINUS: -1; if (opcode1 != -1 && opcode2 != -1) { ase_awk_clrpt (awk, left); PANIC (awk, ASE_AWK_ELVALUE); } else if (opcode1 == -1 && opcode2 == -1) { return left; } else if (opcode1 != -1) { type = ASE_AWK_NDE_EXP_INCPRE; opcode = opcode1; } else if (opcode2 != -1) { type = ASE_AWK_NDE_EXP_INCPST; opcode = opcode2; if (__get_token(awk) == -1) return ASE_NULL; } nde = (ase_awk_nde_exp_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_exp_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, left); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = type; nde->next = ASE_NULL; nde->opcode = opcode; nde->left = left; nde->right = ASE_NULL; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_primary (ase_awk_t* awk) { if (MATCH(awk,TOKEN_IDENT)) { return __parse_primary_ident (awk); } else if (MATCH(awk,TOKEN_INT)) { ase_awk_nde_int_t* nde; nde = (ase_awk_nde_int_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_int_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_INT; nde->next = ASE_NULL; nde->val = ase_awk_strxtolong (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name), 0, ASE_NULL); nde->str = ase_awk_strxdup (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name)); if (nde->str == ASE_NULL) { ASE_AWK_FREE (awk, nde); return ASE_NULL; } nde->len = ASE_AWK_STR_LEN(&awk->token.name); ASE_AWK_ASSERT (awk, ASE_AWK_STR_LEN(&awk->token.name) == ase_awk_strlen(ASE_AWK_STR_BUF(&awk->token.name))); if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, nde->str); ASE_AWK_FREE (awk, nde); return ASE_NULL; } return (ase_awk_nde_t*)nde; } else if (MATCH(awk,TOKEN_REAL)) { ase_awk_nde_real_t* nde; nde = (ase_awk_nde_real_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_real_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_REAL; nde->next = ASE_NULL; nde->val = ase_awk_strxtoreal (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name), ASE_NULL); nde->str = 
ase_awk_strxdup (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name)); if (nde->str == ASE_NULL) { ASE_AWK_FREE (awk, nde); return ASE_NULL; } nde->len = ASE_AWK_STR_LEN(&awk->token.name); ASE_AWK_ASSERT (awk, ASE_AWK_STR_LEN(&awk->token.name) == ase_awk_strlen(ASE_AWK_STR_BUF(&awk->token.name))); if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, nde->str); ASE_AWK_FREE (awk, nde); return ASE_NULL; } return (ase_awk_nde_t*)nde; } else if (MATCH(awk,TOKEN_STR)) { ase_awk_nde_str_t* nde; nde = (ase_awk_nde_str_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_str_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_STR; nde->next = ASE_NULL; nde->len = ASE_AWK_STR_LEN(&awk->token.name); nde->buf = ase_awk_strxdup ( awk, ASE_AWK_STR_BUF(&awk->token.name), nde->len); if (nde->buf == ASE_NULL) { ASE_AWK_FREE (awk, nde); PANIC (awk, ASE_AWK_ENOMEM); } if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, nde->buf); ASE_AWK_FREE (awk, nde); return ASE_NULL; } return (ase_awk_nde_t*)nde; } else if (MATCH(awk,TOKEN_DIV)) { ase_awk_nde_rex_t* nde; int errnum; /* the regular expression is tokenized here because * of the context-sensitivity of the slash symbol */ SET_TOKEN_TYPE (awk, TOKEN_REX); ase_awk_str_clear (&awk->token.name); if (__get_rexstr (awk) == -1) return ASE_NULL; ASE_AWK_ASSERT (awk, MATCH(awk,TOKEN_REX)); nde = (ase_awk_nde_rex_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_rex_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_REX; nde->next = ASE_NULL; nde->len = ASE_AWK_STR_LEN(&awk->token.name); nde->buf = ase_awk_strxdup (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name)); if (nde->buf == ASE_NULL) { ASE_AWK_FREE (awk, nde); PANIC (awk, ASE_AWK_ENOMEM); } nde->code = ase_awk_buildrex (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name), &errnum); if (nde->code == ASE_NULL) { ASE_AWK_FREE (awk, nde->buf); ASE_AWK_FREE (awk, nde); PANIC (awk, 
errnum); } if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, nde->buf); ASE_AWK_FREE (awk, nde->code); ASE_AWK_FREE (awk, nde); return ASE_NULL; } return (ase_awk_nde_t*)nde; } else if (MATCH(awk,TOKEN_DOLLAR)) { ase_awk_nde_pos_t* nde; ase_awk_nde_t* prim; if (__get_token(awk)) return ASE_NULL; prim = __parse_primary (awk); if (prim == ASE_NULL) return ASE_NULL; nde = (ase_awk_nde_pos_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_pos_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, prim); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_POS; nde->next = ASE_NULL; nde->val = prim; return (ase_awk_nde_t*)nde; } else if (MATCH(awk,TOKEN_LPAREN)) { ase_awk_nde_t* nde; ase_awk_nde_t* last; /* eat up the left parenthesis */ if (__get_token(awk) == -1) return ASE_NULL; /* parse the sub-expression inside the parentheses */ nde = __parse_expression (awk); if (nde == ASE_NULL) return ASE_NULL; /* parse subsequent expressions separated by a comma, if any */ last = nde; ASE_AWK_ASSERT (awk, last->next == ASE_NULL); while (MATCH(awk,TOKEN_COMMA)) { ase_awk_nde_t* tmp; if (__get_token(awk) == -1) { ase_awk_clrpt (awk, nde); return ASE_NULL; } tmp = __parse_expression (awk); if (tmp == ASE_NULL) { ase_awk_clrpt (awk, nde); return ASE_NULL; } ASE_AWK_ASSERT (awk, tmp->next == ASE_NULL); last->next = tmp; last = tmp; } /* ----------------- */ /* check for the closing parenthesis */ if (!MATCH(awk,TOKEN_RPAREN)) { ase_awk_clrpt (awk, nde); PANIC (awk, ASE_AWK_ERPAREN); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, nde); return ASE_NULL; } /* check if it is a chained node */ if (nde->next != ASE_NULL) { /* if so, it is a expression group */ /* (expr1, expr2, expr2) */ ase_awk_nde_grp_t* tmp; if ((awk->parse.id.stmnt != TOKEN_PRINT && awk->parse.id.stmnt != TOKEN_PRINTF) || awk->parse.depth.cur.expr != 1) { if (!MATCH(awk,TOKEN_IN)) { ase_awk_clrpt (awk, nde); PANIC (awk, ASE_AWK_EIN); } } tmp = (ase_awk_nde_grp_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_grp_t)); 
if (tmp == ASE_NULL) { ase_awk_clrpt (awk, nde); PANIC (awk, ASE_AWK_ENOMEM); } tmp->type = ASE_AWK_NDE_GRP; tmp->next = ASE_NULL; tmp->body = nde; nde = (ase_awk_nde_t*)tmp; } /* ----------------- */ return nde; } else if (MATCH(awk,TOKEN_GETLINE)) { ase_awk_nde_getline_t* nde; ase_awk_nde_t* var = ASE_NULL; ase_awk_nde_t* in = ASE_NULL; if (__get_token(awk) == -1) return ASE_NULL; if (MATCH(awk,TOKEN_IDENT)) { /* getline var */ var = __parse_primary (awk); if (var == ASE_NULL) return ASE_NULL; } if (MATCH(awk, TOKEN_LT)) { /* getline [var] < file */ if (__get_token(awk) == -1) { if (var != ASE_NULL) ase_awk_clrpt (awk, var); return ASE_NULL; } /* TODO: is this correct? */ /*in = __parse_expression (awk);*/ in = __parse_primary (awk); if (in == ASE_NULL) { if (var != ASE_NULL) ase_awk_clrpt (awk, var); return ASE_NULL; } } nde = (ase_awk_nde_getline_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_getline_t)); if (nde == ASE_NULL) { if (var != ASE_NULL) ase_awk_clrpt (awk, var); if (in != ASE_NULL) ase_awk_clrpt (awk, in); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_GETLINE; nde->next = ASE_NULL; nde->var = var; nde->in_type = (in == ASE_NULL)? 
ASE_AWK_IN_CONSOLE: ASE_AWK_IN_FILE; nde->in = in; return (ase_awk_nde_t*)nde; } /* valid expression introducer is expected */ awk->errnum = ASE_AWK_EEXPRESSION; return ASE_NULL; } static ase_awk_nde_t* __parse_primary_ident (ase_awk_t* awk) { ase_char_t* name_dup; ase_size_t name_len; ase_awk_bfn_t* bfn; ASE_AWK_ASSERT (awk, MATCH(awk,TOKEN_IDENT)); name_dup = ase_awk_strxdup ( awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name)); if (name_dup == ASE_NULL) { awk->errnum = ASE_AWK_ENOMEM; return ASE_NULL; } name_len = ASE_AWK_STR_LEN(&awk->token.name); if (__get_token(awk) == -1) { ASE_AWK_FREE (awk, name_dup); return ASE_NULL; } /* check if name_dup is a built-in function name */ bfn = ase_awk_getbfn (awk, name_dup, name_len); if (bfn != ASE_NULL) { ase_awk_nde_t* nde; ASE_AWK_FREE (awk, name_dup); if (!MATCH(awk,TOKEN_LPAREN)) { /* built-in function should be in the form * of the function call */ awk->errnum = ASE_AWK_ELPAREN; return ASE_NULL; } nde = __parse_fncall (awk, ASE_NULL, 0, bfn); return (ase_awk_nde_t*)nde; } /* now we know that name_dup is a normal identifier. 
*/
	if (MATCH(awk,TOKEN_LBRACK))
	{
		/* indexed variable. name_dup is released here only when
		 * __parse_hashidx fails */
		ase_awk_nde_t* nde;
		nde = __parse_hashidx (awk, name_dup, name_len);
		if (nde == ASE_NULL) ASE_AWK_FREE (awk, name_dup);
		return (ase_awk_nde_t*)nde;
	}
	else if (MATCH(awk,TOKEN_LPAREN))
	{
		/* function call */
		ase_awk_nde_t* nde;
		nde = __parse_fncall (awk, name_dup, name_len, ASE_NULL);
		if (nde == ASE_NULL) ASE_AWK_FREE (awk, name_dup);
		return (ase_awk_nde_t*)nde;
	}
	else
	{
		/* normal variable */
		ase_awk_nde_var_t* nde;
		ase_size_t idxa;

		nde = (ase_awk_nde_var_t*) ASE_AWK_MALLOC (
			awk, ASE_SIZEOF(ase_awk_nde_var_t));
		if (nde == ASE_NULL)
		{
			ASE_AWK_FREE (awk, name_dup);
			awk->errnum = ASE_AWK_ENOMEM;
			return ASE_NULL;
		}

		/* search the parameter name list */
		idxa = ase_awk_tab_find (
			&awk->parse.params, 0, name_dup, name_len);
		if (idxa != (ase_size_t)-1)
		{
			nde->type = ASE_AWK_NDE_ARG;
			nde->next = ASE_NULL;
			/*nde->id.name = ASE_NULL;*/
			nde->id.name = name_dup;
			nde->id.name_len = name_len;
			nde->id.idxa = idxa;
			nde->idx = ASE_NULL;

			return (ase_awk_nde_t*)nde;
		}

		/* search the local variable list */
		idxa = ase_awk_tab_rrfind (
			&awk->parse.locals, 0, name_dup, name_len);
		if (idxa != (ase_size_t)-1)
		{
			nde->type = ASE_AWK_NDE_LOCAL;
			nde->next = ASE_NULL;
			/*nde->id.name = ASE_NULL;*/
			nde->id.name = name_dup;
			nde->id.name_len = name_len;
			nde->id.idxa = idxa;
			nde->idx = ASE_NULL;

			return (ase_awk_nde_t*)nde;
		}

		/* search the global variable list */
		idxa = ase_awk_tab_rrfind (
			&awk->parse.globals, 0, name_dup, name_len);
		if (idxa != (ase_size_t)-1)
		{
			nde->type = ASE_AWK_NDE_GLOBAL;
			nde->next = ASE_NULL;
			/*nde->id.name = ASE_NULL;*/
			nde->id.name = name_dup;
			nde->id.name_len = name_len;
			nde->id.idxa = idxa;
			nde->idx = ASE_NULL;

			return (ase_awk_nde_t*)nde;
		}

		/* with ASE_AWK_IMPLICIT an unknown name becomes a named
		 * variable that carries no table index */
		if (awk->option & ASE_AWK_IMPLICIT)
		{
			nde->type = ASE_AWK_NDE_NAMED;
			nde->next = ASE_NULL;
			nde->id.name = name_dup;
			nde->id.name_len = name_len;
			nde->id.idxa = (ase_size_t)-1;
			nde->idx = ASE_NULL;

			return (ase_awk_nde_t*)nde;
		}

		/* undefined variable */
		ASE_AWK_FREE (awk, name_dup);
		ASE_AWK_FREE (awk, nde);
		awk->errnum = ASE_AWK_EUNDEF;
		return ASE_NULL;
	}
}

/*
 * Parses the subscript list of an indexed variable: one or more
 * comma-separated expressions up to the closing TOKEN_RBRACK. The
 * current token on entry is consumed first (the caller invokes this
 * on TOKEN_LBRACK).
 *
 * The resulting node type depends on where the name is found:
 * parameter, local, global, or - with ASE_AWK_IMPLICIT - a named
 * variable. On success the node stores the name pointer passed in.
 * On failure the index expressions are released but name is not;
 * the caller frees it.
 */
static ase_awk_nde_t* __parse_hashidx (
	ase_awk_t* awk, ase_char_t* name, ase_size_t name_len)
{
	ase_awk_nde_t* idx, * tmp, * last;
	ase_awk_nde_var_t* nde;
	ase_size_t idxa;

	idx = ASE_NULL;
	last = ASE_NULL;

	do
	{
		/* skip the left bracket or the comma */
		if (__get_token(awk) == -1)
		{
			if (idx != ASE_NULL) ase_awk_clrpt (awk, idx);
			return ASE_NULL;
		}

		tmp = __parse_expression (awk);
		if (tmp == ASE_NULL)
		{
			if (idx != ASE_NULL) ase_awk_clrpt (awk, idx);
			return ASE_NULL;
		}

		/* append the expression to the index chain */
		if (idx == ASE_NULL)
		{
			ASE_AWK_ASSERT (awk, last == ASE_NULL);
			idx = tmp; last = tmp;
		}
		else
		{
			last->next = tmp;
			last = tmp;
		}
	}
	while (MATCH(awk,TOKEN_COMMA));

	ASE_AWK_ASSERT (awk, idx != ASE_NULL);

	if (!MATCH(awk,TOKEN_RBRACK))
	{
		ase_awk_clrpt (awk, idx);
		PANIC (awk, ASE_AWK_ERBRACK);
	}

	if (__get_token(awk) == -1)
	{
		ase_awk_clrpt (awk, idx);
		return ASE_NULL;
	}

	nde = (ase_awk_nde_var_t*)
		ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_var_t));
	if (nde == ASE_NULL)
	{
		ase_awk_clrpt (awk, idx);
		awk->errnum = ASE_AWK_ENOMEM;
		return ASE_NULL;
	}

	/* search the parameter name list */
	idxa = ase_awk_tab_find (&awk->parse.params, 0, name, name_len);
	if (idxa != (ase_size_t)-1)
	{
		nde->type = ASE_AWK_NDE_ARGIDX;
		nde->next = ASE_NULL;
		/*nde->id.name = ASE_NULL; */
		nde->id.name = name;
		nde->id.name_len = name_len;
		nde->id.idxa = idxa;
		nde->idx = idx;

		return (ase_awk_nde_t*)nde;
	}

	/* search the local variable list */
	idxa = ase_awk_tab_rrfind(&awk->parse.locals, 0, name, name_len);
	if (idxa != (ase_size_t)-1)
	{
		nde->type = ASE_AWK_NDE_LOCALIDX;
		nde->next = ASE_NULL;
		/*nde->id.name = ASE_NULL; */
		nde->id.name = name;
		nde->id.name_len = name_len;
		nde->id.idxa = idxa;
		nde->idx = idx;

		return (ase_awk_nde_t*)nde;
	}

	/* search the global variable list */
	idxa = ase_awk_tab_rrfind(&awk->parse.globals, 0, name, name_len);
	if (idxa != (ase_size_t)-1)
	{
		nde->type = ASE_AWK_NDE_GLOBALIDX;
		nde->next = ASE_NULL;
		/*nde->id.name = ASE_NULL;*/
		nde->id.name = name;
		nde->id.name_len = name_len;
		nde->id.idxa = idxa;
		nde->idx = idx;

		return (ase_awk_nde_t*)nde;
	}

	if (awk->option & ASE_AWK_IMPLICIT)
	{
		nde->type = ASE_AWK_NDE_NAMEDIDX;
		nde->next = ASE_NULL;
		nde->id.name = name;
		nde->id.name_len = name_len;
		nde->id.idxa = (ase_size_t)-1;
		nde->idx = idx;

		return (ase_awk_nde_t*)nde;
	}

	/* undefined variable */
	ase_awk_clrpt (awk, idx);
	ASE_AWK_FREE (awk, nde);
	awk->errnum = ASE_AWK_EUNDEF;
	return ASE_NULL;
}

/*
 * Parses the argument list of a function call. The current token on
 * entry is consumed first (callers invoke this on TOKEN_LPAREN).
 *
 * When bfn is not ASE_NULL an ASE_AWK_NDE_BFN node is built from the
 * fields of the built-in function descriptor; otherwise an
 * ASE_AWK_NDE_AFN node is built that stores name and name_len.
 * On failure the parsed arguments are released but name is not.
 */
static ase_awk_nde_t* __parse_fncall (
	ase_awk_t* awk, ase_char_t* name, ase_size_t name_len, ase_awk_bfn_t* bfn)
{
	ase_awk_nde_t* head, * curr, * nde;
	ase_awk_nde_call_t* call;
	ase_size_t nargs;

	if (__get_token(awk) == -1) return ASE_NULL;

	head = curr = ASE_NULL;
	nargs = 0;

	if (MATCH(awk,TOKEN_RPAREN))
	{
		/* no parameters to the function call */
		if (__get_token(awk) == -1) return ASE_NULL;
	}
	else
	{
		/* parse function parameters */

		while (1)
		{
			nde = __parse_expression (awk);
			if (nde == ASE_NULL)
			{
				if (head != ASE_NULL) ase_awk_clrpt (awk, head);
				return ASE_NULL;
			}

			/* append the argument to the chain */
			if (head == ASE_NULL) head = nde;
			else curr->next = nde;
			curr = nde;

			nargs++;

			if (MATCH(awk,TOKEN_RPAREN))
			{
				if (__get_token(awk) == -1)
				{
					if (head != ASE_NULL)
						ase_awk_clrpt (awk, head);
					return ASE_NULL;
				}
				break;
			}

			if (!MATCH(awk,TOKEN_COMMA))
			{
				if (head != ASE_NULL) ase_awk_clrpt (awk, head);
				PANIC (awk, ASE_AWK_ECOMMA);
			}

			if (__get_token(awk) == -1)
			{
				if (head != ASE_NULL) ase_awk_clrpt (awk, head);
				return ASE_NULL;
			}
		}

	}

	call = (ase_awk_nde_call_t*)
		ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_call_t));
	if (call == ASE_NULL)
	{
		if (head != ASE_NULL) ase_awk_clrpt (awk, head);
		PANIC (awk, ASE_AWK_ENOMEM);
	}

	if (bfn != ASE_NULL)
	{
		call->type = ASE_AWK_NDE_BFN;
		call->next = ASE_NULL;

		/*call->what.bfn = bfn; */
		call->what.bfn.name.ptr = bfn->name.ptr;
		call->what.bfn.name.len = bfn->name.len;
		call->what.bfn.arg.min = bfn->arg.min;
		call->what.bfn.arg.max = bfn->arg.max;
		call->what.bfn.arg.spec = bfn->arg.spec;
		call->what.bfn.handler = bfn->handler;

		call->args = head;
		call->nargs = nargs;
	}
	else
	{
		call->type = ASE_AWK_NDE_AFN;
		call->next =
ASE_NULL; call->what.afn.name.ptr = name; call->what.afn.name.len = name_len; call->args = head; call->nargs = nargs; } return (ase_awk_nde_t*)call; } static ase_awk_nde_t* __parse_if (ase_awk_t* awk) { ase_awk_nde_t* test; ase_awk_nde_t* then_part; ase_awk_nde_t* else_part; ase_awk_nde_if_t* nde; if (!MATCH(awk,TOKEN_LPAREN)) PANIC (awk, ASE_AWK_ELPAREN); if (__get_token(awk) == -1) return ASE_NULL; test = __parse_expression (awk); if (test == ASE_NULL) return ASE_NULL; if (!MATCH(awk,TOKEN_RPAREN)) { ase_awk_clrpt (awk, test); PANIC (awk, ASE_AWK_ERPAREN); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, test); return ASE_NULL; } then_part = __parse_statement (awk); if (then_part == ASE_NULL) { ase_awk_clrpt (awk, test); return ASE_NULL; } if (MATCH(awk,TOKEN_ELSE)) { if (__get_token(awk) == -1) { ase_awk_clrpt (awk, then_part); ase_awk_clrpt (awk, test); return ASE_NULL; } else_part = __parse_statement (awk); if (else_part == ASE_NULL) { ase_awk_clrpt (awk, then_part); ase_awk_clrpt (awk, test); return ASE_NULL; } } else else_part = ASE_NULL; nde = (ase_awk_nde_if_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_if_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, else_part); ase_awk_clrpt (awk, then_part); ase_awk_clrpt (awk, test); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_IF; nde->next = ASE_NULL; nde->test = test; nde->then_part = then_part; nde->else_part = else_part; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_while (ase_awk_t* awk) { ase_awk_nde_t* test, * body; ase_awk_nde_while_t* nde; if (!MATCH(awk,TOKEN_LPAREN)) PANIC (awk, ASE_AWK_ELPAREN); if (__get_token(awk) == -1) return ASE_NULL; test = __parse_expression (awk); if (test == ASE_NULL) return ASE_NULL; if (!MATCH(awk,TOKEN_RPAREN)) { ase_awk_clrpt (awk, test); PANIC (awk, ASE_AWK_ERPAREN); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, test); return ASE_NULL; } body = __parse_statement (awk); if (body == ASE_NULL) { ase_awk_clrpt (awk, test); return ASE_NULL; } 
nde = (ase_awk_nde_while_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_while_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, body); ase_awk_clrpt (awk, test); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_WHILE; nde->next = ASE_NULL; nde->test = test; nde->body = body; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_for (ase_awk_t* awk) { ase_awk_nde_t* init, * test, * incr, * body; ase_awk_nde_for_t* nde; ase_awk_nde_foreach_t* nde2; if (!MATCH(awk,TOKEN_LPAREN)) PANIC (awk, ASE_AWK_ELPAREN); if (__get_token(awk) == -1) return ASE_NULL; if (MATCH(awk,TOKEN_SEMICOLON)) init = ASE_NULL; else { /* this line is very ugly. it checks the entire next * expression or the first element in the expression * is wrapped by a parenthesis */ int no_foreach = MATCH(awk,TOKEN_LPAREN); init = __parse_expression (awk); if (init == ASE_NULL) return ASE_NULL; if (!no_foreach && init->type == ASE_AWK_NDE_EXP_BIN && ((ase_awk_nde_exp_t*)init)->opcode == ASE_AWK_BINOP_IN && __is_plain_var(((ase_awk_nde_exp_t*)init)->left)) { /* switch to foreach */ if (!MATCH(awk,TOKEN_RPAREN)) { ase_awk_clrpt (awk, init); PANIC (awk, ASE_AWK_ERPAREN); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, init); return ASE_NULL; } body = __parse_statement (awk); if (body == ASE_NULL) { ase_awk_clrpt (awk, init); return ASE_NULL; } nde2 = (ase_awk_nde_foreach_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_foreach_t)); if (nde2 == ASE_NULL) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, body); PANIC (awk, ASE_AWK_ENOMEM); } nde2->type = ASE_AWK_NDE_FOREACH; nde2->next = ASE_NULL; nde2->test = init; nde2->body = body; return (ase_awk_nde_t*)nde2; } if (!MATCH(awk,TOKEN_SEMICOLON)) { ase_awk_clrpt (awk, init); PANIC (awk, ASE_AWK_ESEMICOLON); } } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, init); return ASE_NULL; } if (MATCH(awk,TOKEN_SEMICOLON)) test = ASE_NULL; else { test = __parse_expression (awk); if (test == ASE_NULL) { ase_awk_clrpt (awk, init); return ASE_NULL; } if 
(!MATCH(awk,TOKEN_SEMICOLON)) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); PANIC (awk, ASE_AWK_ESEMICOLON); } } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); return ASE_NULL; } if (MATCH(awk,TOKEN_RPAREN)) incr = ASE_NULL; else { incr = __parse_expression (awk); if (incr == ASE_NULL) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); return ASE_NULL; } if (!MATCH(awk,TOKEN_RPAREN)) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); ase_awk_clrpt (awk, incr); PANIC (awk, ASE_AWK_ERPAREN); } } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); ase_awk_clrpt (awk, incr); return ASE_NULL; } body = __parse_statement (awk); if (body == ASE_NULL) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); ase_awk_clrpt (awk, incr); return ASE_NULL; } nde = (ase_awk_nde_for_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_for_t)); if (nde == ASE_NULL) { ase_awk_clrpt (awk, init); ase_awk_clrpt (awk, test); ase_awk_clrpt (awk, incr); ase_awk_clrpt (awk, body); PANIC (awk, ASE_AWK_ENOMEM); } nde->type = ASE_AWK_NDE_FOR; nde->next = ASE_NULL; nde->init = init; nde->test = test; nde->incr = incr; nde->body = body; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_dowhile (ase_awk_t* awk) { ase_awk_nde_t* test, * body; ase_awk_nde_while_t* nde; body = __parse_statement (awk); if (body == ASE_NULL) return ASE_NULL; if (!MATCH(awk,TOKEN_WHILE)) { ase_awk_clrpt (awk, body); PANIC (awk, ASE_AWK_EWHILE); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, body); return ASE_NULL; } if (!MATCH(awk,TOKEN_LPAREN)) { ase_awk_clrpt (awk, body); PANIC (awk, ASE_AWK_ELPAREN); } if (__get_token(awk) == -1) { ase_awk_clrpt (awk, body); return ASE_NULL; } test = __parse_expression (awk); if (test == ASE_NULL) { ase_awk_clrpt (awk, body); return ASE_NULL; } if (!MATCH(awk,TOKEN_RPAREN)) { ase_awk_clrpt (awk, body); ase_awk_clrpt (awk, test); PANIC (awk, ASE_AWK_ERPAREN); } if (__get_token(awk) == 
-1)
	{
		ase_awk_clrpt (awk, body);
		ase_awk_clrpt (awk, test);
		return ASE_NULL;
	}

	nde = (ase_awk_nde_while_t*)
		ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_while_t));
	if (nde == ASE_NULL)
	{
		ase_awk_clrpt (awk, body);
		ase_awk_clrpt (awk, test);
		PANIC (awk, ASE_AWK_ENOMEM);
	}

	nde->type = ASE_AWK_NDE_DOWHILE;
	nde->next = ASE_NULL;
	nde->test = test;
	nde->body = body;

	return (ase_awk_nde_t*)nde;
}

/*
 * Builds an ASE_AWK_NDE_BREAK node. Fails with ASE_AWK_EBREAK when
 * awk->parse.depth.cur.loop is not positive. No token is consumed
 * here.
 */
static ase_awk_nde_t* __parse_break (ase_awk_t* awk)
{
	ase_awk_nde_break_t* nde;

	if (awk->parse.depth.cur.loop <= 0) PANIC (awk, ASE_AWK_EBREAK);

	nde = (ase_awk_nde_break_t*)
		ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_break_t));
	if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM);
	nde->type = ASE_AWK_NDE_BREAK;
	nde->next = ASE_NULL;

	return (ase_awk_nde_t*)nde;
}

/*
 * Builds an ASE_AWK_NDE_CONTINUE node. Fails with ASE_AWK_ECONTINUE
 * when awk->parse.depth.cur.loop is not positive. No token is
 * consumed here.
 */
static ase_awk_nde_t* __parse_continue (ase_awk_t* awk)
{
	ase_awk_nde_continue_t* nde;

	if (awk->parse.depth.cur.loop <= 0) PANIC (awk, ASE_AWK_ECONTINUE);

	nde = (ase_awk_nde_continue_t*) ASE_AWK_MALLOC (
		awk, ASE_SIZEOF(ase_awk_nde_continue_t));
	if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM);
	nde->type = ASE_AWK_NDE_CONTINUE;
	nde->next = ASE_NULL;

	return (ase_awk_nde_t*)nde;
}

/*
 * Builds an ASE_AWK_NDE_RETURN node. The return value is ASE_NULL when
 * the current token is TOKEN_SEMICOLON; otherwise it is the expression
 * parsed by __parse_expression. The node is freed again when that
 * expression fails to parse.
 */
static ase_awk_nde_t* __parse_return (ase_awk_t* awk)
{
	ase_awk_nde_return_t* nde;
	ase_awk_nde_t* val;

	nde = (ase_awk_nde_return_t*) ASE_AWK_MALLOC (
		awk, ASE_SIZEOF(ase_awk_nde_return_t));
	if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM);
	nde->type = ASE_AWK_NDE_RETURN;
	nde->next = ASE_NULL;

	if (MATCH(awk,TOKEN_SEMICOLON))
	{
		/* no return value */
		val = ASE_NULL;
	}
	else
	{
		val = __parse_expression (awk);
		if (val == ASE_NULL)
		{
			ASE_AWK_FREE (awk, nde);
			return ASE_NULL;
		}
	}

	nde->val = val;
	return (ase_awk_nde_t*)nde;
}

/*
 * Builds an ASE_AWK_NDE_EXIT node; the exit code expression is
 * optional in the same way as the value of a return statement.
 */
static ase_awk_nde_t* __parse_exit (ase_awk_t* awk)
{
	ase_awk_nde_exit_t* nde;
	ase_awk_nde_t* val;

	nde = (ase_awk_nde_exit_t*)
		ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_exit_t));
	if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM);
	nde->type = ASE_AWK_NDE_EXIT;
	nde->next = ASE_NULL;

	if (MATCH(awk,TOKEN_SEMICOLON))
	{
		/* no exit code */
		val
= ASE_NULL; } else { val = __parse_expression (awk); if (val == ASE_NULL) { ASE_AWK_FREE (awk, nde); return ASE_NULL; } } nde->val = val; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_delete (ase_awk_t* awk) { ase_awk_nde_delete_t* nde; ase_awk_nde_t* var; if (!MATCH(awk,TOKEN_IDENT)) PANIC (awk, ASE_AWK_EIDENT); var = __parse_primary_ident (awk); if (var == ASE_NULL) return ASE_NULL; if (!__is_var (var)) { /* a normal identifier is expected */ ase_awk_clrpt (awk, var); PANIC (awk, ASE_AWK_EIDENT); } nde = (ase_awk_nde_delete_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_delete_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_DELETE; nde->next = ASE_NULL; nde->var = var; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_print (ase_awk_t* awk, int type) { ase_awk_nde_print_t* nde; ase_awk_nde_t* args = ASE_NULL; ase_awk_nde_t* out = ASE_NULL; int out_type; if (!MATCH(awk,TOKEN_SEMICOLON) && !MATCH(awk,TOKEN_GT) && !MATCH(awk,TOKEN_RSHIFT) && !MATCH(awk,TOKEN_BOR) && !MATCH(awk,TOKEN_BORAND)) { ase_awk_nde_t* args_tail; ase_awk_nde_t* tail_prev; args = __parse_expression (awk); if (args == ASE_NULL) return ASE_NULL; args_tail = args; tail_prev = ASE_NULL; if (args->type != ASE_AWK_NDE_GRP) { /* args->type == ASE_AWK_NDE_GRP when print (a, b, c) * args->type != ASE_AWK_NDE_GRP when print a, b, c */ while (MATCH(awk,TOKEN_COMMA)) { if (__get_token(awk) == -1) { ase_awk_clrpt (awk, args); return ASE_NULL; } args_tail->next = __parse_expression (awk); if (args_tail->next == ASE_NULL) { ase_awk_clrpt (awk, args); return ASE_NULL; } tail_prev = args_tail; args_tail = args_tail->next; } } /* print 1 > 2 would print 1 to the file named 2. 
* print (1 > 2) would print (1 > 2) on the console */ if (awk->token.prev.type != TOKEN_RPAREN && args_tail->type == ASE_AWK_NDE_EXP_BIN) { ase_awk_nde_exp_t* ep = (ase_awk_nde_exp_t*)args_tail; if (ep->opcode == ASE_AWK_BINOP_GT) { ase_awk_nde_t* tmp = args_tail; if (tail_prev != ASE_NULL) tail_prev->next = ep->left; else args = ep->left; out = ep->right; out_type = ASE_AWK_OUT_FILE; ASE_AWK_FREE (awk, tmp); } else if (ep->opcode == ASE_AWK_BINOP_RSHIFT) { ase_awk_nde_t* tmp = args_tail; if (tail_prev != ASE_NULL) tail_prev->next = ep->left; else args = ep->left; out = ep->right; out_type = ASE_AWK_OUT_FILE_APPEND; ASE_AWK_FREE (awk, tmp); } else if (ep->opcode == ASE_AWK_BINOP_BOR) { ase_awk_nde_t* tmp = args_tail; if (tail_prev != ASE_NULL) tail_prev->next = ep->left; else args = ep->left; out = ep->right; out_type = ASE_AWK_OUT_PIPE; ASE_AWK_FREE (awk, tmp); } } } if (out == ASE_NULL) { out_type = MATCH(awk,TOKEN_GT)? ASE_AWK_OUT_FILE: MATCH(awk,TOKEN_RSHIFT)? ASE_AWK_OUT_FILE_APPEND: MATCH(awk,TOKEN_BOR)? ASE_AWK_OUT_PIPE: MATCH(awk,TOKEN_BORAND)? 
ASE_AWK_OUT_COPROC: ASE_AWK_OUT_CONSOLE; if (out_type != ASE_AWK_OUT_CONSOLE) { if (__get_token(awk) == -1) { if (args != ASE_NULL) ase_awk_clrpt (awk, args); return ASE_NULL; } out = __parse_expression(awk); if (out == ASE_NULL) { if (args != ASE_NULL) ase_awk_clrpt (awk, args); return ASE_NULL; } } } nde = (ase_awk_nde_print_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_print_t)); if (nde == ASE_NULL) { if (args != ASE_NULL) ase_awk_clrpt (awk, args); if (out != ASE_NULL) ase_awk_clrpt (awk, out); awk->errnum = ASE_AWK_ENOMEM; return ASE_NULL; } ASE_AWK_ASSERTX (awk, type == ASE_AWK_NDE_PRINT || type == ASE_AWK_NDE_PRINTF, "the node type should be either ASE_AWK_NDE_PRINT or ASE_AWK_NDE_PRINTF"); if (type == ASE_AWK_NDE_PRINTF && args == ASE_NULL) { if (out != ASE_NULL) ase_awk_clrpt (awk, out); awk->errnum = ASE_AWK_EPRINTFARG; return ASE_NULL; } nde->type = type; nde->next = ASE_NULL; nde->args = args; nde->out_type = out_type; nde->out = out; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_next (ase_awk_t* awk) { ase_awk_nde_next_t* nde; if (awk->parse.id.block == PARSE_BEGIN_BLOCK || awk->parse.id.block == PARSE_END_BLOCK) { PANIC (awk, ASE_AWK_ENEXT); } nde = (ase_awk_nde_next_t*) ASE_AWK_MALLOC (awk, ASE_SIZEOF(ase_awk_nde_next_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_NEXT; nde->next = ASE_NULL; return (ase_awk_nde_t*)nde; } static ase_awk_nde_t* __parse_nextfile (ase_awk_t* awk, int out) { ase_awk_nde_nextfile_t* nde; if (awk->parse.id.block == PARSE_BEGIN_BLOCK || awk->parse.id.block == PARSE_END_BLOCK) { PANIC (awk, ASE_AWK_ENEXTFILE); } nde = (ase_awk_nde_nextfile_t*) ASE_AWK_MALLOC ( awk, ASE_SIZEOF(ase_awk_nde_nextfile_t)); if (nde == ASE_NULL) PANIC (awk, ASE_AWK_ENOMEM); nde->type = ASE_AWK_NDE_NEXTFILE; nde->next = ASE_NULL; nde->out = out; return (ase_awk_nde_t*)nde; } static int __get_token (ase_awk_t* awk) { ase_cint_t c; ase_size_t line; int n; line = awk->token.line; awk->token.prev.type = 
awk->token.type; awk->token.prev.line = awk->token.line; awk->token.prev.column = awk->token.column; do { if (__skip_spaces(awk) == -1) return -1; if ((n = __skip_comment(awk)) == -1) return -1; } while (n == 1); ase_awk_str_clear (&awk->token.name); awk->token.line = awk->src.lex.line; awk->token.column = awk->src.lex.column; if (line != 0 && (awk->option & ASE_AWK_BLOCKLESS) && (awk->parse.id.block == PARSE_PATTERN || awk->parse.id.block == PARSE_BEGIN || awk->parse.id.block == PARSE_END)) { if (awk->token.line != line) { SET_TOKEN_TYPE (awk, TOKEN_NEWLINE); return 0; } } c = awk->src.lex.curc; if (c == ASE_CHAR_EOF) { SET_TOKEN_TYPE (awk, TOKEN_EOF); } else if (ASE_AWK_ISDIGIT (awk, c)/*|| c == ASE_T('.')*/) { if (__get_number (awk) == -1) return -1; } else if (c == ASE_T('.')) { if (__get_char (awk) == -1) return -1; c = awk->src.lex.curc; if (ASE_AWK_ISDIGIT (awk, c)) { if (__unget_char (awk, c) == -1) return -1; if (__get_number (awk) == -1) return -1; } else { awk->errnum = ASE_AWK_ELXCHR; return -1; } } else if (ASE_AWK_ISALPHA (awk, c) || c == ASE_T('_')) { int type; /* identifier */ do { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } while (ASE_AWK_ISALPHA (awk, c) || c == ASE_T('_') || ASE_AWK_ISDIGIT(awk,c)); type = __classify_ident (awk, ASE_AWK_STR_BUF(&awk->token.name), ASE_AWK_STR_LEN(&awk->token.name)); SET_TOKEN_TYPE (awk, type); } else if (c == ASE_T('\"')) { SET_TOKEN_TYPE (awk, TOKEN_STR); if (__get_charstr(awk) == -1) return -1; while (awk->option & ASE_AWK_STRCONCAT) { do { if (__skip_spaces(awk) == -1) return -1; if ((n = __skip_comment(awk)) == -1) return -1; } while (n == 1); c = awk->src.lex.curc; if (c != ASE_T('\"')) break; if (__get_charstr(awk) == -1) return -1; } } else if (c == ASE_T('=')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_EQ); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_ASSIGN); } } else if (c == ASE_T('!')) { ADD_TOKEN_CHAR (awk, 
c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_NE); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('~')) { SET_TOKEN_TYPE (awk, TOKEN_NM); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_NOT); } } else if (c == ASE_T('>')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if ((awk->option & ASE_AWK_SHIFT) && c == ASE_T('>')) { SET_TOKEN_TYPE (awk, TOKEN_RSHIFT); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_GE); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_GT); } } else if (c == ASE_T('<')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if ((awk->option & ASE_AWK_SHIFT) && c == ASE_T('<')) { SET_TOKEN_TYPE (awk, TOKEN_LSHIFT); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_LE); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_LT); } } else if (c == ASE_T('|')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('|')) { SET_TOKEN_TYPE (awk, TOKEN_LOR); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if ((awk->option & ASE_AWK_COPROC) && c == ASE_T('&')) { SET_TOKEN_TYPE (awk, TOKEN_BORAND); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_BOR); } } else if (c == ASE_T('&')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('&')) { SET_TOKEN_TYPE (awk, TOKEN_LAND); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_BAND); } } else if (c == ASE_T('~')) { SET_TOKEN_TYPE (awk, TOKEN_TILDE); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('^')) { SET_TOKEN_TYPE (awk, TOKEN_BXOR); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('+')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('+')) { SET_TOKEN_TYPE (awk, TOKEN_PLUSPLUS); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('=')) { SET_TOKEN_TYPE 
(awk, TOKEN_PLUS_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_PLUS); } } else if (c == ASE_T('-')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('-')) { SET_TOKEN_TYPE (awk, TOKEN_MINUSMINUS); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_MINUS_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_MINUS); } } else if (c == ASE_T('*')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_MUL_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('*')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_EXP_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_EXP); } } else { SET_TOKEN_TYPE (awk, TOKEN_MUL); } } else if (c == ASE_T('/')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_DIV_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if ((awk->option & ASE_AWK_IDIV) && c == ASE_T('/')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_IDIV_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_IDIV); } } else { SET_TOKEN_TYPE (awk, TOKEN_DIV); } } #if 0 /* TODO: is it a good idea to use a back-slash for * the idiv operator like BASIC? 
*/ else if ((awk->option & ASE_AWK_IDIV) && c == ASE_T('\\')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_IDIV_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_IDIV); } } #endif else if (c == ASE_T('%')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('=')) { SET_TOKEN_TYPE (awk, TOKEN_MOD_ASSIGN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { SET_TOKEN_TYPE (awk, TOKEN_MOD); } } else if (c == ASE_T('(')) { SET_TOKEN_TYPE (awk, TOKEN_LPAREN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T(')')) { SET_TOKEN_TYPE (awk, TOKEN_RPAREN); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('{')) { SET_TOKEN_TYPE (awk, TOKEN_LBRACE); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('}')) { SET_TOKEN_TYPE (awk, TOKEN_RBRACE); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('[')) { SET_TOKEN_TYPE (awk, TOKEN_LBRACK); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T(']')) { SET_TOKEN_TYPE (awk, TOKEN_RBRACK); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('$')) { SET_TOKEN_TYPE (awk, TOKEN_DOLLAR); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T(',')) { SET_TOKEN_TYPE (awk, TOKEN_COMMA); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('.')) { SET_TOKEN_TYPE (awk, TOKEN_PERIOD); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T(';')) { /* TODO: more check on the newline terminator... 
*/ SET_TOKEN_TYPE (awk, TOKEN_SEMICOLON); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T(':')) { SET_TOKEN_TYPE (awk, TOKEN_COLON); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else if (c == ASE_T('?')) { SET_TOKEN_TYPE (awk, TOKEN_QUEST); ADD_TOKEN_CHAR (awk, c); GET_CHAR (awk); } else { awk->errnum = ASE_AWK_ELXCHR; return -1; } return 0; } static int __get_number (ase_awk_t* awk) { ase_cint_t c; ASE_AWK_ASSERT (awk, ASE_AWK_STR_LEN(&awk->token.name) == 0); SET_TOKEN_TYPE (awk, TOKEN_INT); c = awk->src.lex.curc; if (c == ASE_T('0')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('x') || c == ASE_T('X')) { /* hexadecimal number */ do { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } while (ASE_AWK_ISXDIGIT (awk, c)); return 0; } #if 0 else if (c == ASE_T('b') || c == ASE_T('B')) { /* binary number */ do { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } while (c == ASE_T('0') || c == ASE_T('1')); return 0; } #endif else if (c != '.') { /* octal number */ while (c >= ASE_T('0') && c <= ASE_T('7')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } return 0; } } while (ASE_AWK_ISDIGIT (awk, c)) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } if (c == ASE_T('.')) { /* floating-point number */ SET_TOKEN_TYPE (awk, TOKEN_REAL); ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); while (ASE_AWK_ISDIGIT (awk, c)) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } } if (c == ASE_T('E') || c == ASE_T('e')) { SET_TOKEN_TYPE (awk, TOKEN_REAL); ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); if (c == ASE_T('+') || c == ASE_T('-')) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } while (ASE_AWK_ISDIGIT (awk, c)) { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } } return 0; } static int __get_charstr (ase_awk_t* awk) { if (awk->src.lex.curc != ASE_T('\"')) { /* the starting quote has been consumed before this function * has been called */ ADD_TOKEN_CHAR (awk, awk->src.lex.curc); } return __get_string (awk, ASE_T('\"'), ASE_T('\\'), ase_false); } 
static int __get_rexstr (ase_awk_t* awk) { if (awk->src.lex.curc == ASE_T('/')) { /* this part of the function is different from __get_charstr * because of the way this function is called */ GET_CHAR (awk); return 0; } else { ADD_TOKEN_CHAR (awk, awk->src.lex.curc); return __get_string (awk, ASE_T('/'), ASE_T('\\'), ase_true); } } static int __get_string ( ase_awk_t* awk, ase_char_t end_char, ase_char_t esc_char, ase_bool_t keep_esc_char) { ase_cint_t c; int escaped = 0; int digit_count = 0; ase_cint_t c_acc = 0; while (1) { GET_CHAR_TO (awk, c); if (c == ASE_CHAR_EOF) { awk->errnum = ASE_AWK_EENDSTR; return -1; } if (escaped == 3) { if (c >= ASE_T('0') && c <= ASE_T('7')) { c_acc = c_acc * 8 + c - ASE_T('0'); digit_count++; if (digit_count >= escaped) { ADD_TOKEN_CHAR (awk, c_acc); escaped = 0; } continue; } else { ADD_TOKEN_CHAR (awk, c_acc); escaped = 0; } } else if (escaped == 2 || escaped == 4 || escaped == 8) { if (c >= ASE_T('0') && c <= ASE_T('9')) { c_acc = c_acc * 16 + c - ASE_T('0'); digit_count++; if (digit_count >= escaped) { ADD_TOKEN_CHAR (awk, c_acc); escaped = 0; } continue; } else if (c >= ASE_T('A') && c <= ASE_T('F')) { c_acc = c_acc * 16 + c - ASE_T('A') + 10; digit_count++; if (digit_count >= escaped) { ADD_TOKEN_CHAR (awk, c_acc); escaped = 0; } continue; } else if (c >= ASE_T('a') && c <= ASE_T('f')) { c_acc = c_acc * 16 + c - ASE_T('a') + 10; digit_count++; if (digit_count >= escaped) { ADD_TOKEN_CHAR (awk, c_acc); escaped = 0; } continue; } else { ase_char_t rc; rc = (escaped == 2)? ASE_T('x'): (escaped == 4)? 
ASE_T('u'): ASE_T('U'); if (digit_count == 0) ADD_TOKEN_CHAR (awk, rc); else ADD_TOKEN_CHAR (awk, c_acc); escaped = 0; } } if (escaped == 0 && c == end_char) { /* terminating quote */ /*GET_CHAR_TO (awk, c);*/ GET_CHAR (awk); break; } if (escaped == 0 && c == esc_char) { escaped = 1; continue; } if (escaped == 1) { if (c == ASE_T('n')) c = ASE_T('\n'); else if (c == ASE_T('r')) c = ASE_T('\r'); else if (c == ASE_T('t')) c = ASE_T('\t'); else if (c == ASE_T('f')) c = ASE_T('\f'); else if (c == ASE_T('b')) c = ASE_T('\b'); else if (c == ASE_T('v')) c = ASE_T('\v'); else if (c == ASE_T('a')) c = ASE_T('\a'); else if (c >= ASE_T('0') && c <= ASE_T('7')) { escaped = 3; digit_count = 1; c_acc = c - ASE_T('0'); continue; } else if (c == ASE_T('x')) { escaped = 2; digit_count = 0; c_acc = 0; continue; } #ifdef ASE_CHAR_IS_WCHAR else if (c == ASE_T('u') && ASE_SIZEOF(ase_char_t) >= 2) { escaped = 4; digit_count = 0; c_acc = 0; continue; } else if (c == ASE_T('U') && ASE_SIZEOF(ase_char_t) >= 4) { escaped = 8; digit_count = 0; c_acc = 0; continue; } #endif else if (keep_esc_char) { ADD_TOKEN_CHAR (awk, esc_char); } escaped = 0; } ADD_TOKEN_CHAR (awk, c); } return 0; } static int __get_char (ase_awk_t* awk) { ase_ssize_t n; /*ase_char_t c;*/ if (awk->src.lex.ungotc_count > 0) { awk->src.lex.curc = awk->src.lex.ungotc[--awk->src.lex.ungotc_count]; return 0; } if (awk->src.shared.buf_pos >= awk->src.shared.buf_len) { n = awk->src.ios.in ( ASE_AWK_IO_READ, awk->src.ios.custom_data, awk->src.shared.buf, ASE_COUNTOF(awk->src.shared.buf)); if (n <= -1) { awk->errnum = ASE_AWK_ESRCINREAD; return -1; } if (n == 0) { awk->src.lex.curc = ASE_CHAR_EOF; return 0; } awk->src.shared.buf_pos = 0; awk->src.shared.buf_len = n; } awk->src.lex.curc = awk->src.shared.buf[awk->src.shared.buf_pos++]; if (awk->src.lex.curc == ASE_T('\n')) { awk->src.lex.line++; awk->src.lex.column = 1; } else awk->src.lex.column++; return 0; } static int __unget_char (ase_awk_t* awk, ase_cint_t c) { if 
(awk->src.lex.ungotc_count >= ASE_COUNTOF(awk->src.lex.ungotc)) { awk->errnum = ASE_AWK_ELXUNG; return -1; } awk->src.lex.ungotc[awk->src.lex.ungotc_count++] = c; return 0; } static int __skip_spaces (ase_awk_t* awk) { ase_cint_t c = awk->src.lex.curc; while (ASE_AWK_ISSPACE (awk, c)) GET_CHAR_TO (awk, c); return 0; } static int __skip_comment (ase_awk_t* awk) { ase_cint_t c = awk->src.lex.curc; if ((awk->option & ASE_AWK_HASHSIGN) && c == ASE_T('#')) { do { GET_CHAR_TO (awk, c); } while (c != ASE_T('\n') && c != ASE_CHAR_EOF); GET_CHAR (awk); return 1; /* comment by # */ } if (c != ASE_T('/')) return 0; /* not a comment */ GET_CHAR_TO (awk, c); #if 0 if ((awk->option & ASE_AWK_DBLSLASHES) && c == ASE_T('/')) { do { GET_CHAR_TO (awk, c); } while (c != ASE_T('\n') && c != ASE_CHAR_EOF); GET_CHAR (awk); return 1; /* comment by // */ } else #endif if (c == ASE_T('*')) { do { GET_CHAR_TO (awk, c); if (c == ASE_CHAR_EOF) { awk->errnum = ASE_AWK_EENDCOMMENT; return -1; } if (c == ASE_T('*')) { GET_CHAR_TO (awk, c); if (c == ASE_CHAR_EOF) { awk->errnum = ASE_AWK_EENDCOMMENT; return -1; } if (c == ASE_T('/')) { /*GET_CHAR_TO (awk, c);*/ GET_CHAR (awk); break; } } } while (1); return 1; /* c-style comment */ } if (__unget_char (awk, c) == -1) return -1; /* error */ awk->src.lex.curc = ASE_T('/'); return 0; } static int __classify_ident ( ase_awk_t* awk, const ase_char_t* name, ase_size_t len) { struct __kwent* kwp; for (kwp = __kwtab; kwp->name != ASE_NULL; kwp++) { if (kwp->valid != 0 && (awk->option & kwp->valid) == 0) continue; if (ase_awk_strxncmp (kwp->name, kwp->name_len, name, len) == 0) { return kwp->type; } } return TOKEN_IDENT; } static int __assign_to_opcode (ase_awk_t* awk) { static int __assop[] = { ASE_AWK_ASSOP_NONE, ASE_AWK_ASSOP_PLUS, ASE_AWK_ASSOP_MINUS, ASE_AWK_ASSOP_MUL, ASE_AWK_ASSOP_DIV, ASE_AWK_ASSOP_IDIV, ASE_AWK_ASSOP_MOD, ASE_AWK_ASSOP_EXP }; if (awk->token.type >= TOKEN_ASSIGN && awk->token.type <= TOKEN_EXP_ASSIGN) { return 
__assop[awk->token.type - TOKEN_ASSIGN]; } return -1; } static int __is_plain_var (ase_awk_nde_t* nde) { return nde->type == ASE_AWK_NDE_GLOBAL || nde->type == ASE_AWK_NDE_LOCAL || nde->type == ASE_AWK_NDE_ARG || nde->type == ASE_AWK_NDE_NAMED; } static int __is_var (ase_awk_nde_t* nde) { return nde->type == ASE_AWK_NDE_GLOBAL || nde->type == ASE_AWK_NDE_LOCAL || nde->type == ASE_AWK_NDE_ARG || nde->type == ASE_AWK_NDE_NAMED || nde->type == ASE_AWK_NDE_GLOBALIDX || nde->type == ASE_AWK_NDE_LOCALIDX || nde->type == ASE_AWK_NDE_ARGIDX || nde->type == ASE_AWK_NDE_NAMEDIDX; } struct __deparse_func_t { ase_awk_t* awk; ase_char_t* tmp; ase_size_t tmp_len; }; static int __deparse (ase_awk_t* awk) { ase_awk_chain_t* chain; ase_char_t tmp[ASE_SIZEOF(ase_size_t)*8 + 32]; struct __deparse_func_t df; int n = 0, op; ASE_AWK_ASSERT (awk, awk->src.ios.out != ASE_NULL); awk->src.shared.buf_len = 0; awk->src.shared.buf_pos = 0; op = awk->src.ios.out ( ASE_AWK_IO_OPEN, awk->src.ios.custom_data, ASE_NULL, 0); if (op <= -1) { awk->errnum = ASE_AWK_ESRCOUTOPEN; return -1; } if (op == 0) { /* the result of the open operation indicates that the * file has been open but reached the end. so it has to * skip the entire deparsing procedure as it can't write * any single characters on such an io handler. but note * that this is not really an error for the parse and deparser. * * in fact, there are two ways to skip deparsing. * 1. set awk->src.ios.out to NULL. * 2. set awk->src.ios.out to a normal handler but * make it return 0 on the OPEN request. 
*/ n = 0; goto exit_deparse; } #define EXIT_DEPARSE(num) \ do { n = -1; awk->errnum = num ; goto exit_deparse; } while(0) if (awk->tree.nglobals > awk->tree.nbglobals) { ase_size_t i/*, len*/; ASE_AWK_ASSERT (awk, awk->tree.nglobals > 0); if (ase_awk_putsrcstr (awk, ASE_T("global ")) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); for (i = awk->tree.nbglobals; i < awk->tree.nglobals - 1; i++) { /* len = ase_awk_longtostr ((ase_long_t)i, 10, ASE_T("__global"), tmp, ASE_COUNTOF(tmp)); ASE_AWK_ASSERT (awk, len != (ase_size_t)-1); if (ase_awk_putsrcstrx (awk, tmp, len) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); */ if (ase_awk_putsrcstrx (awk, awk->parse.globals.buf[i].name, awk->parse.globals.buf[i].name_len) == -1) { EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (ase_awk_putsrcstr (awk, ASE_T(", ")) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } /* len = ase_awk_longtostr ((ase_long_t)i, 10, ASE_T("__global"), tmp, ASE_COUNTOF(tmp)); ASE_AWK_ASSERT (awk, len != (ase_size_t)-1); if (ase_awk_putsrcstrx (awk, tmp, len) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); */ if (ase_awk_putsrcstrx (awk, awk->parse.globals.buf[i].name, awk->parse.globals.buf[i].name_len) == -1) { EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (ase_awk_putsrcstr (awk, ASE_T(";\n\n")) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } df.awk = awk; df.tmp = tmp; df.tmp_len = ASE_COUNTOF(tmp); if (ase_awk_map_walk (&awk->tree.afns, __deparse_func, &df) == -1) { EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (awk->tree.begin != ASE_NULL) { if (ase_awk_putsrcstr (awk, ASE_T("BEGIN ")) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); if (ase_awk_prnpt (awk, awk->tree.begin) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); if (__put_char (awk, ASE_T('\n')) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } chain = awk->tree.chain; while (chain != ASE_NULL) { if (chain->pattern != ASE_NULL) { if (ase_awk_prnptnpt (awk, chain->pattern) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (chain->action == ASE_NULL) { /* blockless pattern */ if 
(__put_char (awk, ASE_T('\n')) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } else { if (chain->pattern != ASE_NULL) { if (__put_char (awk, ASE_T(' ')) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (ase_awk_prnpt (awk, chain->action) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (__put_char (awk, ASE_T('\n')) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); chain = chain->next; } if (awk->tree.end != ASE_NULL) { if (ase_awk_putsrcstr (awk, ASE_T("END ")) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); if (ase_awk_prnpt (awk, awk->tree.end) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); } if (__flush (awk) == -1) EXIT_DEPARSE (ASE_AWK_ESRCOUTWRITE); exit_deparse: if (awk->src.ios.out ( ASE_AWK_IO_CLOSE, awk->src.ios.custom_data, ASE_NULL, 0) != 0) { if (n == 0) { awk->errnum = ASE_AWK_ESRCOUTCLOSE; n = -1; } } return n; } static int __deparse_func (ase_awk_pair_t* pair, void* arg) { struct __deparse_func_t* df = (struct __deparse_func_t*)arg; ase_awk_afn_t* afn = (ase_awk_afn_t*)pair->val; ase_size_t i, n; ASE_AWK_ASSERT (df->awk, ase_awk_strxncmp ( pair->key, pair->key_len, afn->name, afn->name_len) == 0); if (ase_awk_putsrcstr (df->awk, ASE_T("function ")) == -1) return -1; if (ase_awk_putsrcstr (df->awk, afn->name) == -1) return -1; if (ase_awk_putsrcstr (df->awk, ASE_T(" (")) == -1) return -1; for (i = 0; i < afn->nargs; ) { n = ase_awk_longtostr (i++, 10, ASE_T("__param"), df->tmp, df->tmp_len); ASE_AWK_ASSERT (df->awk, n != (ase_size_t)-1); if (ase_awk_putsrcstrx (df->awk, df->tmp, n) == -1) return -1; if (i >= afn->nargs) break; if (ase_awk_putsrcstr (df->awk, ASE_T(", ")) == -1) return -1; } if (ase_awk_putsrcstr (df->awk, ASE_T(")\n")) == -1) return -1; if (ase_awk_prnpt (df->awk, afn->body) == -1) return -1; if (ase_awk_putsrcstr (df->awk, ASE_T("\n")) == -1) return -1; return 0; } static int __put_char (ase_awk_t* awk, ase_char_t c) { awk->src.shared.buf[awk->src.shared.buf_len++] = c; if (awk->src.shared.buf_len >= ASE_COUNTOF(awk->src.shared.buf)) { if 
(__flush (awk) == -1) return -1; } return 0; } static int __flush (ase_awk_t* awk) { ase_ssize_t n; ASE_AWK_ASSERT (awk, awk->src.ios.out != ASE_NULL); while (awk->src.shared.buf_pos < awk->src.shared.buf_len) { n = awk->src.ios.out ( ASE_AWK_IO_WRITE, awk->src.ios.custom_data, &awk->src.shared.buf[awk->src.shared.buf_pos], awk->src.shared.buf_len - awk->src.shared.buf_pos); if (n <= 0) return -1; awk->src.shared.buf_pos += n; } awk->src.shared.buf_pos = 0; awk->src.shared.buf_len = 0; return 0; } int ase_awk_putsrcstr (ase_awk_t* awk, const ase_char_t* str) { while (*str != ASE_T('\0')) { if (__put_char (awk, *str) == -1) return -1; str++; } return 0; } int ase_awk_putsrcstrx ( ase_awk_t* awk, const ase_char_t* str, ase_size_t len) { const ase_char_t* end = str + len; while (str < end) { if (__put_char (awk, *str) == -1) return -1; str++; } return 0; }