diff --git a/ase/lib/awk/parse.c b/ase/lib/awk/parse.c index 77501591..c78294c4 100644 --- a/ase/lib/awk/parse.c +++ b/ase/lib/awk/parse.c @@ -1,5 +1,5 @@ /* - * $Id: parse.c 253 2008-07-16 13:45:01Z baconevi $ + * $Id: parse.c 254 2008-07-17 11:03:09Z baconevi $ * * {License} */ @@ -215,6 +215,7 @@ static int get_rexstr (ase_awk_t* awk); static int get_string ( ase_awk_t* awk, ase_char_t end_char, ase_char_t esc_char, ase_bool_t keep_esc_char); +static int get_symbol (ase_awk_t* awk, ase_cint_t fc); static int get_char (ase_awk_t* awk); static int unget_char (ase_awk_t* awk, ase_cint_t c); static int skip_spaces (ase_awk_t* awk); @@ -334,16 +335,17 @@ static global_t gtab[] = }; #define GET_CHAR(awk) \ - do { if (get_char (awk) == -1) return -1; } while(0) + do { if (get_char(awk) == -1) return -1; } while(0) #define GET_CHAR_TO(awk,c) \ do { \ - if (get_char (awk) == -1) return -1; \ + if (get_char(awk) == -1) return -1; \ c = (awk)->src.lex.curc; \ +wprintf (L"GOT CHAR [%c]\n", c); \ } while(0) #define UNGET_CHAR(awk,c) \ - do { if (unget_char (awk, c) == -1) return -1; } while(0) + do { wprintf (L"UNGETTING [%C]\n", c); if (unget_char (awk, c) == -1) return -1; } while(0) #define SET_TOKEN_TYPE(awk,code) do { (awk)->token.type = (code); } while (0) @@ -356,6 +358,15 @@ static global_t gtab[] = } \ } while (0) +#define ADD_TOKEN_STR(awk,s,l) \ + do { \ + if (ase_str_ncat(&(awk)->token.name,(s),(l)) == (ase_size_t)-1) \ + { \ + ase_awk_seterror (awk, ASE_AWK_ENOMEM, (awk)->token.line, ASE_NULL, 0); \ + return -1; \ + } \ + } while (0) + #define MATCH(awk,token_type) ((awk)->token.type == (token_type)) #define CLRERR(awk) ase_awk_seterrnum(awk,ASE_AWK_ENOERR) @@ -4513,6 +4524,7 @@ static ase_awk_nde_t* parse_print (ase_awk_t* awk, ase_size_t line, int type) return (ase_awk_nde_t*)nde; } + static int get_token (ase_awk_t* awk) { ase_cint_t c; @@ -4540,12 +4552,7 @@ static int get_token (ase_awk_t* awk) if (c == ASE_CHAR_EOF) { - ADD_TOKEN_CHAR (awk, ASE_T('<')); - ADD_TOKEN_CHAR (awk, ASE_T('E')); - ADD_TOKEN_CHAR (awk, ASE_T('O')); - ADD_TOKEN_CHAR (awk, ASE_T('F')); - ADD_TOKEN_CHAR (awk, ASE_T('>')); - + ADD_TOKEN_STR (awk, ASE_T(""), 5); SET_TOKEN_TYPE (awk, TOKEN_EOF); } else if (c == ASE_T('\n')) @@ -4625,6 +4632,7 @@ static int get_token (ase_awk_t* awk) } } +#if 0 else if (c == ASE_T('=')) { ADD_TOKEN_CHAR (awk, c); @@ -4890,26 +4898,6 @@ static int get_token (ase_awk_t* awk) SET_TOKEN_TYPE (awk, TOKEN_DIV); } } -#if 0 - /* TODO: is it a good idea to use a back-slash for - * the idiv operator like BASIC? */ - else if ((awk->option & ASE_AWK_IDIV) && c == ASE_T('\\')) - { - ADD_TOKEN_CHAR (awk, c); - GET_CHAR_TO (awk, c); - - if (c == ASE_T('=')) - { - SET_TOKEN_TYPE (awk, TOKEN_IDIV_ASSIGN); - ADD_TOKEN_CHAR (awk, c); - GET_CHAR (awk); - } - else - { - SET_TOKEN_TYPE (awk, TOKEN_IDIV); - } - } -#endif else if (c == ASE_T('%')) { ADD_TOKEN_CHAR (awk, c); @@ -4926,59 +4914,19 @@ static int get_token (ase_awk_t* awk) SET_TOKEN_TYPE (awk, TOKEN_MOD); } } - /* - - { ASE_T("=="), TOKEN_EQ }, - { ASE_T("="), TOKEN_ASSIGN }, - - { ASE_T("!="), TOKEN_NE }, - { ASE_T("!~"), TOKEN_NM }, - { ASE_T("!"), TOKEN_LNOT }, - - { ASE_T(">>="), TOKEN_RSHIFT_ASSIGN, ASE_AWK_SHIFT }, - { ASE_T(">>"), TOKEN_RSHIFT, 0 }, - { ASE_T(">="), TOKEN_GE, 0 }, - { ASE_T(">"), TOKEN_GT, 0 }, - - { ASE_T("<<="), TOKEN_LSHIFT_ASSIGN, ASE_AWK_SHIFT }, - { ASE_T("<<"), TOKEN_LSHIFT, ASE_AWK_SHIFT }, - { ASE_T("<="), TOKEN_LE, 0 }, - { ASE_T("<"), TOKEN_LT, 0 }, - - { ASE_T("||"), TOKEN_LOR, 0 }, - { ASE_T("|&"), TOKEN_COPROC, ASE_AWK_COPROC }, - { ASE_T("|="), TOKEN_BOR_ASSIGN, 0 }, - { ASE_T("|"), TOKEN_BOR, 0 }, - - { ASE_T("&&"), TOKEN_LAND, 0 }, - { ASE_T("&="), TOKEN_BAND_ASSIGN, 0 }, - { ASE_T("&"), TOKEN_BAND, 0 }, - - { ASE_T("^="), TOKEN_BXOR_ASSIGN, 0 }, - { ASE_T("^"), TOKEN_BXOR, 0 }, - - { ASE_T("++"), TOKEN_PLUSPLUS, 0 }, - { ASE_T("+="), TOKEN_PLUS_ASSIGN, 0 }, - { ASE_T("+"), TOKEN_PLUS, 0 }, - - { ASE_T("--"), TOKEN_MINUSMINUS, 0 }, - { ASE_T("-="), TOKEN_MINUS_ASSIGN, 0 }, - { ASE_T("-"), TOKEN_MINUS, 0 }, - - { ASE_T("**="), TOKEN_EXP_ASSIGN, 0 }, - { ASE_T("**"), TOKEN_EXP, 0 }, - { ASE_T("*="), TOKEN_MUL_ASSIGN, 0 }, - { ASE_T("*"), TOKEN_MUL, 0 }, - - { ASE_T("//="), TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV }, - { ASE_T("//"), TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV }, - { ASE_T("/="), TOKEN_DIV_ASSIGN, 0 }, - { ASE_T("/"), TOKEN_DIV, 0 }, - - { ASE_T("%="), TOKEN_MOD_ASSIGN, 0 }, - { ASE_T("%"), TOKEN_MOD, 0 }, - - */ +#endif + else + { + int n = get_symbol (awk, c); + if (n == -1) return -1; + if (n == 0) + { + ase_char_t cc = (ase_char_t)c; + SETERRARG (awk, ASE_AWK_ELXCHR, awk->token.line, &cc, 1); + return -1; + } + } +#if 0 else { int i; @@ -5018,8 +4966,10 @@ static int get_token (ase_awk_t* awk) SETERRARG (awk, ASE_AWK_ELXCHR, awk->token.line, &cc, 1); return -1; } +#endif get_token_ok: +wprintf (L"TOKEN=[%S]\n", awk->token.name.buf); return 0; } @@ -5308,6 +5258,157 @@ static int get_string ( return 0; } +static int get_symbol (ase_awk_t* awk, ase_cint_t fc) +{ + int i; + static struct + { + const ase_char_t* s; + unsigned short l; + unsigned short t; + int o; + } tab[] = + { + { ASE_T("=="), 2, TOKEN_EQ, 0 }, + { ASE_T("="), 1, TOKEN_ASSIGN, 0 }, + +{ ASE_T("!@#$%"), 6, 1000, 0 }, +{ ASE_T("!^"), 2, 1001, 0 }, + + { ASE_T("!="), 2, TOKEN_NE, 0 }, + { ASE_T("!~"), 2, TOKEN_NM, 0 }, + { ASE_T("!"), 1, TOKEN_LNOT, 0 }, + + { ASE_T(">>="), 3, TOKEN_RSHIFT_ASSIGN, ASE_AWK_SHIFT }, + { ASE_T(">>"), 2, TOKEN_RSHIFT, 0 }, + { ASE_T(">="), 2, TOKEN_GE, 0 }, + { ASE_T(">"), 1, TOKEN_GT, 0 }, + + { ASE_T("<<="), 3, TOKEN_LSHIFT_ASSIGN, ASE_AWK_SHIFT }, + { ASE_T("<<"), 2, TOKEN_LSHIFT, ASE_AWK_SHIFT }, + { ASE_T("<="), 2, TOKEN_LE, 0 }, + { ASE_T("<"), 1, TOKEN_LT, 0 }, + + { ASE_T("||"), 2, TOKEN_LOR, 0 }, + { ASE_T("|&"), 2, TOKEN_BORAND, ASE_AWK_COPROC }, + { ASE_T("|="), 2, TOKEN_BOR_ASSIGN, 0 }, + { ASE_T("|"), 1, TOKEN_BOR, 0 }, + + { ASE_T("&&"), 2, TOKEN_LAND, 0 }, + { ASE_T("&="), 2, TOKEN_BAND_ASSIGN, 0 }, + { ASE_T("&"), 1, TOKEN_BAND, 0 }, + + { ASE_T("^="), 2, TOKEN_BXOR_ASSIGN, 0 }, + { ASE_T("^"), 1, TOKEN_BXOR, 0 }, + + { ASE_T("++"), 2, TOKEN_PLUSPLUS, 0 }, + { ASE_T("+="), 2, TOKEN_PLUS_ASSIGN, 0 }, + { ASE_T("+"), 1, TOKEN_PLUS, 0 }, + + { ASE_T("--"), 2, TOKEN_MINUSMINUS, 0 }, + { ASE_T("-="), 2, TOKEN_MINUS_ASSIGN, 0 }, + { ASE_T("-"), 1, TOKEN_MINUS, 0 }, + + { ASE_T("**="), 3, TOKEN_EXP_ASSIGN, 0 }, + { ASE_T("**"), 2, TOKEN_EXP, 0 }, + { ASE_T("*="), 2, TOKEN_MUL_ASSIGN, 0 }, + { ASE_T("*"), 1, TOKEN_MUL, 0 }, + + { ASE_T("//="), 3, TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV }, + { ASE_T("//"), 2, TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV }, + { ASE_T("/="), 2, TOKEN_DIV_ASSIGN, 0 }, + { ASE_T("/"), 1, TOKEN_DIV, 0 }, + + { ASE_T("%="), 2, TOKEN_MOD_ASSIGN, 0 }, + { ASE_T("%"), 1, TOKEN_MOD, 0 }, + + { ASE_T("~"), 1, TOKEN_TILDE, 0 }, + { ASE_T("("), 1, TOKEN_LPAREN, 0 }, + { ASE_T(")"), 1, TOKEN_RPAREN, 0 }, + { ASE_T("{"), 1, TOKEN_LBRACE, 0 }, + { ASE_T("}"), 1, TOKEN_RBRACE, 0 }, + { ASE_T("["), 1, TOKEN_LBRACK, 0 }, + { ASE_T("]"), 1, TOKEN_RBRACK, 0 }, + { ASE_T("$"), 1, TOKEN_DOLLAR, 0 }, + { ASE_T(","), 1, TOKEN_COMMA, 0 }, + { ASE_T(";"), 1, TOKEN_SEMICOLON, 0 }, + { ASE_T(":"), 1, TOKEN_COLON, 0 }, + { ASE_T("?"), 1, TOKEN_QUEST, 0 }, + + { ASE_NULL, 0, TOKEN_EOF, 0 } + }; + + /* + * INPUT: ABCDEFX + * + * TOKENS: + * ABCDEFG + * ABX + */ + for (i = 0; tab[i].s != ASE_NULL; ) + { + const ase_char_t* p = tab[i].s; + ase_cint_t c = fc; + + ASE_ASSERT (tab[i].l > 0); + + if (c == *p) + { + int len, len2; + const ase_char_t* p2; + + try_again: +//wprintf (L"PROCESSING ENTRY AT %d fc = [%c] tab[i].s = [%S] c = [%c] p = [%S]\n", i, fc, tab[i].s, c, p); + /* proceed as long as the string matches the stream + * of characters read in */ + while (c == *p) + { + if (*++p == ASE_T('\0')) // TODO: CHANGE THIS CHECK TO USE LENGTH + { + /* found a matching entry */ + GET_CHAR (awk); + SET_TOKEN_TYPE (awk, tab[i].t); + ADD_TOKEN_STR (awk, tab[i].s, tab[i].l); + return 1; + } + + GET_CHAR_TO (awk, c); + } + + len = p - tab[i++].s /*+ 1*/; + + if (fc != tab[i].s[0]) break; + len2 = tab[i].l; + ASE_ASSERT (len2 > 0); + + /* unget as many characters as the length + * difference */ +//wprintf (L"UNGET STAGE 1 len=%d len2=%d\n", len, len2); + UNGET_CHAR (awk, c); + while (--len > len2) UNGET_CHAR (awk, *--p); + + /* unget different characters from the back */ +//wprintf (L"UNGET STAGE 2 len=[%d], len2=[%d]\n", len, len2); + p2 = &tab[i].s[len2-1]; + while (len > 0 && *p != *p2) + { + UNGET_CHAR (awk, *p); + p--; p2--; len--; + } +//wprintf (L"UNGET OVER\n"); + /* restore the current character */ + GET_CHAR_TO (awk, c); + + p = p2; + goto try_again; + } + i++; + } + + /* not found */ + return 0; +} + static int get_char (ase_awk_t* awk) { ase_ssize_t n;