This commit is contained in:
hyung-hwan 2008-07-18 05:03:09 +00:00
parent 4898f5f78c
commit aa43115454

View File

@ -1,5 +1,5 @@
/* /*
* $Id: parse.c 253 2008-07-16 13:45:01Z baconevi $ * $Id: parse.c 254 2008-07-17 11:03:09Z baconevi $
* *
* {License} * {License}
*/ */
@ -215,6 +215,7 @@ static int get_rexstr (ase_awk_t* awk);
static int get_string ( static int get_string (
ase_awk_t* awk, ase_char_t end_char, ase_awk_t* awk, ase_char_t end_char,
ase_char_t esc_char, ase_bool_t keep_esc_char); ase_char_t esc_char, ase_bool_t keep_esc_char);
static int get_symbol (ase_awk_t* awk, ase_cint_t fc);
static int get_char (ase_awk_t* awk); static int get_char (ase_awk_t* awk);
static int unget_char (ase_awk_t* awk, ase_cint_t c); static int unget_char (ase_awk_t* awk, ase_cint_t c);
static int skip_spaces (ase_awk_t* awk); static int skip_spaces (ase_awk_t* awk);
@ -334,16 +335,17 @@ static global_t gtab[] =
}; };
#define GET_CHAR(awk) \ #define GET_CHAR(awk) \
do { if (get_char (awk) == -1) return -1; } while(0) do { if (get_char(awk) == -1) return -1; } while(0)
#define GET_CHAR_TO(awk,c) \ #define GET_CHAR_TO(awk,c) \
do { \ do { \
if (get_char (awk) == -1) return -1; \ if (get_char(awk) == -1) return -1; \
c = (awk)->src.lex.curc; \ c = (awk)->src.lex.curc; \
wprintf (L"GOT CHAR [%c]\n", c); \
} while(0) } while(0)
#define UNGET_CHAR(awk,c) \ #define UNGET_CHAR(awk,c) \
do { if (unget_char (awk, c) == -1) return -1; } while(0) do { wprintf (L"UNGETTING [%C]\n", c); if (unget_char (awk, c) == -1) return -1; } while(0)
#define SET_TOKEN_TYPE(awk,code) do { (awk)->token.type = (code); } while (0) #define SET_TOKEN_TYPE(awk,code) do { (awk)->token.type = (code); } while (0)
@ -356,6 +358,15 @@ static global_t gtab[] =
} \ } \
} while (0) } while (0)
#define ADD_TOKEN_STR(awk,s,l) \
do { \
if (ase_str_ncat(&(awk)->token.name,(s),(l)) == (ase_size_t)-1) \
{ \
ase_awk_seterror (awk, ASE_AWK_ENOMEM, (awk)->token.line, ASE_NULL, 0); \
return -1; \
} \
} while (0)
#define MATCH(awk,token_type) ((awk)->token.type == (token_type)) #define MATCH(awk,token_type) ((awk)->token.type == (token_type))
#define CLRERR(awk) ase_awk_seterrnum(awk,ASE_AWK_ENOERR) #define CLRERR(awk) ase_awk_seterrnum(awk,ASE_AWK_ENOERR)
@ -4513,6 +4524,7 @@ static ase_awk_nde_t* parse_print (ase_awk_t* awk, ase_size_t line, int type)
return (ase_awk_nde_t*)nde; return (ase_awk_nde_t*)nde;
} }
static int get_token (ase_awk_t* awk) static int get_token (ase_awk_t* awk)
{ {
ase_cint_t c; ase_cint_t c;
@ -4540,12 +4552,7 @@ static int get_token (ase_awk_t* awk)
if (c == ASE_CHAR_EOF) if (c == ASE_CHAR_EOF)
{ {
ADD_TOKEN_CHAR (awk, ASE_T('<')); ADD_TOKEN_STR (awk, ASE_T("<EOF>"), 5);
ADD_TOKEN_CHAR (awk, ASE_T('E'));
ADD_TOKEN_CHAR (awk, ASE_T('O'));
ADD_TOKEN_CHAR (awk, ASE_T('F'));
ADD_TOKEN_CHAR (awk, ASE_T('>'));
SET_TOKEN_TYPE (awk, TOKEN_EOF); SET_TOKEN_TYPE (awk, TOKEN_EOF);
} }
else if (c == ASE_T('\n')) else if (c == ASE_T('\n'))
@ -4625,6 +4632,7 @@ static int get_token (ase_awk_t* awk)
} }
} }
#if 0
else if (c == ASE_T('=')) else if (c == ASE_T('='))
{ {
ADD_TOKEN_CHAR (awk, c); ADD_TOKEN_CHAR (awk, c);
@ -4890,26 +4898,6 @@ static int get_token (ase_awk_t* awk)
SET_TOKEN_TYPE (awk, TOKEN_DIV); SET_TOKEN_TYPE (awk, TOKEN_DIV);
} }
} }
#if 0
/* TODO: is it a good idea to use a back-slash for
* the idiv operator like BASIC? */
else if ((awk->option & ASE_AWK_IDIV) && c == ASE_T('\\'))
{
ADD_TOKEN_CHAR (awk, c);
GET_CHAR_TO (awk, c);
if (c == ASE_T('='))
{
SET_TOKEN_TYPE (awk, TOKEN_IDIV_ASSIGN);
ADD_TOKEN_CHAR (awk, c);
GET_CHAR (awk);
}
else
{
SET_TOKEN_TYPE (awk, TOKEN_IDIV);
}
}
#endif
else if (c == ASE_T('%')) else if (c == ASE_T('%'))
{ {
ADD_TOKEN_CHAR (awk, c); ADD_TOKEN_CHAR (awk, c);
@ -4926,59 +4914,19 @@ static int get_token (ase_awk_t* awk)
SET_TOKEN_TYPE (awk, TOKEN_MOD); SET_TOKEN_TYPE (awk, TOKEN_MOD);
} }
} }
/* #endif
else
{ ASE_T("=="), TOKEN_EQ }, {
{ ASE_T("="), TOKEN_ASSIGN }, int n = get_symbol (awk, c);
if (n == -1) return -1;
{ ASE_T("!="), TOKEN_NE }, if (n == 0)
{ ASE_T("!~"), TOKEN_NM }, {
{ ASE_T("!"), TOKEN_LNOT }, ase_char_t cc = (ase_char_t)c;
SETERRARG (awk, ASE_AWK_ELXCHR, awk->token.line, &cc, 1);
{ ASE_T(">>="), TOKEN_RSHIFT_ASSIGN, ASE_AWK_SHIFT }, return -1;
{ ASE_T(">>"), TOKEN_RSHIFT, 0 }, }
{ ASE_T(">="), TOKEN_GE, 0 }, }
{ ASE_T(">"), TOKEN_GT, 0 }, #if 0
{ ASE_T("<<="), TOKEN_LSHIFT_ASSIGN, ASE_AWK_SHIFT },
{ ASE_T("<<"), TOKEN_LSHIFT, ASE_AWK_SHIFT },
{ ASE_T("<="), TOKEN_LE, 0 },
{ ASE_T("<"), TOKEN_LT, 0 },
{ ASE_T("||"), TOKEN_LOR, 0 },
{ ASE_T("|&"), TOKEN_COPROC, ASE_AWK_COPROC },
{ ASE_T("|="), TOKEN_BOR_ASSIGN, 0 },
{ ASE_T("|"), TOKEN_BOR, 0 },
{ ASE_T("&&"), TOKEN_LAND, 0 },
{ ASE_T("&="), TOKEN_BAND_ASSIGN, 0 },
{ ASE_T("&"), TOKEN_BAND, 0 },
{ ASE_T("^="), TOKEN_BXOR_ASSIGN, 0 },
{ ASE_T("^"), TOKEN_BXOR, 0 },
{ ASE_T("++"), TOKEN_PLUSPLUS, 0 },
{ ASE_T("+="), TOKEN_PLUS_ASSIGN, 0 },
{ ASE_T("+"), TOKEN_PLUS, 0 },
{ ASE_T("--"), TOKEN_MINUSMINUS, 0 },
{ ASE_T("-="), TOKEN_MINUS_ASSIGN, 0 },
{ ASE_T("-"), TOKEN_MINUS, 0 },
{ ASE_T("**="), TOKEN_EXP_ASSIGN, 0 },
{ ASE_T("**"), TOKEN_EXP, 0 },
{ ASE_T("*="), TOKEN_MUL_ASSIGN, 0 },
{ ASE_T("*"), TOKEN_MUL, 0 },
{ ASE_T("//="), TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV },
{ ASE_T("//"), TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV },
{ ASE_T("/="), TOKEN_DIV_ASSIGN, 0 },
{ ASE_T("/"), TOKEN_DIV, 0 },
{ ASE_T("%="), TOKEN_MOD_ASSIGN, 0 },
{ ASE_T("%"), TOKEN_MOD, 0 },
*/
else else
{ {
int i; int i;
@ -5018,8 +4966,10 @@ static int get_token (ase_awk_t* awk)
SETERRARG (awk, ASE_AWK_ELXCHR, awk->token.line, &cc, 1); SETERRARG (awk, ASE_AWK_ELXCHR, awk->token.line, &cc, 1);
return -1; return -1;
} }
#endif
get_token_ok: get_token_ok:
wprintf (L"TOKEN=[%S]\n", awk->token.name.buf);
return 0; return 0;
} }
@ -5308,6 +5258,157 @@ static int get_string (
return 0; return 0;
} }
static int get_symbol (ase_awk_t* awk, ase_cint_t fc)
{
int i;
static struct
{
const ase_char_t* s;
unsigned short l;
unsigned short t;
int o;
} tab[] =
{
{ ASE_T("=="), 2, TOKEN_EQ, 0 },
{ ASE_T("="), 1, TOKEN_ASSIGN, 0 },
{ ASE_T("!@#$%"), 6, 1000, 0 },
{ ASE_T("!^"), 2, 1001, 0 },
{ ASE_T("!="), 2, TOKEN_NE, 0 },
{ ASE_T("!~"), 2, TOKEN_NM, 0 },
{ ASE_T("!"), 1, TOKEN_LNOT, 0 },
{ ASE_T(">>="), 3, TOKEN_RSHIFT_ASSIGN, ASE_AWK_SHIFT },
{ ASE_T(">>"), 2, TOKEN_RSHIFT, 0 },
{ ASE_T(">="), 2, TOKEN_GE, 0 },
{ ASE_T(">"), 1, TOKEN_GT, 0 },
{ ASE_T("<<="), 3, TOKEN_LSHIFT_ASSIGN, ASE_AWK_SHIFT },
{ ASE_T("<<"), 2, TOKEN_LSHIFT, ASE_AWK_SHIFT },
{ ASE_T("<="), 2, TOKEN_LE, 0 },
{ ASE_T("<"), 1, TOKEN_LT, 0 },
{ ASE_T("||"), 2, TOKEN_LOR, 0 },
{ ASE_T("|&"), 2, TOKEN_BORAND, ASE_AWK_COPROC },
{ ASE_T("|="), 2, TOKEN_BOR_ASSIGN, 0 },
{ ASE_T("|"), 1, TOKEN_BOR, 0 },
{ ASE_T("&&"), 2, TOKEN_LAND, 0 },
{ ASE_T("&="), 2, TOKEN_BAND_ASSIGN, 0 },
{ ASE_T("&"), 1, TOKEN_BAND, 0 },
{ ASE_T("^="), 2, TOKEN_BXOR_ASSIGN, 0 },
{ ASE_T("^"), 1, TOKEN_BXOR, 0 },
{ ASE_T("++"), 2, TOKEN_PLUSPLUS, 0 },
{ ASE_T("+="), 2, TOKEN_PLUS_ASSIGN, 0 },
{ ASE_T("+"), 1, TOKEN_PLUS, 0 },
{ ASE_T("--"), 2, TOKEN_MINUSMINUS, 0 },
{ ASE_T("-="), 2, TOKEN_MINUS_ASSIGN, 0 },
{ ASE_T("-"), 1, TOKEN_MINUS, 0 },
{ ASE_T("**="), 3, TOKEN_EXP_ASSIGN, 0 },
{ ASE_T("**"), 2, TOKEN_EXP, 0 },
{ ASE_T("*="), 2, TOKEN_MUL_ASSIGN, 0 },
{ ASE_T("*"), 1, TOKEN_MUL, 0 },
{ ASE_T("//="), 3, TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV },
{ ASE_T("//"), 2, TOKEN_IDIV_ASSIGN, ASE_AWK_IDIV },
{ ASE_T("/="), 2, TOKEN_DIV_ASSIGN, 0 },
{ ASE_T("/"), 1, TOKEN_DIV, 0 },
{ ASE_T("%="), 2, TOKEN_MOD_ASSIGN, 0 },
{ ASE_T("%"), 1, TOKEN_MOD, 0 },
{ ASE_T("~"), 1, TOKEN_TILDE, 0 },
{ ASE_T("("), 1, TOKEN_LPAREN, 0 },
{ ASE_T(")"), 1, TOKEN_RPAREN, 0 },
{ ASE_T("{"), 1, TOKEN_LBRACE, 0 },
{ ASE_T("}"), 1, TOKEN_RBRACE, 0 },
{ ASE_T("["), 1, TOKEN_LBRACK, 0 },
{ ASE_T("]"), 1, TOKEN_RBRACK, 0 },
{ ASE_T("$"), 1, TOKEN_DOLLAR, 0 },
{ ASE_T(","), 1, TOKEN_COMMA, 0 },
{ ASE_T(";"), 1, TOKEN_SEMICOLON, 0 },
{ ASE_T(":"), 1, TOKEN_COLON, 0 },
{ ASE_T("?"), 1, TOKEN_QUEST, 0 },
{ ASE_NULL, 0, TOKEN_EOF, 0 }
};
/*
* INPUT: ABCDEFX
*
* TOKENS:
* ABCDEFG
* ABX
*/
for (i = 0; tab[i].s != ASE_NULL; )
{
const ase_char_t* p = tab[i].s;
ase_cint_t c = fc;
ASE_ASSERT (tab[i].l > 0);
if (c == *p)
{
int len, len2;
const ase_char_t* p2;
try_again:
//wprintf (L"PROCESSING ENTRY AT %d fc = [%c] tab[i].s = [%S] c = [%c] p = [%S]\n", i, fc, tab[i].s, c, p);
/* proceed as long as the string matches the stream
* of characters read in */
while (c == *p)
{
if (*++p == ASE_T('\0')) // TODO: CHANGE THIS CHECK TO USE LENGTH
{
/* found a matching entry */
GET_CHAR (awk);
SET_TOKEN_TYPE (awk, tab[i].t);
ADD_TOKEN_STR (awk, tab[i].s, tab[i].l);
return 1;
}
GET_CHAR_TO (awk, c);
}
len = p - tab[i++].s /*+ 1*/;
if (fc != tab[i].s[0]) break;
len2 = tab[i].l;
ASE_ASSERT (len2 > 0);
/* unget as many characters as the length
* difference */
//wprintf (L"UNGET STAGE 1 len=%d len2=%d\n", len, len2);
UNGET_CHAR (awk, c);
while (--len > len2) UNGET_CHAR (awk, *--p);
/* unget different characters from the back */
//wprintf (L"UNGET STAGE 2 len=[%d], len2=[%d]\n", len, len2);
p2 = &tab[i].s[len2-1];
while (len > 0 && *p != *p2)
{
UNGET_CHAR (awk, *p);
p--; p2--; len--;
}
//wprintf (L"UNGET OVER\n");
/* restore the current character */
GET_CHAR_TO (awk, c);
p = p2;
goto try_again;
}
i++;
}
/* not found */
return 0;
}
static int get_char (ase_awk_t* awk) static int get_char (ase_awk_t* awk)
{ {
ase_ssize_t n; ase_ssize_t n;