diff --git a/ase/awk/awk.c b/ase/awk/awk.c index 7cba3115..58e23dc4 100644 --- a/ase/awk/awk.c +++ b/ase/awk/awk.c @@ -1,5 +1,5 @@ /* - * $Id: awk.c,v 1.16 2006-02-01 02:56:12 bacon Exp $ + * $Id: awk.c,v 1.17 2006-02-04 19:31:51 bacon Exp $ */ #include @@ -31,9 +31,26 @@ xp_awk_t* xp_awk_open (xp_awk_t* awk) return XP_NULL; } + if (xp_awk_tab_open(&awk->parse.globals) == XP_NULL) { + xp_str_close (&awk->token.name); + xp_awk_hash_close (&awk->tree.funcs); + if (awk->__dynamic) xp_free (awk); + return XP_NULL; + } + + if (xp_awk_tab_open(&awk->parse.locals) == XP_NULL) { + xp_str_close (&awk->token.name); + xp_awk_hash_close (&awk->tree.funcs); + xp_awk_tab_close (&awk->parse.globals); + if (awk->__dynamic) xp_free (awk); + return XP_NULL; + } + if (xp_awk_tab_open(&awk->parse.params) == XP_NULL) { xp_str_close (&awk->token.name); xp_awk_hash_close (&awk->tree.funcs); + xp_awk_tab_close (&awk->parse.globals); + xp_awk_tab_close (&awk->parse.locals); if (awk->__dynamic) xp_free (awk); return XP_NULL; } @@ -50,6 +67,8 @@ xp_awk_t* xp_awk_open (xp_awk_t* awk) awk->in_arg = XP_NULL; awk->out_arg = XP_NULL; + awk->parse.nlocals_max = 0; + awk->tree.begin = XP_NULL; awk->tree.end = XP_NULL; awk->tree.unnamed = XP_NULL; @@ -66,6 +85,8 @@ int xp_awk_close (xp_awk_t* awk) if (xp_awk_detsrc(awk) == -1) return -1; xp_awk_hash_close (&awk->tree.funcs); + xp_awk_tab_close (&awk->parse.globals); + xp_awk_tab_close (&awk->parse.locals); xp_awk_tab_close (&awk->parse.params); xp_str_close (&awk->token.name); @@ -108,6 +129,7 @@ const xp_char_t* xp_awk_geterrstr (xp_awk_t* awk) XP_TEXT("duplicate END"), XP_TEXT("duplicate function name"), XP_TEXT("duplicate parameter name"), + XP_TEXT("duplicate variable name"), XP_TEXT("duplicate name"), }; @@ -124,6 +146,11 @@ const xp_char_t* xp_awk_geterrstr (xp_awk_t* awk) void xp_awk_clear (xp_awk_t* awk) { + xp_awk_tab_clear (&awk->parse.globals); + xp_awk_tab_clear (&awk->parse.locals); + xp_awk_tab_clear (&awk->parse.params); + 
awk->parse.nlocals_max = 0; + /* clear parse trees */ xp_awk_hash_clear (&awk->tree.funcs); diff --git a/ase/awk/awk.h b/ase/awk/awk.h index d20d42a2..3eec0692 100644 --- a/ase/awk/awk.h +++ b/ase/awk/awk.h @@ -1,5 +1,5 @@ /* - * $Id: awk.h,v 1.23 2006-01-31 16:57:45 bacon Exp $ + * $Id: awk.h,v 1.24 2006-02-04 19:31:51 bacon Exp $ */ #ifndef _XP_AWK_AWK_H_ @@ -45,6 +45,7 @@ enum XP_AWK_EDUPEND, /* duplicate END */ XP_AWK_EDUPFUNC, /* duplicate function name */ XP_AWK_EDUPPARAM, /* duplicate parameter name */ + XP_AWK_EDUPVAR, /* duplicate variable name */ XP_AWK_EDUPNAME /* duplicate name - function, variable, etc */ }; @@ -71,7 +72,8 @@ enum enum { XP_AWK_EXPLICIT = (1 << 0), /* variable requires explicit declaration */ - XP_AWK_UNIQUE = (1 << 1) /* a function name should not coincide to be a variable name */ + XP_AWK_UNIQUE = (1 << 1), /* a function name should not coincide with a variable name */ + XP_AWK_SHADING = (1 << 2) /* allow variable shadowing */ }; struct xp_awk_t @@ -104,9 +106,10 @@ struct xp_awk_t /* temporary information that the parser needs */ struct { - // TODO: locals, globals??? - //xp_awk_tab_t vars; /* global and local variable names... 
*/ + xp_awk_tab_t globals; + xp_awk_tab_t locals; xp_awk_tab_t params; + xp_size_t nlocals_max; } parse; /* source buffer management */ diff --git a/ase/awk/parse.c b/ase/awk/parse.c index 94d466d4..1b087673 100644 --- a/ase/awk/parse.c +++ b/ase/awk/parse.c @@ -1,5 +1,5 @@ /* - * $Id: parse.c,v 1.42 2006-02-01 02:56:12 bacon Exp $ + * $Id: parse.c,v 1.43 2006-02-04 19:31:51 bacon Exp $ */ #include @@ -87,7 +87,8 @@ static xp_awk_node_t* __parse_function (xp_awk_t* awk); static xp_awk_node_t* __parse_begin (xp_awk_t* awk); static xp_awk_node_t* __parse_end (xp_awk_t* awk); static xp_awk_node_t* __parse_action (xp_awk_t* awk); -static xp_awk_node_t* __parse_block (xp_awk_t* awk); +static xp_awk_node_t* __parse_block (xp_awk_t* awk, xp_bool_t is_top); +static xp_awk_t* __collect_locals (xp_awk_t* awk, xp_size_t nlocals); static xp_awk_node_t* __parse_statement (xp_awk_t* awk); static xp_awk_node_t* __parse_statement_nb (xp_awk_t* awk); static xp_awk_node_t* __parse_expression (xp_awk_t* awk); @@ -114,43 +115,42 @@ static int __get_char (xp_awk_t* awk); static int __unget_char (xp_awk_t* awk, xp_cint_t c); static int __skip_spaces (xp_awk_t* awk); static int __skip_comment (xp_awk_t* awk); -static int __classfy_ident (const xp_char_t* ident); +static int __classify_ident (xp_awk_t* awk, const xp_char_t* ident); static xp_long_t __str_to_long (const xp_char_t* name); static INLINE xp_size_t __find_func_arg (xp_awk_t* awk, const xp_char_t* name); static INLINE xp_size_t __find_variable (xp_awk_t* awk, const xp_char_t* name); - struct __kwent { const xp_char_t* name; int type; + int valid; /* the entry is valid when this option is set */ }; static struct __kwent __kwtab[] = { - { XP_TEXT("BEGIN"), TOKEN_BEGIN }, - { XP_TEXT("END"), TOKEN_END }, - { XP_TEXT("function"), TOKEN_FUNCTION }, - { XP_TEXT("if"), TOKEN_IF }, - { XP_TEXT("else"), TOKEN_ELSE }, - { XP_TEXT("while"), TOKEN_WHILE }, - { XP_TEXT("for"), TOKEN_FOR }, - { XP_TEXT("do"), TOKEN_DO }, - { 
XP_TEXT("break"), TOKEN_BREAK }, - { XP_TEXT("continue"), TOKEN_CONTINUE }, - { XP_TEXT("return"), TOKEN_RETURN }, - { XP_TEXT("exit"), TOKEN_EXIT }, - { XP_TEXT("delete"), TOKEN_DELETE }, - { XP_TEXT("next"), TOKEN_NEXT }, - { XP_TEXT("nextfile"), TOKEN_NEXTFILE }, + { XP_TEXT("BEGIN"), TOKEN_BEGIN, 0 }, + { XP_TEXT("END"), TOKEN_END, 0 }, + { XP_TEXT("function"), TOKEN_FUNCTION, 0 }, + { XP_TEXT("if"), TOKEN_IF, 0 }, + { XP_TEXT("else"), TOKEN_ELSE, 0 }, + { XP_TEXT("while"), TOKEN_WHILE, 0 }, + { XP_TEXT("for"), TOKEN_FOR, 0 }, + { XP_TEXT("do"), TOKEN_DO, 0 }, + { XP_TEXT("break"), TOKEN_BREAK, 0 }, + { XP_TEXT("continue"), TOKEN_CONTINUE, 0 }, + { XP_TEXT("return"), TOKEN_RETURN, 0 }, + { XP_TEXT("exit"), TOKEN_EXIT, 0 }, + { XP_TEXT("delete"), TOKEN_DELETE, 0 }, + { XP_TEXT("next"), TOKEN_NEXT, 0 }, + { XP_TEXT("nextfile"), TOKEN_NEXTFILE, 0 }, -// TODO: don't return TOKEN_LOCAL & TOKEN_GLOBAL when explicit variable declaration is disabled. - { XP_TEXT("local"), TOKEN_LOCAL }, - { XP_TEXT("global"), TOKEN_GLOBAL }, + { XP_TEXT("local"), TOKEN_LOCAL, XP_AWK_EXPLICIT }, + { XP_TEXT("global"), TOKEN_GLOBAL, XP_AWK_EXPLICIT }, - { XP_NULL, 0 }, + { XP_NULL, 0, 0 } }; #define GET_CHAR(awk) \ @@ -184,7 +184,6 @@ static struct __kwent __kwtab[] = #define PANIC(awk,code) do { (awk)->errnum = (code); return XP_NULL; } while (0); - // TODO: remove stdio.h #ifndef __STAND_ALONE #include @@ -215,6 +214,8 @@ static void __dump (xp_awk_t* awk) xp_printf (XP_TEXT("END ")); xp_awk_prnpt (awk->tree.end); } + +// TODO: dump unnamed top-level blocks... 
} int xp_awk_parse (xp_awk_t* awk) @@ -307,8 +308,8 @@ static xp_awk_node_t* __parse_function (xp_awk_t* awk) } if (awk->opt.parse & XP_AWK_UNIQUE) { - /* check if it coincides to be a variable name */ - if (__find_variable(awk,name) != (xp_size_t)-1) { + /* check if it coincides to be a global variable name */ + if (xp_awk_tab_find(&awk->parse.globals, name, 0) != (xp_size_t)-1) { PANIC (awk, XP_AWK_EDUPNAME); } } @@ -349,24 +350,41 @@ static xp_awk_node_t* __parse_function (xp_awk_t* awk) } else { while (1) { + xp_char_t* param; + if (!MATCH(awk,TOKEN_IDENT)) { xp_free (name_dup); xp_awk_tab_clear (&awk->parse.params); PANIC (awk, XP_AWK_EIDENT); } -// TODO: check duplicates againt variables if shading is not supported -// global x; function f (x) { print x; } -> x in print x is a parameter + param = XP_STR_BUF(&awk->token.name); - if (xp_awk_tab_find (&awk->parse.params, - XP_STR_BUF(&awk->token.name), 0) != (xp_size_t)-1) { + if (awk->opt.parse & XP_AWK_UNIQUE) { + /* check if a parameter conflicts with a function */ + if (xp_strcmp(name_dup, param) == 0 || + xp_awk_hash_get(&awk->tree.funcs, param) != XP_NULL) { + xp_free (name_dup); + xp_awk_tab_clear (&awk->parse.params); + PANIC (awk, XP_AWK_EDUPNAME); + } + + /* NOTE: the following is not a conflict + * global x; + * function f (x) { print x; } + * x in print x is a parameter + */ + } + + /* check if a parameter conflicts with other parameters */ + if (xp_awk_tab_find(&awk->parse.params, param, 0) != (xp_size_t)-1) { xp_free (name_dup); xp_awk_tab_clear (&awk->parse.params); PANIC (awk, XP_AWK_EDUPPARAM); } - if (xp_awk_tab_adddatum (&awk->parse.params, - XP_STR_BUF(&awk->token.name)) == (xp_size_t)-1) { + /* push the parameter to the parameter list */ + if (xp_awk_tab_adddatum(&awk->parse.params, param) == (xp_size_t)-1) { xp_free (name_dup); xp_awk_tab_clear (&awk->parse.params); PANIC (awk, XP_AWK_ENOMEM); @@ -413,7 +431,7 @@ static xp_awk_node_t* __parse_function (xp_awk_t* awk) } /* actual function 
body */ - body = __parse_block (awk); + body = __parse_block (awk, xp_true); if (body == XP_NULL) { xp_free (name_dup); xp_awk_tab_clear (&awk->parse.params); @@ -479,106 +497,164 @@ static xp_awk_node_t* __parse_action (xp_awk_t* awk) { if (!MATCH(awk,TOKEN_LBRACE)) PANIC (awk, XP_AWK_ELBRACE); if (__get_token(awk) == -1) return XP_NULL; - return __parse_block(awk); + return __parse_block(awk, xp_true); } -/* TODO: what is the best name for the parsing routine for the outermost block? */ -static xp_awk_node_t* __parse_block (xp_awk_t* awk) +static xp_awk_node_t* __parse_block (xp_awk_t* awk, xp_bool_t is_top) { xp_awk_node_t* head, * curr, * node; xp_awk_node_block_t* block; - xp_size_t lvc = 0; + xp_size_t nlocals, tmp; - /* local variable declaration */ - //TODO: if (awk->opt & XP_AWK_VARDECL) { - while (1) { - if (MATCH(awk,TOKEN_EOF)) { - // cleanup the variable name list... - PANIC (awk, XP_AWK_EENDSRC); - } + nlocals = xp_awk_tab_getsize(&awk->parse.locals); + + /* local variable declarations */ + if (awk->opt.parse & XP_AWK_EXPLICIT) { + while (1) { + if (!MATCH(awk,TOKEN_LOCAL)) break; - if (MATCH(awk,TOKEN_RBRACE)) { if (__get_token(awk) == -1) { - // TODO: cleanup the variable name list... - return XP_NULL; + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, + xp_awk_tab_getsize(&awk->parse.locals) - nlocals); + return XP_NULL; } - head = XP_NULL; - goto skip_block_body; - } - if (!MATCH(awk,TOKEN_LOCAL)) break; - - if (__get_token(awk) == -1) { - // TODO: cleanup the variable name list... - return XP_NULL; + if (__collect_locals(awk, nlocals) == XP_NULL) { + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, + xp_awk_tab_getsize(&awk->parse.locals) - nlocals); + return XP_NULL; + } } -// TODO: collect variables... -// TODO: check duplicates with locals and globals, and maybe with the function names also depending on the awk options.... 
} - // TODO: } /* block body */ head = XP_NULL; curr = XP_NULL; while (1) { if (MATCH(awk,TOKEN_EOF)) { - // TODO: cleanup the variable name list... + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, + xp_awk_tab_getsize(&awk->parse.locals) - nlocals); if (head != XP_NULL) xp_awk_clrpt (head); PANIC (awk, XP_AWK_EENDSRC); } if (MATCH(awk,TOKEN_RBRACE)) { if (__get_token(awk) == -1) { - // TODO: cleanup the variable name list... + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, + xp_awk_tab_getsize(&awk->parse.locals) - nlocals); if (head != XP_NULL) xp_awk_clrpt (head); return XP_NULL; } break; } -/* TODO: if you want to remove top-level null statement... get it here... */ -/* - if (MATCH(awk,TOKEN_SEMICOLON)) { - if (__get_token(awk) == -1) { - // TODO: cleanup the variable name list... - if (head != XP_NULL) xp_awk_clrpt (head); - return XP_NULL; - } - continue; - } -*/ node = __parse_statement (awk); if (node == XP_NULL) { - // TODO: cleanup the variable name list... + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, + xp_awk_tab_getsize(&awk->parse.locals) - nlocals); if (head != XP_NULL) xp_awk_clrpt (head); return XP_NULL; } - + + /* remove unnecessary statements */ + if (node->type == XP_AWK_NODE_NULL || + (node->type == XP_AWK_NODE_BLOCK && + ((xp_awk_node_block_t*)node)->body == XP_NULL)) continue; + if (curr == XP_NULL) head = node; else curr->next = node; curr = node; } -skip_block_body: - block = (xp_awk_node_block_t*) xp_malloc (xp_sizeof(xp_awk_node_block_t)); if (block == XP_NULL) { - // TODO: cleanup the variable name list... + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, + xp_awk_tab_getsize(&awk->parse.locals) - nlocals); xp_awk_clrpt (head); PANIC (awk, XP_AWK_ENOMEM); } -// TODO: remove empty block such as { } { ;;;; }, or { local a, b, c; ;;; }. 
etc + tmp = xp_awk_tab_getsize(&awk->parse.locals); + xp_awk_tab_remrange ( + &awk->parse.locals, nlocals - 1, tmp - nlocals); + + /* adjust number of locals for a block without any statements */ + if (head == NULL) tmp = 0; + block->type = XP_AWK_NODE_BLOCK; block->next = XP_NULL; - block->lvc = lvc; block->body = head; -// TODO: cleanup the variable name list... + if (is_top) { + block->nlocals = awk->parse.nlocals_max - nlocals; + awk->parse.nlocals_max = nlocals; + } + else { + block->nlocals = 0; + if (tmp > awk->parse.nlocals_max) awk->parse.nlocals_max = tmp; + } return (xp_awk_node_t*)block; } +static xp_awk_t* __collect_locals (xp_awk_t* awk, xp_size_t nlocals) +{ + xp_char_t* local; + + while (1) { + if (!MATCH(awk,TOKEN_IDENT)) { + PANIC (awk, XP_AWK_EIDENT); + } + + local = XP_STR_BUF(&awk->token.name); + + /* NOTE: it is not checked against global names */ + + if (awk->opt.parse & XP_AWK_UNIQUE) { + /* check if it conflicts with a function name */ + if (xp_awk_hash_get(&awk->tree.funcs, local) != XP_NULL) { + PANIC (awk, XP_AWK_EDUPNAME); + } + } + + /* check if it conflicts with a parameter name */ + if (xp_awk_tab_find(&awk->parse.params, local, 0) != (xp_size_t)-1) { + PANIC (awk, XP_AWK_EDUPNAME); + } + + /* check if it conflicts with other local variable names */ + if (xp_awk_tab_find(&awk->parse.locals, local, + ((awk->opt.parse & XP_AWK_SHADING)? 
nlocals: 0)) != (xp_size_t)-1) { + PANIC (awk, XP_AWK_EDUPVAR); + } + + if (xp_awk_tab_adddatum(&awk->parse.locals, local) == (xp_size_t)-1) { + PANIC (awk, XP_AWK_ENOMEM); + } + + if (__get_token(awk) == -1) return XP_NULL; + + if (MATCH(awk,TOKEN_SEMICOLON)) break; + + if (!MATCH(awk,TOKEN_COMMA)) { + PANIC (awk, XP_AWK_ECOMMA); + } + + if (__get_token(awk) == -1) return XP_NULL; + } + + /* skip a semicolon */ + if (__get_token(awk) == -1) return XP_NULL; + + return awk; +} + static xp_awk_node_t* __parse_statement (xp_awk_t* awk) { xp_awk_node_t* node; @@ -598,7 +674,7 @@ static xp_awk_node_t* __parse_statement (xp_awk_t* awk) } else if (MATCH(awk,TOKEN_LBRACE)) { if (__get_token(awk) == -1) return XP_NULL; - node = __parse_block (awk); + node = __parse_block (awk, xp_false); } else node = __parse_statement_nb (awk); @@ -1561,7 +1637,7 @@ static int __get_token (xp_awk_t* awk) GET_CHAR_TO (awk, c); } while (xp_isalpha(c) || c == XP_CHAR('_') || xp_isdigit(c)); - SET_TOKEN_TYPE (awk, __classfy_ident(XP_STR_BUF(&awk->token.name))); + SET_TOKEN_TYPE (awk, __classify_ident(awk, XP_STR_BUF(&awk->token.name))); } else if (c == XP_CHAR('\"')) { /* string */ @@ -1780,13 +1856,13 @@ static int __skip_comment (xp_awk_t* awk) return 0; } -static int __classfy_ident (const xp_char_t* ident) +static int __classify_ident (xp_awk_t* awk, const xp_char_t* ident) { struct __kwent* p = __kwtab; - while (p->name != XP_NULL) { + for (p = __kwtab; p->name != XP_NULL; p++) { + if (p->valid != 0 && (awk->opt.parse & p->valid) == 0) continue; if (xp_strcmp(p->name, ident) == 0) return p->type; - p++; } return TOKEN_IDENT; @@ -1806,18 +1882,13 @@ static xp_long_t __str_to_long (const xp_char_t* name) static INLINE xp_size_t __find_func_arg (xp_awk_t* awk, const xp_char_t* name) { -/* - if (awk->curfunc != XP_NULL) { - -// TODO: finish this.... 
- } -*/ - +// TODO: return (xp_size_t)-1; } static xp_size_t __find_variable (xp_awk_t* awk, const xp_char_t* name) { +// TODO: return (xp_size_t)-1; } diff --git a/ase/awk/tree.c b/ase/awk/tree.c index edf24089..2a10b6ec 100644 --- a/ase/awk/tree.c +++ b/ase/awk/tree.c @@ -1,5 +1,5 @@ /* - * $Id: tree.c,v 1.15 2006-02-01 02:56:12 bacon Exp $ + * $Id: tree.c,v 1.16 2006-02-04 19:31:51 bacon Exp $ */ #include @@ -133,6 +133,11 @@ static void __print_statements (xp_awk_node_t* tree, int depth) case XP_AWK_NODE_BLOCK: __print_tabs (depth); xp_printf (XP_TEXT("{\n")); + if (((xp_awk_node_block_t*)p)->nlocals > 0) { + __print_tabs (depth + 1); + xp_printf (XP_TEXT("NLOCALS=>%u;\n"), + (unsigned int)((xp_awk_node_block_t*)p)->nlocals); + } __print_statements (((xp_awk_node_block_t*)p)->body, depth + 1); __print_tabs (depth); xp_printf (XP_TEXT("}\n")); diff --git a/ase/awk/tree.h b/ase/awk/tree.h index 1f1b4efa..7aae171c 100644 --- a/ase/awk/tree.h +++ b/ase/awk/tree.h @@ -1,5 +1,5 @@ /* - * $Id: tree.h,v 1.20 2006-02-01 02:56:12 bacon Exp $ + * $Id: tree.h,v 1.21 2006-02-04 19:31:51 bacon Exp $ */ #ifndef _XP_AWK_TREE_H_ @@ -74,7 +74,7 @@ struct xp_awk_node_sgv_t struct xp_awk_node_block_t { XP_AWK_NODE_HDR; - xp_size_t lvc; /* local variable count */ + xp_size_t nlocals; xp_awk_node_t* body; }; diff --git a/ase/test/awk/awk.c b/ase/test/awk/awk.c index 49552bf9..8bff2fb0 100644 --- a/ase/test/awk/awk.c +++ b/ase/test/awk/awk.c @@ -78,6 +78,7 @@ int xp_main (int argc, xp_char_t* argv[]) return -1; } +awk.opt.parse = XP_AWK_EXPLICIT | XP_AWK_UNIQUE; if (xp_awk_parse(&awk) == -1) { xp_printf ( XP_TEXT("error: cannot parse program - [%d] %s\n"),