/* * $Id: parse.c,v 1.8 2005-12-11 13:56:13 bacon Exp $ */ #include #include #include enum { TOKEN_EOF, TOKEN_ASSIGN, TOKEN_EQ, TOKEN_NE, TOKEN_NOT, TOKEN_PLUS, TOKEN_PLUS_PLUS, TOKEN_PLUS_ASSIGN, TOKEN_MINUS, TOKEN_MINUS_MINUS, TOKEN_MINUS_ASSIGN, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_LBRACE, TOKEN_RBRACE, TOKEN_LBRAKET, TOKEN_RBRAKET, TOKEN_STRING, TOKEN_REGEX, TOKEN_IDENT, TOEKN_BEGIN, TOKEN_END, TOKEN_FUNCTION, TOKEN_IF, TOKEN_DO, TOKEN_WHILE, TOKEN_FOR, TOKEN_CONTINUE, TOKEN_BREAK }; static int __parse (xp_awk_t* awk); static int __parse_program (xp_awk_t* awk); static int __get_token (xp_awk_t* awk); static int __get_char (xp_awk_t* awk); static int __unget_char (xp_awk_t* awk, xp_cint_t c); static int __skip_spaces (xp_awk_t* awk); static int __skip_comment (xp_awk_t* awk); static int __classfy_ident (const xp_char_t* ident); struct __kwent { const xp_char_t* name, int type; }; static struct __kwent __kwtab[] = { { XP_TEXT("BEGIN"), TOKEN_BEGIN }, { XP_TEXT("END"), TOKEN_END }, { XP_TEXT("function"), TOKEN_FUNCTION }, { XP_TEXT("if"), TOKEN_IF }, { XP_TEXT("do"), TOKEN_DO }, { XP_TEXT("while"), TOKEN_WHILE }, { XP_TEXT("for"), TOKEN_FOR }, { XP_TEXT("continue"), TOKEN_CONTINUE }, { XP_TEXT("break"), TOKEN_BREAK }, { XP_NULL, 0 }, }; #define GET_CHAR(awk) \ do { if (__get_char(awk) == -1) return -1; } while(0) #define GET_CHAR_TO(awk, c) do { \ if (__get_char(awk) == -1) return -1; \ c = (awk)->lex.curc; \ } while(0) #define SET_TOKEN_TYPE(awk,code) ((awk)->token.type = code) #define ADD_TOKEN_CHAR(awk,c) do { \ if (xp_str_ccat(&(awk)->token.name,(c)) == -1) { \ (awk)->errnum = XP_AWK_ENOMEM; return -1; \ } \ } while (0) #define ADD_TOKEN_STR(awk,str) do { \ if (xp_str_cat(&(awk)->token.name,(str)) == -1) { \ (awk)->errnum = XP_AWK_ENOMEM; return -1; \ } while (0) #define GET_TOKEN(awk) do { if (__get_token(awk) == -1) return -1; } int xp_awk_parse (xp_awk_t* awk) { GET_CHAR (awk); GET_TOKEN (awk); return __parse_program (awk); } static int __parse_program (xp_awk_t* awk) { /* pattern { action } function name (parameter-list) { statement } */ while (1) { if (awk->token.type == TOKEN_FUNCTION) { if (__parse_function_declaration(awk) == -1) return -1; } else { if (__parse_pattern_action(awk) == -1) return -1; } } return 0; } static int __parse_function_declaration (xp_awk_t* awk) { return -1; } static int __parse_pattern_action (xp_awk_t* awk) { /* BEGIN END expressions /regular expression/ pattern && pattern pattern || pattern !pattern (pattern) pattern, pattern */ if (awk->token.type == TOKEN_BEGIN) { } else if (awk->token.type == TOKEN_END) { } return -1; } static int __get_token (xp_awk_t* awk) { xp_cint_t c; int n; do { if (__skip_spaces(awk) == -1) return -1; if ((n = __skip_comment(awk)) == -1) return -1; } while (n == 1); xp_str_clear (&awk->token.name); c = awk->lex.curc; if (c == XP_CHAR_EOF) { SET_TOKEN_TYPE (awk, TOKEN_EOF); } else if (xp_isdigit(c)) { /* number */ } else if (xp_isalpha(c) || c == XP_CHAR('_')) { /* identifier */ do { ADD_TOKEN_CHAR (awk, c); GET_CHAR_TO (awk, c); } while (xp_isalpha(c) || c == XP_CHAR('_') || xp_isdigit(c)); SET_TOKEN_TYPE (awk, __classfy_ident(XP_STR_BUF(&awk->token.name))); } else if (c == XP_CHAR('\"')) { /* string */ } else if (c == XP_CHAR('/')) { /* regular expression */ } else if (c == XP_CHAR('=')) { GET_CHAR_TO (awk, c); if (c == XP_CHAR('=')) { SET_TOKEN_TYPE (awk, TOKEN_EQ); ADD_TOKEN_STR (awk, XP_TEXT("==")); GET_CHAR_TO (awk, c); } else { SET_TOKEN_TYPE (awk, TOKEN_ASSIGN); ADD_TOKEN_STR (awk, XP_TEXT("=")); } } else if (c == XP_CHAR('!')) { GET_CHAR_TO (awk, c); if (c == XP_CHAR('=')) { SET_TOKEN_TYPE (awk, TOKEN_NE); ADD_TOKEN_STR (awk, XP_TEXT("!=")); GET_CHAR_TO (awk, c); } else { SET_TOKEN_TYPE (awk, TOKEN_NOT); ADD_TOKEN_STR (awk, XP_TEXT("!")); } } else if (c == XP_CHAR('+')) { GET_CHAR_TO (awk, c); if (c == XP_CHAR('+')) { SET_TOKEN_TYPE (awk, TOKEN_PLUS_PLUS); ADD_TOKEN_STR (awk, XP_TEXT("++")); GET_CHAR_TO (awk, c); } else if (c == XP_CHAR('=')) { SET_TOKEN_TYPE (awk, TOKEN_PLUS_ASSIGN); ADD_TOKEN_STR (awk, XP_TEXT("+=")); GET_CHAR_TO (awk, c); } else if (xp_isdigit(c)) { // read_number (XP_CHAR('+')); } else { SET_TOKEN_TYPE (awk, TOKEN_PLUS); ADD_TOKEN_STR (awk, XP_TEXT("+")); } } else if (c == XP_CHAR('-')) { GET_CHAR_TO (awk, c); if (c == XP_CHAR('-')) { SET_TOKEN_TYPE (awk, TOKEN_MINUS_MINUS); ADD_TOKEN_STR (awk, XP_TEXT("--")); GET_CHAR_TO (awk, c); } else if (c == XP_CHAR('=')) { SET_TOKEN_TYPE (awk, TOKEN_MINUS_ASSIGN); ADD_TOKEN_STR (awk, XP_TEXT("-=")); GET_CHAR_TO (awk, c); } else if (xp_isdigit(c)) { // read_number (XP_CHAR('-')); } else { SET_TOKEN_TYPE (awk, TOKEN_MINUS); ADD_TOKEN_STR (awk, XP_TEXT("-")); } } else if (c == XP_CHAR('(') { SET_TOKEN_TYPE (awk, TOKEN_LPAREN); ADD_TOKEN_STR (awk, c); } else if (c == XP_CHAR(')') { SET_TOKEN_TYPE (awk, TOKEN_RPAREN); ADD_TOKEN_STR (awk, c); } else if (c == XP_CHAR('{') { SET_TOKEN_TYPE (awk, TOKEN_LBRACE); ADD_TOKEN_STR (awk, c); } else if (c == XP_CHAR('}') { SET_TOKEN_TYPE (awk, TOKEN_RBRACE); ADD_TOKEN_CHAR (awk, c); } else if (c == XP_CHAR('[') { SET_TOKEN_TYPE (awk, TOKEN_LBRAKET); ADD_TOKEN_STR (awk, c); } else if (c == XP_CHAR(']') { SET_TOKEN_TYPE (awk, TOKEN_RBRAKET); ADD_TOKEN_CHAR (awk, c); } else { awk->errnum = XP_AWK_ELXCHR; return -1; } return 0; } static int __get_char (xp_awk_t* awk) { if (awk->lex.ungotc_count > 0) { awk->lex.curc = awk->lex.ungotc[--awk->lex.ungotc_count]; return 0; } if (awk->source_func(XP_AWK_IO_DATA, awk->source_arg, &awk->lex.curc, 1) == -1) { awk->errnum = XP_AWK_ESRCDT; return -1; } return 0; } static int __unget_char (xp_awk_t* awk, xp_cint_t c) { if (awk->lex.ungotc_count >= xp_countof(awk->lex.ungotc)) { awk->errnum = XP_AWK_ELXUNG; return -1; } awk->lex.ungotc[awk->lex.ungotc_count++] = c; return 0; } static int __skip_spaces (xp_awk_t* awk) { xp_cint_t c = awk->lex.curc; while (xp_isspace(c)) GET_CHAR_TO (awk, c); return 0; } static int __skip_comment (xp_awk_t* awk) { xp_cint_t c = awk->lex.curc; if (c != XP_CHAR('/')) return 0; GET_CHAR_TO (awk, c); if (c == XP_CHAR('/')) { do { GET_CHAR_TO (awk, c); } while (c != '\n' && c != XP_CHAR_EOF); GET_CHAR (awk); return 1; } else if (c == XP_CHAR('*')) { do { GET_CHAR_TO (awk, c); if (c == XP_CHAR('*')) { GET_CHAR_TO (awk, c); if (c == XP_CHAR('/')) { GET_CHAR_TO (awk, c); break; } } } while (0); return 1; } if (__unget_char(awk, c) == -1) return -1; return 0; } static int __classfy_ident (const xp_char_t* ident) { struct __kwent* p = __kwtab; while (p->name != XP_NULL) { if (xp_strcmp(p->name, ident) == 0) return p->type; } return TOKEN_IDENT; }