360 lines
7.0 KiB
C
360 lines
7.0 KiB
C
/*
|
|
* $Id: parse.c,v 1.9 2005-12-29 12:04:51 bacon Exp $
|
|
*/
|
|
|
|
#include <xp/awk/awk.h>
|
|
#include <xp/bas/memory.h>
|
|
#include <xp/bas/ctype.h>
|
|
#include <xp/bas/string.h>
|
|
|
|
enum
|
|
{
|
|
TOKEN_EOF,
|
|
|
|
TOKEN_ASSIGN,
|
|
TOKEN_EQ,
|
|
TOKEN_NE,
|
|
TOKEN_NOT,
|
|
TOKEN_PLUS,
|
|
TOKEN_PLUS_PLUS,
|
|
TOKEN_PLUS_ASSIGN,
|
|
TOKEN_MINUS,
|
|
TOKEN_MINUS_MINUS,
|
|
TOKEN_MINUS_ASSIGN,
|
|
|
|
TOKEN_LPAREN,
|
|
TOKEN_RPAREN,
|
|
TOKEN_LBRACE,
|
|
TOKEN_RBRACE,
|
|
TOKEN_LBRAKET,
|
|
TOKEN_RBRAKET,
|
|
|
|
TOKEN_STRING,
|
|
TOKEN_REGEX,
|
|
|
|
TOKEN_IDENT,
|
|
TOKEN_BEGIN,
|
|
TOKEN_END,
|
|
TOKEN_FUNCTION,
|
|
TOKEN_IF,
|
|
TOKEN_DO,
|
|
TOKEN_WHILE,
|
|
TOKEN_FOR,
|
|
TOKEN_CONTINUE,
|
|
TOKEN_BREAK
|
|
};
|
|
|
|
static int __parse (xp_awk_t* awk);
|
|
static int __parse_program (xp_awk_t* awk);
|
|
static int __get_token (xp_awk_t* awk);
|
|
static int __get_char (xp_awk_t* awk);
|
|
static int __unget_char (xp_awk_t* awk, xp_cint_t c);
|
|
static int __skip_spaces (xp_awk_t* awk);
|
|
static int __skip_comment (xp_awk_t* awk);
|
|
static int __classfy_ident (const xp_char_t* ident);
|
|
|
|
struct __kwent
|
|
{
|
|
const xp_char_t* name;
|
|
int type;
|
|
};
|
|
|
|
static struct __kwent __kwtab[] =
|
|
{
|
|
{ XP_TEXT("BEGIN"), TOKEN_BEGIN },
|
|
{ XP_TEXT("END"), TOKEN_END },
|
|
{ XP_TEXT("function"), TOKEN_FUNCTION },
|
|
{ XP_TEXT("if"), TOKEN_IF },
|
|
{ XP_TEXT("do"), TOKEN_DO },
|
|
{ XP_TEXT("while"), TOKEN_WHILE },
|
|
{ XP_TEXT("for"), TOKEN_FOR },
|
|
{ XP_TEXT("continue"), TOKEN_CONTINUE },
|
|
{ XP_TEXT("break"), TOKEN_BREAK },
|
|
{ XP_NULL, 0 },
|
|
};
|
|
|
|
#define GET_CHAR(awk) \
|
|
do { if (__get_char(awk) == -1) return -1; } while(0)
|
|
|
|
#define GET_CHAR_TO(awk,c) do { \
|
|
if (__get_char(awk) == -1) return -1; \
|
|
c = (awk)->lex.curc; \
|
|
} while(0)
|
|
|
|
#define SET_TOKEN_TYPE(awk,code) ((awk)->token.type = code)
|
|
|
|
#define ADD_TOKEN_CHAR(awk,c) do { \
|
|
if (xp_str_ccat(&(awk)->token.name,(c)) == -1) { \
|
|
(awk)->errnum = XP_AWK_ENOMEM; return -1; \
|
|
} \
|
|
} while (0)
|
|
|
|
#define ADD_TOKEN_STR(awk,str) do { \
|
|
if (xp_str_cat(&(awk)->token.name,(str)) == -1) { \
|
|
(awk)->errnum = XP_AWK_ENOMEM; return -1; \
|
|
} \
|
|
} while (0)
|
|
|
|
#define GET_TOKEN(awk) \
|
|
do { if (__get_token(awk) == -1) return -1; } while(0)
|
|
|
|
int xp_awk_parse (xp_awk_t* awk)
|
|
{
|
|
GET_CHAR (awk);
|
|
GET_TOKEN (awk);
|
|
return __parse_program (awk);
|
|
}
|
|
|
|
static int __parse_program (xp_awk_t* awk)
|
|
{
|
|
/*
|
|
pattern { action }
|
|
function name (parameter-list) { statement }
|
|
*/
|
|
|
|
while (1) {
|
|
if (awk->token.type == TOKEN_FUNCTION) {
|
|
if (__parse_function_declaration(awk) == -1) return -1;
|
|
}
|
|
else {
|
|
if (__parse_pattern_action(awk) == -1) return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __parse_function_declaration (xp_awk_t* awk)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
static int __parse_pattern_action (xp_awk_t* awk)
|
|
{
|
|
/*
|
|
BEGIN
|
|
END
|
|
expressions
|
|
/regular expression/
|
|
pattern && pattern
|
|
pattern || pattern
|
|
!pattern
|
|
(pattern)
|
|
pattern, pattern
|
|
*/
|
|
|
|
if (awk->token.type == TOKEN_BEGIN) {
|
|
}
|
|
else if (awk->token.type == TOKEN_END) {
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
static int __get_token (xp_awk_t* awk)
|
|
{
|
|
xp_cint_t c;
|
|
int n;
|
|
|
|
do {
|
|
if (__skip_spaces(awk) == -1) return -1;
|
|
if ((n = __skip_comment(awk)) == -1) return -1;
|
|
} while (n == 1);
|
|
|
|
xp_str_clear (&awk->token.name);
|
|
c = awk->lex.curc;
|
|
|
|
if (c == XP_CHAR_EOF) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_EOF);
|
|
}
|
|
else if (xp_isdigit(c)) {
|
|
/* number */
|
|
}
|
|
else if (xp_isalpha(c) || c == XP_CHAR('_')) {
|
|
/* identifier */
|
|
do {
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
GET_CHAR_TO (awk, c);
|
|
} while (xp_isalpha(c) || c == XP_CHAR('_') || xp_isdigit(c));
|
|
|
|
SET_TOKEN_TYPE (awk, __classfy_ident(XP_STR_BUF(&awk->token.name)));
|
|
}
|
|
else if (c == XP_CHAR('\"')) {
|
|
/* string */
|
|
}
|
|
else if (c == XP_CHAR('/')) {
|
|
/* regular expression */
|
|
}
|
|
else if (c == XP_CHAR('=')) {
|
|
GET_CHAR_TO (awk, c);
|
|
if (c == XP_CHAR('=')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_EQ);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("=="));
|
|
GET_CHAR_TO (awk, c);
|
|
}
|
|
else {
|
|
SET_TOKEN_TYPE (awk, TOKEN_ASSIGN);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("="));
|
|
}
|
|
}
|
|
else if (c == XP_CHAR('!')) {
|
|
GET_CHAR_TO (awk, c);
|
|
if (c == XP_CHAR('=')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_NE);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("!="));
|
|
GET_CHAR_TO (awk, c);
|
|
}
|
|
else {
|
|
SET_TOKEN_TYPE (awk, TOKEN_NOT);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("!"));
|
|
}
|
|
}
|
|
else if (c == XP_CHAR('+')) {
|
|
GET_CHAR_TO (awk, c);
|
|
if (c == XP_CHAR('+')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_PLUS_PLUS);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("++"));
|
|
GET_CHAR_TO (awk, c);
|
|
}
|
|
else if (c == XP_CHAR('=')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_PLUS_ASSIGN);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("+="));
|
|
GET_CHAR_TO (awk, c);
|
|
}
|
|
else if (xp_isdigit(c)) {
|
|
// read_number (XP_CHAR('+'));
|
|
}
|
|
else {
|
|
SET_TOKEN_TYPE (awk, TOKEN_PLUS);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("+"));
|
|
}
|
|
}
|
|
else if (c == XP_CHAR('-')) {
|
|
GET_CHAR_TO (awk, c);
|
|
if (c == XP_CHAR('-')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_MINUS_MINUS);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("--"));
|
|
GET_CHAR_TO (awk, c);
|
|
}
|
|
else if (c == XP_CHAR('=')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_MINUS_ASSIGN);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("-="));
|
|
GET_CHAR_TO (awk, c);
|
|
}
|
|
else if (xp_isdigit(c)) {
|
|
// read_number (XP_CHAR('-'));
|
|
}
|
|
else {
|
|
SET_TOKEN_TYPE (awk, TOKEN_MINUS);
|
|
ADD_TOKEN_STR (awk, XP_TEXT("-"));
|
|
}
|
|
}
|
|
else if (c == XP_CHAR('(')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_LPAREN);
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
}
|
|
else if (c == XP_CHAR(')')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_RPAREN);
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
}
|
|
else if (c == XP_CHAR('{')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_LBRACE);
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
}
|
|
else if (c == XP_CHAR('}')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_RBRACE);
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
}
|
|
else if (c == XP_CHAR('[')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_LBRAKET);
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
}
|
|
else if (c == XP_CHAR(']')) {
|
|
SET_TOKEN_TYPE (awk, TOKEN_RBRAKET);
|
|
ADD_TOKEN_CHAR (awk, c);
|
|
}
|
|
else {
|
|
awk->errnum = XP_AWK_ELXCHR;
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __get_char (xp_awk_t* awk)
|
|
{
|
|
if (awk->lex.ungotc_count > 0) {
|
|
awk->lex.curc = awk->lex.ungotc[--awk->lex.ungotc_count];
|
|
return 0;
|
|
}
|
|
|
|
if (awk->src_func(XP_AWK_IO_DATA,
|
|
awk->src_arg, &awk->lex.curc, 1) == -1) {
|
|
awk->errnum = XP_AWK_ESRCDT;
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __unget_char (xp_awk_t* awk, xp_cint_t c)
|
|
{
|
|
if (awk->lex.ungotc_count >= xp_countof(awk->lex.ungotc)) {
|
|
awk->errnum = XP_AWK_ELXUNG;
|
|
return -1;
|
|
}
|
|
|
|
awk->lex.ungotc[awk->lex.ungotc_count++] = c;
|
|
return 0;
|
|
}
|
|
|
|
static int __skip_spaces (xp_awk_t* awk)
|
|
{
|
|
xp_cint_t c = awk->lex.curc;
|
|
while (xp_isspace(c)) GET_CHAR_TO (awk, c);
|
|
return 0;
|
|
}
|
|
|
|
static int __skip_comment (xp_awk_t* awk)
|
|
{
|
|
xp_cint_t c = awk->lex.curc;
|
|
|
|
if (c != XP_CHAR('/')) return 0;
|
|
GET_CHAR_TO (awk, c);
|
|
|
|
if (c == XP_CHAR('/')) {
|
|
do {
|
|
GET_CHAR_TO (awk, c);
|
|
} while (c != '\n' && c != XP_CHAR_EOF);
|
|
GET_CHAR (awk);
|
|
return 1;
|
|
}
|
|
else if (c == XP_CHAR('*')) {
|
|
do {
|
|
GET_CHAR_TO (awk, c);
|
|
if (c == XP_CHAR('*')) {
|
|
GET_CHAR_TO (awk, c);
|
|
if (c == XP_CHAR('/')) {
|
|
GET_CHAR_TO (awk, c);
|
|
break;
|
|
}
|
|
}
|
|
} while (0);
|
|
return 1;
|
|
}
|
|
|
|
if (__unget_char(awk, c) == -1) return -1;
|
|
return 0;
|
|
}
|
|
|
|
static int __classfy_ident (const xp_char_t* ident)
|
|
{
|
|
struct __kwent* p = __kwtab;
|
|
|
|
while (p->name != XP_NULL) {
|
|
if (xp_strcmp(p->name, ident) == 0) return p->type;
|
|
}
|
|
|
|
return TOKEN_IDENT;
|
|
}
|