/*
 * qse/ase/awk/parse.c
 * 300 lines, 5.9 KiB, C
 */

/*
* $Id: parse.c,v 1.4 2005-11-15 15:32:39 bacon Exp $
*/
#include <xp/awk/awk.h>
#include <xp/bas/memory.h>
#include <xp/bas/ctype.h>
/*
 * token types produced by the lexer and stored in awk->token.type.
 * TOKEN_BEGIN, TOKEN_END and TOKEN_FUNCTION are the keyword tokens listed
 * in the keyword table; every other identifier is TOKEN_IDENT.
 */
enum
{
	TOKEN_EOF,

	TOKEN_ASSIGN,
	TOKEN_EQ,
	TOKEN_NE,
	TOKEN_NOT,
	TOKEN_PLUS,
	TOKEN_PLUS_PLUS,
	TOKEN_PLUS_ASSIGN,
	TOKEN_MINUS,
	TOKEN_MINUS_MINUS,
	TOKEN_MINUS_ASSIGN,

	TOKEN_LPAREN,
	TOKEN_RPAREN,
	TOKEN_LBRACE,
	TOKEN_RBRACE,
	TOKEN_LBRAKET,
	TOKEN_RBRAKET,

	TOKEN_IDENT,

	/* was misspelled TOEKN_BEGIN, leaving TOKEN_BEGIN undeclared */
	TOKEN_BEGIN,
	TOKEN_END,
	TOKEN_FUNCTION
};
/* forward declarations of the file-local parser and lexer routines */

/* returns 0 when the current token is TOKEN_EOF, -1 otherwise */
static int __parse (xp_awk_t* awk);
/* skips spaces and comments, then classifies the next token into awk->token */
static int __get_token (xp_awk_t* awk);
/* loads the next character into awk->lex.curc; -1 on a source error */
static int __get_char (xp_awk_t* awk);
/* pushes c back so that a later __get_char returns it; -1 if the buffer is full */
static int __unget_char (xp_awk_t* awk, xp_cint_t c);
/* advances past characters for which xp_isspace is true */
static int __skip_spaces (xp_awk_t* awk);
/* returns 1 if a comment was skipped, 0 if there was none, -1 on error */
static int __skip_comment (xp_awk_t* awk);
/* maps an identifier to its keyword token type, or to TOKEN_IDENT */
static int __classfy_ident (const xp_char_t* ident);
/*
 * one entry of the keyword table: the keyword spelling and the token type
 * reported for it.
 */
struct __kwent
{
	const xp_char_t* name; /* keyword text; XP_NULL in the terminating entry */
	int type;              /* TOKEN_xxx value returned for this keyword */
};
/*
 * keywords looked up by __classfy_ident. the entry whose name is XP_NULL
 * terminates the table.
 */
static struct __kwent __kwtab[] =
{
	{ XP_TEXT("BEGIN"),    TOKEN_BEGIN    },
	{ XP_TEXT("END"),      TOKEN_END      },
	{ XP_TEXT("function"), TOKEN_FUNCTION },

	/* terminator */
	{ XP_NULL,             0              }
};
/*
 * lexer convenience macros. GET_CHAR, GET_CHAR_TO and ADD_TOKEN_CHAR contain
 * a 'return -1', so they may only be used inside functions returning int.
 */

/* read the next character into (awk)->lex.curc; return -1 on failure */
#define GET_CHAR(awk) do { \
	if (__get_char(awk) == -1) return -1; \
} while (0)

/* same as GET_CHAR, and additionally copy the character read into c */
#define GET_CHAR_TO(awk,c) do { \
	if (__get_char(awk) == -1) return -1; \
	(c) = (awk)->lex.curc; \
} while (0)

/* record the type of the token being built */
#define SET_TOKEN_TYPE(awk,code) ((awk)->token.type = (code))

/* append one character to the token text; XP_AWK_ENOMEM and -1 on failure */
#define ADD_TOKEN_CHAR(awk,c) do { \
	if (xp_str_catc(&(awk)->token.name,(c)) == -1) { \
		(awk)->errnum = XP_AWK_ENOMEM; \
		return -1; \
	} \
} while (0)
/*
 * append a string to the token text. on failure awk->errnum is set to
 * XP_AWK_ENOMEM and the enclosing function returns -1.
 * (the closing brace of the 'if' body was missing, which left the
 * do { ... } while (0) wrapper unbalanced wherever the macro was expanded.)
 */
#define ADD_TOKEN_STR(awk,str) do { \
	if (xp_str_cat(&(awk)->token.name,(str)) == -1) { \
		(awk)->errnum = XP_AWK_ENOMEM; return -1; \
	} \
} while (0)
/*
 * xp_awk_parse
 *   entry point of the parser. reads the first source character into
 *   awk->lex.curc and hands control to __parse.
 *   returns the result of __parse, or -1 if the first character cannot
 *   be read.
 *
 *   NOTE(review): no token is fetched here before __parse inspects
 *   awk->token.type - confirm whether __get_token should be called first.
 */
int xp_awk_parse (xp_awk_t* awk)
{
	/* prime the lexer with the first character */
	if (__get_char(awk) == -1) return -1;

	return __parse (awk);
}
/*
 * __parse
 *   succeeds (0) only when the current token is TOKEN_EOF; any other
 *   token type yields -1.
 */
static int __parse (xp_awk_t* awk)
{
	return (awk->token.type == TOKEN_EOF)? 0: -1;
}
/*
 * __get_token
 *   skips leading whitespace and comments, then recognizes the token that
 *   starts at awk->lex.curc. the token type is stored with SET_TOKEN_TYPE
 *   and the token text is accumulated in awk->token.name. after a token
 *   has been recognized the lexer stands on the first character that
 *   follows it.
 *
 *   returns 0 on success and -1 on failure. awk->errnum is set to
 *   XP_AWK_ELXCHR when the character cannot start a token and to
 *   XP_AWK_ENOMEM when the token text cannot be extended; failures of
 *   __get_char, __skip_spaces and __skip_comment are passed through.
 *
 *   NOTE(review): numbers, strings, regular expressions and signed
 *   numbers are still unimplemented. those branches return 0 while
 *   leaving awk->token.type untouched.
 */
static int __get_token (xp_awk_t* awk)
{
	xp_cint_t c;
	int n;

	/* whitespace and comments may alternate any number of times */
	do {
		if (__skip_spaces(awk) == -1) return -1;
		if ((n = __skip_comment(awk)) == -1) return -1;
	} while (n == 1);

	xp_str_clear (&awk->token.name);
	c = awk->lex.curc;

	if (c == XP_CHAR_EOF) {
		SET_TOKEN_TYPE (awk, TOKEN_EOF);
	}
	else if (xp_isdigit(c)) {
		/* TODO: number */
	}
	else if (xp_isalpha(c) || c == XP_CHAR('_')) {
		/* identifier or keyword: [A-Za-z_][A-Za-z0-9_]* */
		do {
			ADD_TOKEN_CHAR (awk, c);
			GET_CHAR_TO (awk, c);
		} while (xp_isalpha(c) || c == XP_CHAR('_') || xp_isdigit(c));

		SET_TOKEN_TYPE (awk, __classfy_ident(XP_STR_BUF(&awk->token.name)));
	}
	else if (c == XP_CHAR('\"')) {
		/* TODO: string */
	}
	else if (c == XP_CHAR('/')) {
		/* TODO: regular expression */
	}
	else if (c == XP_CHAR('=')) {
		/* '=' or '==' */
		GET_CHAR_TO (awk, c);
		if (c == XP_CHAR('=')) {
			SET_TOKEN_TYPE (awk, TOKEN_EQ);
			ADD_TOKEN_STR (awk, XP_TEXT("=="));
			GET_CHAR (awk);
		}
		else {
			/* the lookahead already is the character after the token */
			SET_TOKEN_TYPE (awk, TOKEN_ASSIGN);
			ADD_TOKEN_STR (awk, XP_TEXT("="));
		}
	}
	else if (c == XP_CHAR('!')) {
		/* '!' or '!=' */
		GET_CHAR_TO (awk, c);
		if (c == XP_CHAR('=')) {
			SET_TOKEN_TYPE (awk, TOKEN_NE);
			ADD_TOKEN_STR (awk, XP_TEXT("!="));
			GET_CHAR (awk);
		}
		else {
			SET_TOKEN_TYPE (awk, TOKEN_NOT);
			ADD_TOKEN_STR (awk, XP_TEXT("!"));
		}
	}
	else if (c == XP_CHAR('+')) {
		/* '+', '++' or '+=' */
		GET_CHAR_TO (awk, c);
		if (c == XP_CHAR('+')) {
			SET_TOKEN_TYPE (awk, TOKEN_PLUS_PLUS);
			ADD_TOKEN_STR (awk, XP_TEXT("++"));
			GET_CHAR (awk);
		}
		else if (c == XP_CHAR('=')) {
			SET_TOKEN_TYPE (awk, TOKEN_PLUS_ASSIGN);
			ADD_TOKEN_STR (awk, XP_TEXT("+="));
			GET_CHAR (awk);
		}
		else if (xp_isdigit(c)) {
			/* TODO: read_number (XP_CHAR('+')); */
		}
		else {
			SET_TOKEN_TYPE (awk, TOKEN_PLUS);
			ADD_TOKEN_STR (awk, XP_TEXT("+"));
		}
	}
	else if (c == XP_CHAR('-')) {
		/* '-', '--' or '-=' */
		GET_CHAR_TO (awk, c);
		if (c == XP_CHAR('-')) {
			SET_TOKEN_TYPE (awk, TOKEN_MINUS_MINUS);
			ADD_TOKEN_STR (awk, XP_TEXT("--"));
			GET_CHAR (awk);
		}
		else if (c == XP_CHAR('=')) {
			SET_TOKEN_TYPE (awk, TOKEN_MINUS_ASSIGN);
			ADD_TOKEN_STR (awk, XP_TEXT("-="));
			GET_CHAR (awk);
		}
		else if (xp_isdigit(c)) {
			/* TODO: read_number (XP_CHAR('-')); */
		}
		else {
			SET_TOKEN_TYPE (awk, TOKEN_MINUS);
			ADD_TOKEN_STR (awk, XP_TEXT("-"));
		}
	}
	/*
	 * single-character punctuation. the character is appended with
	 * ADD_TOKEN_CHAR (c is a character, not a string) and then consumed;
	 * otherwise the next call would return the very same token again.
	 */
	else if (c == XP_CHAR('(')) {
		SET_TOKEN_TYPE (awk, TOKEN_LPAREN);
		ADD_TOKEN_CHAR (awk, c);
		GET_CHAR (awk);
	}
	else if (c == XP_CHAR(')')) {
		SET_TOKEN_TYPE (awk, TOKEN_RPAREN);
		ADD_TOKEN_CHAR (awk, c);
		GET_CHAR (awk);
	}
	else if (c == XP_CHAR('{')) {
		SET_TOKEN_TYPE (awk, TOKEN_LBRACE);
		ADD_TOKEN_CHAR (awk, c);
		GET_CHAR (awk);
	}
	else if (c == XP_CHAR('}')) {
		SET_TOKEN_TYPE (awk, TOKEN_RBRACE);
		ADD_TOKEN_CHAR (awk, c);
		GET_CHAR (awk);
	}
	else if (c == XP_CHAR('[')) {
		SET_TOKEN_TYPE (awk, TOKEN_LBRAKET);
		ADD_TOKEN_CHAR (awk, c);
		GET_CHAR (awk);
	}
	else if (c == XP_CHAR(']')) {
		SET_TOKEN_TYPE (awk, TOKEN_RBRAKET);
		ADD_TOKEN_CHAR (awk, c);
		GET_CHAR (awk);
	}
	else {
		/* no token starts with this character */
		awk->errnum = XP_AWK_ELXCHR;
		return -1;
	}

	return 0;
}
/*
 * __get_char
 *   makes the next input character current (awk->lex.curc). characters
 *   pushed back with __unget_char are handed out first, most recent
 *   first; otherwise one character is requested from awk->source_func.
 *   returns 0 on success. if source_func reports -1, awk->errnum is set
 *   to XP_AWK_ESRCDT and -1 is returned.
 */
static int __get_char (xp_awk_t* awk)
{
	if (awk->lex.ungotc_count > 0) {
		/* pop the most recently ungot character */
		awk->lex.ungotc_count--;
		awk->lex.curc = awk->lex.ungotc[awk->lex.ungotc_count];
		return 0;
	}

	if (awk->source_func(XP_AWK_IO_DATA,
		awk->source_arg, &awk->lex.curc, 1) != -1) return 0;

	awk->errnum = XP_AWK_ESRCDT;
	return -1;
}
/*
 * __unget_char
 *   stores c in awk->lex.ungotc so that a later __get_char returns it.
 *   returns 0 on success. when the buffer has no room left, awk->errnum
 *   is set to XP_AWK_ELXUNG and -1 is returned.
 */
static int __unget_char (xp_awk_t* awk, xp_cint_t c)
{
	if (awk->lex.ungotc_count < xp_countof(awk->lex.ungotc)) {
		awk->lex.ungotc[awk->lex.ungotc_count] = c;
		awk->lex.ungotc_count++;
		return 0;
	}

	awk->errnum = XP_AWK_ELXUNG;
	return -1;
}
/*
 * __skip_spaces
 *   reads characters for as long as the current one satisfies xp_isspace.
 *   returns 0 once a non-space character is current, or -1 if reading
 *   fails.
 */
static int __skip_spaces (xp_awk_t* awk)
{
	xp_cint_t ch;

	for (ch = awk->lex.curc; xp_isspace(ch); ch = awk->lex.curc) {
		if (__get_char(awk) == -1) return -1;
	}

	return 0;
}
/*
 * __skip_comment
 *   skips one comment if the current character starts one. two forms are
 *   recognized: '//' up to the end of the line, and a block comment
 *   delimited by slash-star and star-slash.
 *
 *   returns 1 if a comment was skipped, 0 if the input is not positioned
 *   on a comment (the lexer state is left as it was found), and -1 if
 *   reading or ungetting a character fails.
 *
 *   NOTE(review): a block comment that is still open at the end of the
 *   input is silently treated as ending there - confirm whether this
 *   should be reported as an error instead.
 */
static int __skip_comment (xp_awk_t* awk)
{
	xp_cint_t c = awk->lex.curc;

	if (c != XP_CHAR('/')) return 0;

	GET_CHAR_TO (awk, c);

	if (c == XP_CHAR('/')) {
		/* line comment: discard up to the newline or the end of input */
		do {
			GET_CHAR_TO (awk, c);
		} while (c != XP_CHAR('\n') && c != XP_CHAR_EOF);

		/* step over the newline, but never read beyond the end of input */
		if (c != XP_CHAR_EOF) GET_CHAR (awk);
		return 1;
	}

	if (c == XP_CHAR('*')) {
		/* block comment: scan until the closing delimiter or end of input */
		GET_CHAR_TO (awk, c);
		while (c != XP_CHAR_EOF) {
			if (c != XP_CHAR('*')) {
				GET_CHAR_TO (awk, c);
				continue;
			}

			GET_CHAR_TO (awk, c);
			if (c == XP_CHAR('/')) {
				/* move to the first character after the comment */
				GET_CHAR (awk);
				break;
			}
			/*
			 * not a terminator. c is inspected again by the loop
			 * because it may itself be a '*' that starts one.
			 */
		}
		return 1;
	}

	/*
	 * a lone '/': push the lookahead back and make '/' current again so
	 * that the caller sees exactly the input it had before the call.
	 */
	if (__unget_char(awk, c) == -1) return -1;
	awk->lex.curc = XP_CHAR('/');
	return 0;
}
static int __classfy_ident (const xp_char_t* ident)
{
struct __kwent* p = __kwtab;
while (p->name != XP_NULL) {
if (xp_strcmp(p->name, ident) == 0) return p->type;
}
return TOKEN_IDENT;
}