Adds the first awk sources (awk.c, lex.c, lex.h, parse.c), adds an error
number to xp_awk_t, and points Makefile.cl at the new sources.

Review notes:
  * The header name after every #include is missing from this patch and
    is left blank; the awk.h context lines that contain #include will
    not match a real tree until the names are restored.
  * lex.c stores errors in awk->error_code and uses XP_SCE_* names and
    token.type, while the awk.h hunks below add only errnum and
    XP_AWK_ENOERR and lex.h declares token as xp_str_t. Whether the
    former are declared elsewhere is not visible here - TODO confirm.
  * The index lines are kept as found; the blob ids of awk.c, lex.c,
    lex.h and parse.c do not describe the content below.

diff --git a/ase/awk/Makefile.cl b/ase/awk/Makefile.cl
index c9dcdfb6..0c2b05e0 100644
--- a/ase/awk/Makefile.cl
+++ b/ase/awk/Makefile.cl
@@ -1,7 +1,4 @@
-SRCS = \
-	stx.c memory.c object.c symbol.c class.c array.c \
-	dict.c misc.c name.c token.c parser.c bootstrp.c \
-	bytecode.c interp.c
+SRCS = awk.c lex.c parse.c
 OBJS = $(SRCS:.c=.obj)
 OUT = xpawk.lib
 
diff --git a/ase/awk/awk.c b/ase/awk/awk.c
new file mode 100644
index 00000000..b5c727a2
--- /dev/null
+++ b/ase/awk/awk.c
@@ -0,0 +1,39 @@
+/*
+ * $Id: awk.c,v 1.1 2005-11-06 12:01:29 bacon Exp $
+ */
+
+#include
+#include
+
+/*
+ * Initializes an awk object and returns it.
+ *
+ * If awk is XP_NULL, the object is allocated with xp_malloc and marked
+ * as heap-owned so that xp_awk_close releases it. Otherwise the
+ * caller-supplied storage is initialized in place.
+ *
+ * Returns XP_NULL only when the allocation fails.
+ */
+xp_awk_t* xp_awk_open (xp_awk_t* awk)
+{
+	if (awk == XP_NULL) {
+		/* size of the object, not of the pointer to it */
+		awk = (xp_awk_t*) xp_malloc (xp_sizeof(xp_awk_t));
+		if (awk == XP_NULL) return XP_NULL;
+		awk->__malloced = xp_true;
+	}
+	else awk->__malloced = xp_false;
+
+	awk->errnum = XP_AWK_ENOERR;
+	return awk;
+}
+
+/*
+ * Releases an awk object set up by xp_awk_open. The storage itself is
+ * freed only when xp_awk_open allocated it. Always returns 0.
+ */
+int xp_awk_close (xp_awk_t* awk)
+{
+	if (awk->__malloced) xp_free (awk);
+	return 0;
+}
diff --git a/ase/awk/awk.h b/ase/awk/awk.h
index 937a9a02..269c31f0 100644
--- a/ase/awk/awk.h
+++ b/ase/awk/awk.h
@@ -1,5 +1,5 @@
 /*
- * $Id: awk.h,v 1.1 2005-11-05 17:54:00 bacon Exp $
+ * $Id: awk.h,v 1.2 2005-11-06 12:01:29 bacon Exp $
  */
 
 #ifndef _XP_AWK_AWK_H_
@@ -8,6 +8,11 @@
 #include
 #include
 
+enum
+{
+	XP_AWK_ENOERR
+};
+
 /*
  * TYPE: xp_awk_t
  */
@@ -35,6 +40,7 @@ struct xp_awk_t
 	void* output_arg;
 
 	/* housekeeping */
+	int errnum;
 	xp_bool_t __malloced;
 };
 
diff --git a/ase/awk/lex.c b/ase/awk/lex.c
new file mode 100644
index 00000000..dc4d51f9
--- /dev/null
+++ b/ase/awk/lex.c
@@ -0,0 +1,289 @@
+/*
+ * $Id: lex.c,v 1.1 2005-11-06 12:01:29 bacon Exp $
+ */
+
+#include
+#include
+#include
+
+static int __get_char (xp_awk_lex_t* lex);
+static int __unget_char (xp_awk_lex_t* lex, xp_cint_t c);
+static int __skip_spaces (xp_awk_lex_t* lex);
+static int __skip_comment (xp_awk_lex_t* lex);
+
+/* GET_CHAR, GET_CHAR_TO and ADD_TOKEN_STR return -1 from the caller on failure. */
+#define GET_CHAR(lex) \
+	do { if (__get_char(lex) == -1) return -1; } while(0)
+#define GET_CHAR_TO(lex, c) \
+	do { if (__get_char(lex) == -1) return -1; c = (lex)->curc; } while(0)
+
+#define SET_TOKEN_TYPE(lex,code) ((lex)->token.type = (code))
+#define ADD_TOKEN_STR(lex,str) \
+	do { if (xp_str_cat(&(lex)->token, (str)) == -1) return -1; } while (0)
+
+/*
+ * Initializes a lexer bound to awk and reads the first character.
+ *
+ * If lex is XP_NULL, the lexer is allocated with xp_malloc and freed
+ * again by xp_awk_lex_close. Returns XP_NULL if awk is XP_NULL, if the
+ * lexer or its token buffer cannot be set up, or if the first character
+ * cannot be read.
+ */
+xp_awk_lex_t* xp_awk_lex_open (
+	xp_awk_lex_t* lex, xp_awk_t* awk)
+{
+	if (awk == XP_NULL) return XP_NULL;
+
+	if (lex == XP_NULL) {
+		lex = (xp_awk_lex_t*) xp_malloc (xp_sizeof(xp_awk_lex_t));
+		if (lex == XP_NULL) return XP_NULL;
+		lex->__malloced = xp_true;
+	}
+	else lex->__malloced = xp_false;
+
+	if (xp_str_open (&lex->token, 128) == XP_NULL) {
+		if (lex->__malloced) xp_free (lex);
+		return XP_NULL;
+	}
+
+	lex->awk = awk;
+	lex->ungotc_count = 0;
+
+	/* if rewind is not supported, the following rewind call
+	 * would fail. in this case, we just ignore the failure
+	 * assuming that this lex can still be used in a single-pass
+	 * compiler */
+	xp_awk_lex_rewind(lex);
+
+	if (__get_char(lex) == -1) {
+		xp_str_close (&lex->token);
+		if (lex->__malloced) xp_free (lex);
+		return XP_NULL;
+	}
+
+	return lex;
+}
+
+/* Closes the token buffer and frees lex if xp_awk_lex_open allocated it. */
+void xp_awk_lex_close (xp_awk_lex_t* lex)
+{
+	xp_str_close (&lex->token);
+	if (lex->__malloced) xp_free (lex);
+}
+
+/*
+ * Drops any pushed-back characters and asks the input handler to rewind.
+ * Returns whatever the input handler returns.
+ */
+int xp_awk_lex_rewind (xp_awk_lex_t* lex)
+{
+	xp_awk_t* awk = lex->awk;
+	lex->ungotc_count = 0;
+	return awk->input_func (awk, XP_SCE_INPUT_REWIND, XP_NULL);
+}
+
+/*
+ * Skips spaces and comments, then classifies the token that starts at
+ * the current character, storing its type and text in lex->token.
+ *
+ * Returns 0 on success and -1 on failure.
+ *
+ * NOTE(review): numbers, identifiers and string literals are recognized
+ * but not scanned yet; those branches neither set a token type nor
+ * consume any input.
+ */
+int xp_awk_lex_fetch_token (xp_awk_lex_t* lex)
+{
+	xp_cint_t c;
+	int n;
+
+	do {
+		if (__skip_spaces(lex) == -1) return -1;
+		if ((n = __skip_comment(lex)) == -1) return -1;
+	} while (n == 1);
+
+	xp_str_clear (&lex->token);
+	c = lex->curc;
+
+	if (c == XP_CHAR_EOF) {
+		SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_END);
+	}
+	else if (xp_isdigit(c)) {
+		/* TODO: scan a number */
+	}
+	else if (xp_isalpha(c) || c == XP_CHAR('_')) {
+		/* TODO: scan an identifier or keyword */
+	}
+	else if (c == XP_CHAR('\"')) {
+		/* TODO: scan a string literal */
+	}
+	else if (c == XP_CHAR('=')) {
+		GET_CHAR_TO (lex, c);
+		if (c == XP_CHAR('=')) {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_EQ);
+			ADD_TOKEN_STR(lex, XP_TEXT("=="));
+			GET_CHAR_TO (lex, c);
+		}
+		else {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_ASSIGN);
+			ADD_TOKEN_STR(lex, XP_TEXT("="));
+		}
+	}
+	else if (c == XP_CHAR('!')) {
+		GET_CHAR_TO (lex, c);
+		if (c == XP_CHAR('=')) {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_NE);
+			ADD_TOKEN_STR(lex, XP_TEXT("!="));
+			GET_CHAR_TO (lex, c);
+		}
+		else {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_NOT);
+			ADD_TOKEN_STR(lex, XP_TEXT("!"));
+		}
+	}
+	else if (c == XP_CHAR('+')) {
+		GET_CHAR_TO (lex, c);
+		if (c == XP_CHAR('+')) {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_INC);
+			ADD_TOKEN_STR(lex, XP_TEXT("++"));
+			GET_CHAR_TO (lex, c);
+		}
+		else if (c == XP_CHAR('=')) {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_PLUS_ASSIGN);
+			ADD_TOKEN_STR(lex, XP_TEXT("+="));
+			GET_CHAR_TO (lex, c);
+		}
+		else if (xp_isdigit(c)) {
+			/* TODO: read_number (XP_CHAR('+')); */
+		}
+		else {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_PLUS);
+			ADD_TOKEN_STR(lex, XP_TEXT("+"));
+		}
+	}
+	else if (c == XP_CHAR('-')) {
+		GET_CHAR_TO (lex, c);
+		if (c == XP_CHAR('-')) {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_DEC);
+			ADD_TOKEN_STR(lex, XP_TEXT("--"));
+			GET_CHAR_TO (lex, c);
+		}
+		else if (c == XP_CHAR('=')) {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_MINUS_ASSIGN);
+			ADD_TOKEN_STR(lex, XP_TEXT("-="));
+			GET_CHAR_TO (lex, c);
+		}
+		else if (xp_isdigit(c)) {
+			/* TODO: read_number (XP_CHAR('-')); */
+		}
+		else {
+			SET_TOKEN_TYPE (lex, XP_SCE_TOKEN_MINUS);
+			ADD_TOKEN_STR(lex, XP_TEXT("-"));
+		}
+	}
+	else {
+		/* set the error into awk directly though it
+		 * might look a bit awkward */
+		lex->awk->error_code = XP_SCE_ERROR_WRONG_CHAR;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Makes the next input character current in lex->curc, taking
+ * pushed-back characters first and the input handler otherwise.
+ * Returns 0 on success and -1 if the input handler fails.
+ */
+static int __get_char (xp_awk_lex_t* lex)
+{
+	xp_awk_t* awk = lex->awk;
+
+	if (lex->ungotc_count > 0) {
+		lex->curc = lex->ungotc[--lex->ungotc_count];
+		return 0;
+	}
+
+	if (awk->input_func (awk, XP_SCE_INPUT_CONSUME, &lex->curc) == -1) {
+		awk->error_code = XP_SCE_ERROR_INPUT;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Pushes c back so that a later __get_char returns it again.
+ * Returns 0 on success and -1 if the push-back buffer is full.
+ */
+static int __unget_char (xp_awk_lex_t* lex, xp_cint_t c)
+{
+	xp_awk_t* awk = lex->awk;
+
+	if (lex->ungotc_count >= xp_countof(lex->ungotc)) {
+		awk->error_code = XP_SCE_ERROR_UNGET;
+		return -1;
+	}
+
+	lex->ungotc[lex->ungotc_count++] = c;
+	return 0;
+}
+
+/* Advances past whitespace. Returns 0, or -1 if reading fails. */
+static int __skip_spaces (xp_awk_lex_t* lex)
+{
+	xp_cint_t c = lex->curc;
+	while (xp_isspace(c)) GET_CHAR_TO (lex, c);
+	return 0;
+}
+
+/*
+ * Skips one comment that starts at the current character.
+ *
+ * Returns 1 if a comment was skipped, 0 if the current character does
+ * not start a comment, and -1 on failure. An unterminated block
+ * comment ends at the end of input.
+ */
+static int __skip_comment (xp_awk_lex_t* lex)
+{
+	xp_cint_t c = lex->curc;
+
+	if (c != XP_CHAR('/')) return 0;
+	GET_CHAR_TO (lex, c);
+
+	if (c == XP_CHAR('/')) {
+		do {
+			GET_CHAR_TO (lex, c);
+		} while (c != XP_CHAR('\n') && c != XP_CHAR_EOF);
+
+		/* step past the newline, but never read beyond the end */
+		if (c != XP_CHAR_EOF) GET_CHAR (lex);
+		return 1;
+	}
+	else if (c == XP_CHAR('*')) {
+		GET_CHAR_TO (lex, c);
+		while (c != XP_CHAR_EOF) {
+			if (c != XP_CHAR('*')) {
+				GET_CHAR_TO (lex, c);
+				continue;
+			}
+
+			/* the character after a star is examined again by
+			 * the loop, so a run of stars still closes */
+			GET_CHAR_TO (lex, c);
+			if (c == XP_CHAR('/')) {
+				GET_CHAR (lex);
+				break;
+			}
+		}
+		return 1;
+	}
+
+	/* a lone slash: push back the lookahead and make the slash
+	 * current again so that the caller still sees it */
+	if (__unget_char(lex, c) == -1) return -1;
+	lex->curc = XP_CHAR('/');
+	return 0;
+}
+
diff --git a/ase/awk/lex.h b/ase/awk/lex.h
new file mode 100644
index 00000000..d8fb9cd4
--- /dev/null
+++ b/ase/awk/lex.h
@@ -0,0 +1,36 @@
+/*
+ * $Id: lex.h,v 1.1 2005-11-06 12:01:29 bacon Exp $
+ */
+
+#ifndef _XP_AWK_LEX_H_
+#define _XP_AWK_LEX_H_
+
+#include
+#include
+
+struct xp_awk_lex_t
+{
+	xp_awk_t* awk; /* awk object whose input_func feeds the lexer */
+	xp_str_t token; /* text of the current token */
+	xp_cint_t curc; /* current character */
+	xp_cint_t ungotc[5]; /* pushed-back characters, last in first out */
+	xp_size_t ungotc_count; /* number of entries used in ungotc */
+	xp_bool_t __malloced; /* true if xp_awk_lex_open allocated this */
+};
+
+typedef struct xp_awk_lex_t xp_awk_lex_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+xp_awk_lex_t* xp_awk_lex_open (xp_awk_lex_t* lex, xp_awk_t* awk);
+void xp_awk_lex_close (xp_awk_lex_t* lex);
+int xp_awk_lex_rewind (xp_awk_lex_t* lex);
+int xp_awk_lex_fetch_token (xp_awk_lex_t* lex);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/ase/awk/parse.c b/ase/awk/parse.c
new file mode 100644
index 00000000..1843689f
--- /dev/null
+++ b/ase/awk/parse.c
@@ -0,0 +1,11 @@
+/*
+ * $Id: parse.c,v 1.1 2005-11-06 12:01:29 bacon Exp $
+ */
+
+#include
+
+/* Parser entry point. Not implemented yet: always returns -1. */
+int xp_awk_parse (xp_awk_t* awk)
+{
+	return -1;
+}