diff --git a/ase/awk/awk.h b/ase/awk/awk.h index 0bbded98..62d58800 100644 --- a/ase/awk/awk.h +++ b/ase/awk/awk.h @@ -1,5 +1,5 @@ /* - * $Id: awk.h,v 1.78 2006-07-12 07:25:15 bacon Exp $ + * $Id: awk.h,v 1.79 2006-07-19 11:45:23 bacon Exp $ */ #ifndef _XP_AWK_AWK_H_ @@ -11,6 +11,7 @@ typedef struct xp_awk_t xp_awk_t; typedef struct xp_awk_val_t xp_awk_val_t; typedef struct xp_awk_extio_t xp_awk_extio_t; +typedef struct xp_awk_rex_t xp_awk_rex_t; typedef xp_ssize_t (*xp_awk_io_t) ( int cmd, int opt, void* arg, xp_char_t* data, xp_size_t count); diff --git a/ase/awk/rex.c b/ase/awk/rex.c index 9766e537..aa190568 100644 --- a/ase/awk/rex.c +++ b/ase/awk/rex.c @@ -1,5 +1,5 @@ /* - * $Id: rex.c,v 1.3 2006-07-18 15:28:26 bacon Exp $ + * $Id: rex.c,v 1.4 2006-07-19 11:45:23 bacon Exp $ */ #include @@ -11,6 +11,8 @@ enum { + CMD_BOL_CHAR, + CMD_EOL_CHAR, CMD_ORD_CHAR, CMD_ANY_CHAR, CMD_CHAR_RANGE, @@ -25,20 +27,39 @@ enum CMD_CHAR_CLASS_ALNUM }; -struct __code -{ - unsigned char cmd; - unsigned char bflag; /* bound flag */ - xp_char_t lbound; - xp_char_t ubound; -}; - #define PC_CMD(rex,base) (rex)->code[(base)].dc.cmd #define PC_BFLAG(rex,base) (rex)->code[(base)].dc.bflag #define PC_LBOUND(rex,base) (rex)->code[(base)].dc.lbound #define PC_UBOUND(rex,base) (rex)->code[(base)].dc.ubound #define PC_VALUE(rex,base) (rex)->code[(base)].cc +#define BOUND_MIN 0 +#define BOUND_MAX (XP_TYPE_MAX(xp_size_t)) + +struct __code +{ + xp_byte_t cmd; + xp_size_t lbound; + xp_size_t ubound; + xp_char_t cc; /* optional */ +}; + +#define AT_END(rex) ((rex)->ptn.curp >= (rex)->ptn.end) + +#define NEXT_CHAR(rex) \ + do { if (__next_char(rex) == -1) return -1; } while (0) + +#define ADD_CODE(rex,data,len) \ + do { if (__add_code(rex,data,len) == -1) return -1; } while (0) + +static int __compile_expression (xp_awk_rex_t* rex); +static int __compile_branch (xp_awk_rex_t* rex); +static int __compile_atom (xp_awk_rex_t* rex); +static int __compile_charset (xp_awk_rex_t* rex); +static int __compile_bound (xp_awk_rex_t* rex); +static int __next_char (xp_awk_rex_t* rex); +static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len); + xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex) { if (rex == XP_NULL) @@ -49,105 +70,265 @@ xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex) } else rex->__dynamic = xp_false; + rex->code.capa = 512; + rex->code.size = 0; + rex->code.buf = (xp_byte_t*) xp_malloc (rex->code.capa); + if (rex->code.buf == XP_NULL) + { + if (rex->__dynamic) xp_free (rex); + return XP_NULL; + } + return rex; } void xp_awk_rex_close (xp_awk_rex_t* rex) { + xp_free (rex->code.buf); if (rex->__dynamic) xp_free (rex); } -int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn) +int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len) { - const xp_char_t* p = ptn; - xp_char_t c; + rex->ptn.ptr = ptn; + rex->ptn.end = rex->ptn.ptr + len; + rex->ptn.curp = rex->ptn.ptr; + rex->ptn.curc = XP_CHAR_EOF; - rex->ptn = ptn; + rex->code.size = 0; - while (*p != XP_T('\0')) + NEXT_CHAR (rex); + if (AT_END(rex)) return 0; /* empty pattern */ + + if (__compile_expression (rex) == -1) { - c = *p++; // TODO: backspace escaping... - - if (c == XP_T('|')) - { - } - + /* TODO: clear expression */ +xp_printf (XP_T("fuck ........ \n")); + return -1; } - return -1; + if (!AT_END(rex)) + { + /* TODO: error handling */ + /* garbage after the expression */ +xp_printf (XP_T("garbage after expression\n")); + return -1; + } + + return 0; } -int __compile_expression (xp_awk_rex_t* rex) +static int __compile_expression (xp_awk_rex_t* rex) { if (__compile_branch (rex) == -1) return -1; - while (rex->curc == VBAR) + while (!AT_END(rex) && rex->ptn.curc == XP_T('|')) { - GET_NEXT_CHAR (rex); + NEXT_CHAR (rex); - branch_base = rex->code_size; - if (compie_branch(rex) == -1) return -1; + //branch_base = rex->code_size; + if (__compile_branch(rex) == -1) return -1; + /* rex->code[branch_base]++; rex->code[len_base] += xxxxx; + */ } + + return 0; } -int __compile_branch (xp_awk_rex_t* rex) +static int __compile_branch (xp_awk_rex_t* rex) { + int n; - while (1) + while (!AT_END(rex)) { - atom_base = rex->code_size; + //atom_base = rex->code_size; - n = compile_atom (); + n = __compile_atom (rex); if (n == -1) return -1; if (n == 1) break; - c = rex->curc; - if (c == PLUS) /* + */ + if (AT_END(rex)) break; + + switch (rex->ptn.curc) { - __apply_bound (1, MAX); - get_next_char (); - } - else if (c == STAR) /* * */ - { - __apply_bound (0, MAX); - get_next_char (); - } - else if (c == QUEST) /* ? */ - { - __apply_bound (0, 1); - get_next_char (); - } - else if (c == LBRACE) /* { */ - { - if (__compile_bound(rex) == -1) return -1; + case XP_T('+'): + { + //__apply_bound (1, MAX); + NEXT_CHAR (rex); + break; + } + + case XP_T('*'): + { + //__apply_bound (0, MAX); + NEXT_CHAR (rex); + break; + } + + case XP_T('?'): + { + //__apply_bound (0, 1); + NEXT_CHAR (rex); + break; + } + + case XP_T('{'): + { + if (__compile_bound(rex) == -1) return -1; + break; + } } } + + return 0; } -int __compile_atom (xp_awk_rex_t* rex) +static int __compile_atom (xp_awk_rex_t* rex) { - xp_char_t c; + int n = 0; - c = rex->curc; - - if (c == LPAREN) + if (rex->ptn.curc == XP_T('(')) { + // GROUP + NEXT_CHAR (rex); + + n = __compile_expression (rex); + if (n == -1) return -1; + if (rex->ptn.curc != ')') + { + // rex->errnum = XP_AWK_REX_ERPAREN; + return -1; + } + + NEXT_CHAR (rex); } else { - if (c == CARET) - else if (c == DOLLAR) - else if (c == PERIOD) - else if (c == LBRACKET) - else if (....) + xp_size_t index = rex->code.size; + + if (rex->ptn.curc == XP_T('^')) + { + struct __code tmp; + + tmp.cmd = CMD_BOL_CHAR; + tmp.lbound = 1; + tmp.ubound = 1; + + ADD_CODE (rex, &tmp, xp_sizeof(tmp)); + NEXT_CHAR (rex); + } + else if (rex->ptn.curc == XP_T('$')) + { + struct __code tmp; + + tmp.cmd = CMD_EOL_CHAR; + tmp.lbound = 1; + tmp.ubound = 1; + + ADD_CODE (rex, &tmp, xp_sizeof(tmp)); + NEXT_CHAR (rex); + } + else if (rex->ptn.curc == XP_T('.')) + { + struct __code tmp; + + tmp.cmd = CMD_ANY_CHAR; + tmp.lbound = 1; + tmp.ubound = 1; + + ADD_CODE (rex, &tmp, xp_sizeof(tmp)); + NEXT_CHAR (rex); + } + else if (rex->ptn.curc == XP_T('[')) + { + if (__compile_charset (rex) == -1) return -1; + } + else + { + struct __code tmp; + + tmp.cmd = CMD_ORD_CHAR; + tmp.lbound = 1; + tmp.ubound = 1; + + ADD_CODE (rex, &tmp, xp_sizeof(tmp)); + ADD_CODE (rex, &rex->ptn.curc, xp_sizeof(rex->ptn.curc)); + NEXT_CHAR (rex); + } + } - return n; + return 0; } -int __compile_bound (xp_awk_rex_t* rex) +static int __compile_charset (xp_awk_rex_t* rex) { + return -1; +} + +static int __compile_bound (xp_awk_rex_t* rex) +{ + return -1; +} + +static int __next_char (xp_awk_rex_t* rex) +{ + if (AT_END(rex)) + { +xp_printf (XP_T("XP_AWK_REX_EEOF\n")); + //rex->errnum = XP_AWK_REX_EEOF; + return -1; + } + + rex->ptn.curc = *rex->ptn.curp++; +xp_printf (XP_T("[%c]\n"), rex->ptn.curc); + if (rex->ptn.curc == XP_T('\\')) + { + if (rex->ptn.curp >= rex->ptn.end) + { + /* unexpected end of expression */ + //rex->errnum = XP_AWK_REX_EEND; + return -1; + } + + rex->ptn.curc = *rex->ptn.curp++; + + /* TODO: verify this part */ + if (rex->ptn.curc == XP_T('n')) rex->ptn.curc = XP_T('\n'); + else if (rex->ptn.curc == XP_T('r')) rex->ptn.curc = XP_T('\r'); + else if (rex->ptn.curc == XP_T('t')) rex->ptn.curc = XP_T('\t'); + } + + return 0; +} + +static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len) +{ + if (len > rex->code.capa - rex->code.size) + { + xp_size_t capa = rex->code.capa * 2; + xp_byte_t* tmp; + + if (capa == 0) capa = 1; + while (len > capa - rex->code.size) { capa = capa * 2; } + + tmp = (xp_byte_t*) xp_realloc (rex->code.buf, capa); + if (tmp == XP_NULL) + { + /* TODO: */ + /*rex->errnum = XP_AWK_REX_ENOMEM;*/ + return -1; + } + + rex->code.buf = tmp; + rex->code.capa = capa; + } + + xp_memcpy (&rex->code.buf[rex->code.size], data, len); + rex->code.size += len; + + return 0; } diff --git a/ase/awk/rex.h b/ase/awk/rex.h index d722b4fc..ec7ef770 100644 --- a/ase/awk/rex.h +++ b/ase/awk/rex.h @@ -1,5 +1,5 @@ /* - * $Id: rex.h,v 1.1 2006-07-17 06:21:39 bacon Exp $ + * $Id: rex.h,v 1.2 2006-07-19 11:45:23 bacon Exp $ **/ #ifndef _XP_AWK_REX_H_ @@ -9,10 +9,23 @@ #error Never include this file directly. Include instead #endif -typedef struct xp_awk_rex_t xp_awk_rex_t; - struct xp_awk_rex_t { + struct + { + const xp_char_t* ptr; + const xp_char_t* end; + const xp_char_t* curp; + xp_char_t curc; + } ptn; + + struct + { + xp_byte_t* buf; + xp_size_t size; + xp_size_t capa; + } code; + xp_bool_t __dynamic; }; @@ -22,7 +35,7 @@ extern "C" { xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex); void xp_awk_rex_close (xp_awk_rex_t* rex); -int xp_awk_rex_compile (const xp_awk_rex_t* rex, const xp_char_t* ptn); +int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len); #ifdef __cplusplus } diff --git a/ase/test/awk/makefile.cl b/ase/test/awk/makefile.cl index ebe8cd34..8dfa06c4 100644 --- a/ase/test/awk/makefile.cl +++ b/ase/test/awk/makefile.cl @@ -4,13 +4,16 @@ CFLAGS = /nologo /MT /W3 /GR- /D_WIN32_WINNT=0x0400 -I..\..\.. -D__STAND_ALONE LDFLAGS = /libpath:..\..\bas /libpath:..\..\awk LIBS = xpawk.lib user32.lib -all: awk +all: awk rex awk: awk.obj link /nologo /out:awk.exe $(LDFLAGS) $(LIBS) awk.obj +rex: rex.obj + link /nologo /out:rex.exe $(LDFLAGS) $(LIBS) rex.obj + clean: - del $(OBJS) *.obj awk.exe + del $(OBJS) *.obj awk.exe rex.exe .SUFFIXES: .c .obj .c.obj: diff --git a/ase/test/awk/rex.c b/ase/test/awk/rex.c new file mode 100644 index 00000000..822797f2 --- /dev/null +++ b/ase/test/awk/rex.c @@ -0,0 +1,36 @@ +#include + +#ifdef __STAND_ALONE + #define xp_printf xp_awk_printf + extern int xp_awk_printf (const xp_char_t* fmt, ...); + #define xp_strcmp xp_awk_strcmp + extern int xp_awk_strcmp (const xp_char_t* s1, const xp_char_t* s2); + #define xp_strlen xp_awk_strlen + extern int xp_awk_strlen (const xp_char_t* s); +#endif + +int xp_main (int argc, const xp_char_t* argv[]) +{ + + xp_awk_rex_t* rex; + const xp_char_t* ptn; + + rex = xp_awk_rex_open (XP_NULL); + if (rex == XP_NULL) + { + xp_printf (XP_T("rex open failed\n")); + return -1; + } + + + ptn = XP_T("^he.llo"); + if (xp_awk_rex_compile (rex, ptn, xp_strlen(ptn)) == -1) + { + xp_printf (XP_T("cannot compile pattern...\n")); + xp_awk_rex_close (rex); + return -1; + } + + xp_awk_rex_close (rex); + return 0; +}