*** empty log message ***

This commit is contained in:
hyung-hwan 2006-07-19 11:45:24 +00:00
parent 6df65679cd
commit 25da30c536
5 changed files with 301 additions and 67 deletions

View File

@ -1,5 +1,5 @@
/* /*
* $Id: awk.h,v 1.78 2006-07-12 07:25:15 bacon Exp $ * $Id: awk.h,v 1.79 2006-07-19 11:45:23 bacon Exp $
*/ */
#ifndef _XP_AWK_AWK_H_ #ifndef _XP_AWK_AWK_H_
@ -11,6 +11,7 @@
typedef struct xp_awk_t xp_awk_t; typedef struct xp_awk_t xp_awk_t;
typedef struct xp_awk_val_t xp_awk_val_t; typedef struct xp_awk_val_t xp_awk_val_t;
typedef struct xp_awk_extio_t xp_awk_extio_t; typedef struct xp_awk_extio_t xp_awk_extio_t;
typedef struct xp_awk_rex_t xp_awk_rex_t;
typedef xp_ssize_t (*xp_awk_io_t) ( typedef xp_ssize_t (*xp_awk_io_t) (
int cmd, int opt, void* arg, xp_char_t* data, xp_size_t count); int cmd, int opt, void* arg, xp_char_t* data, xp_size_t count);

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c,v 1.3 2006-07-18 15:28:26 bacon Exp $ * $Id: rex.c,v 1.4 2006-07-19 11:45:23 bacon Exp $
*/ */
#include <xp/awk/awk_i.h> #include <xp/awk/awk_i.h>
@ -11,6 +11,8 @@
enum enum
{ {
CMD_BOL_CHAR,
CMD_EOL_CHAR,
CMD_ORD_CHAR, CMD_ORD_CHAR,
CMD_ANY_CHAR, CMD_ANY_CHAR,
CMD_CHAR_RANGE, CMD_CHAR_RANGE,
@ -25,20 +27,39 @@ enum
CMD_CHAR_CLASS_ALNUM CMD_CHAR_CLASS_ALNUM
}; };
struct __code
{
unsigned char cmd;
unsigned char bflag; /* bound flag */
xp_char_t lbound;
xp_char_t ubound;
};
#define PC_CMD(rex,base) (rex)->code[(base)].dc.cmd #define PC_CMD(rex,base) (rex)->code[(base)].dc.cmd
#define PC_BFLAG(rex,base) (rex)->code[(base)].dc.bflag #define PC_BFLAG(rex,base) (rex)->code[(base)].dc.bflag
#define PC_LBOUND(rex,base) (rex)->code[(base)].dc.lbound #define PC_LBOUND(rex,base) (rex)->code[(base)].dc.lbound
#define PC_UBOUND(rex,base) (rex)->code[(base)].dc.ubound #define PC_UBOUND(rex,base) (rex)->code[(base)].dc.ubound
#define PC_VALUE(rex,base) (rex)->code[(base)].cc #define PC_VALUE(rex,base) (rex)->code[(base)].cc
#define BOUND_MIN 0
#define BOUND_MAX (XP_TYPE_MAX(xp_size_t))
/*
 * Fixed-size record that __compile_atom fills in and appends to the
 * compiled code buffer through ADD_CODE (copied bytewise).
 */
struct __code
{
xp_byte_t cmd; /* one of the CMD_* enumerators */
xp_size_t lbound; /* __compile_atom stores 1; presumably the minimum repeat count - confirm */
xp_size_t ubound; /* __compile_atom stores 1; presumably the maximum repeat count - confirm */
xp_char_t cc; /* optional */
};
/* True once the pattern cursor (ptn.curp) has reached or passed ptn.end. */
#define AT_END(rex) ((rex)->ptn.curp >= (rex)->ptn.end)
/*
 * Calls __next_char(rex). NOTE: on failure this macro executes
 * `return -1` in the *calling* function, so it may only be used inside
 * functions that return int.
 */
#define NEXT_CHAR(rex) \
do { if (__next_char(rex) == -1) return -1; } while (0)
/*
 * Calls __add_code(rex,data,len). As with NEXT_CHAR, a failure makes the
 * *calling* function return -1.
 */
#define ADD_CODE(rex,data,len) \
do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
static int __compile_expression (xp_awk_rex_t* rex);
static int __compile_branch (xp_awk_rex_t* rex);
static int __compile_atom (xp_awk_rex_t* rex);
static int __compile_charset (xp_awk_rex_t* rex);
static int __compile_bound (xp_awk_rex_t* rex);
static int __next_char (xp_awk_rex_t* rex);
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len);
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex) xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
{ {
if (rex == XP_NULL) if (rex == XP_NULL)
@ -49,105 +70,265 @@ xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
} }
else rex->__dynamic = xp_false; else rex->__dynamic = xp_false;
rex->code.capa = 512;
rex->code.size = 0;
rex->code.buf = (xp_byte_t*) xp_malloc (rex->code.capa);
if (rex->code.buf == XP_NULL)
{
if (rex->__dynamic) xp_free (rex);
return XP_NULL;
}
return rex; return rex;
} }
void xp_awk_rex_close (xp_awk_rex_t* rex) void xp_awk_rex_close (xp_awk_rex_t* rex)
{ {
xp_free (rex->code.buf);
if (rex->__dynamic) xp_free (rex); if (rex->__dynamic) xp_free (rex);
} }
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn) int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
{ {
const xp_char_t* p = ptn; rex->ptn.ptr = ptn;
xp_char_t c; rex->ptn.end = rex->ptn.ptr + len;
rex->ptn.curp = rex->ptn.ptr;
rex->ptn.curc = XP_CHAR_EOF;
rex->ptn = ptn; rex->code.size = 0;
while (*p != XP_T('\0')) NEXT_CHAR (rex);
if (AT_END(rex)) return 0; /* empty pattern */
if (__compile_expression (rex) == -1)
{ {
c = *p++; // TODO: backspace escaping... /* TODO: clear expression */
xp_printf (XP_T("fuck ........ \n"));
if (c == XP_T('|')) return -1;
{
}
} }
return -1; if (!AT_END(rex))
{
/* TODO: error handling */
/* garbage after the expression */
xp_printf (XP_T("garbage after expression\n"));
return -1;
}
return 0;
} }
int __compile_expression (xp_awk_rex_t* rex) static int __compile_expression (xp_awk_rex_t* rex)
{ {
if (__compile_branch (rex) == -1) return -1; if (__compile_branch (rex) == -1) return -1;
while (rex->curc == VBAR) while (!AT_END(rex) && rex->ptn.curc == XP_T('|'))
{ {
GET_NEXT_CHAR (rex); NEXT_CHAR (rex);
branch_base = rex->code_size; //branch_base = rex->code_size;
if (compie_branch(rex) == -1) return -1; if (__compile_branch(rex) == -1) return -1;
/*
rex->code[branch_base]++; rex->code[branch_base]++;
rex->code[len_base] += xxxxx; rex->code[len_base] += xxxxx;
*/
} }
return 0;
} }
int __compile_branch (xp_awk_rex_t* rex) static int __compile_branch (xp_awk_rex_t* rex)
{ {
int n;
while (1) while (!AT_END(rex))
{ {
atom_base = rex->code_size; //atom_base = rex->code_size;
n = compile_atom (); n = __compile_atom (rex);
if (n == -1) return -1; if (n == -1) return -1;
if (n == 1) break; if (n == 1) break;
c = rex->curc; if (AT_END(rex)) break;
if (c == PLUS) /* + */
switch (rex->ptn.curc)
{ {
__apply_bound (1, MAX); case XP_T('+'):
get_next_char (); {
} //__apply_bound (1, MAX);
else if (c == STAR) /* * */ NEXT_CHAR (rex);
{ break;
__apply_bound (0, MAX); }
get_next_char ();
} case XP_T('*'):
else if (c == QUEST) /* ? */ {
{ //__apply_bound (0, MAX);
__apply_bound (0, 1); NEXT_CHAR (rex);
get_next_char (); break;
} }
else if (c == LBRACE) /* { */
{ case XP_T('?'):
if (__compile_bound(rex) == -1) return -1; {
//__apply_bound (0, 1);
NEXT_CHAR (rex);
break;
}
case XP_T('{'):
{
if (__compile_bound(rex) == -1) return -1;
break;
}
} }
} }
return 0;
} }
int __compile_atom (xp_awk_rex_t* rex) static int __compile_atom (xp_awk_rex_t* rex)
{ {
xp_char_t c; int n = 0;
c = rex->curc; if (rex->ptn.curc == XP_T('('))
if (c == LPAREN)
{ {
// GROUP
NEXT_CHAR (rex);
n = __compile_expression (rex);
if (n == -1) return -1;
if (rex->ptn.curc != ')')
{
// rex->errnum = XP_AWK_REX_ERPAREN;
return -1;
}
NEXT_CHAR (rex);
} }
else else
{ {
if (c == CARET) xp_size_t index = rex->code.size;
else if (c == DOLLAR)
else if (c == PERIOD) if (rex->ptn.curc == XP_T('^'))
else if (c == LBRACKET) {
else if (....) struct __code tmp;
tmp.cmd = CMD_BOL_CHAR;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
NEXT_CHAR (rex);
}
else if (rex->ptn.curc == XP_T('$'))
{
struct __code tmp;
tmp.cmd = CMD_EOL_CHAR;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
NEXT_CHAR (rex);
}
else if (rex->ptn.curc == XP_T('.'))
{
struct __code tmp;
tmp.cmd = CMD_ANY_CHAR;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
NEXT_CHAR (rex);
}
else if (rex->ptn.curc == XP_T('['))
{
if (__compile_charset (rex) == -1) return -1;
}
else
{
struct __code tmp;
tmp.cmd = CMD_ORD_CHAR;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
ADD_CODE (rex, &rex->ptn.curc, xp_sizeof(rex->ptn.curc));
NEXT_CHAR (rex);
}
} }
return n; return 0;
} }
int __compile_bound (xp_awk_rex_t* rex) static int __compile_charset (xp_awk_rex_t* rex)
{ {
return -1;
}
/*
 * Placeholder: __compile_branch calls this when the current pattern
 * character is '{'. Nothing is parsed yet, so the call always fails.
 *
 * Returns -1 unconditionally.
 */
static int __compile_bound (xp_awk_rex_t* rex)
{
	(void) rex; /* not used until the parser is written */
	return -1;
}
/*
 * Advance the pattern cursor by one logical character and leave that
 * character in rex->ptn.curc.
 *
 * A backslash also consumes the character that follows it: \n, \r and \t
 * are translated to newline, carriage return and tab; any other escaped
 * character is stored unchanged.
 *
 * Returns 0 on success. Returns -1 when the cursor is already at the end
 * of the pattern, or when a backslash is the last character of the
 * pattern (curc is left holding the backslash in that case).
 */
static int __next_char (xp_awk_rex_t* rex)
{
	xp_char_t ch;

	if (AT_END(rex))
	{
		xp_printf (XP_T("XP_AWK_REX_EEOF\n"));
		//rex->errnum = XP_AWK_REX_EEOF;
		return -1;
	}

	ch = *rex->ptn.curp++;
	rex->ptn.curc = ch;
	xp_printf (XP_T("[%c]\n"), rex->ptn.curc);

	/* ordinary character: nothing more to do */
	if (ch != XP_T('\\')) return 0;

	if (rex->ptn.curp >= rex->ptn.end)
	{
		/* unexpected end of expression */
		//rex->errnum = XP_AWK_REX_EEND;
		return -1;
	}

	ch = *rex->ptn.curp++;

	/* TODO: verify this part */
	switch (ch)
	{
		case XP_T('n'):
			ch = XP_T('\n');
			break;

		case XP_T('r'):
			ch = XP_T('\r');
			break;

		case XP_T('t'):
			ch = XP_T('\t');
			break;

		default:
			/* any other escaped character stands for itself */
			break;
	}

	rex->ptn.curc = ch;
	return 0;
}
/*
 * Append len bytes starting at data to the end of rex->code.buf,
 * enlarging the buffer with xp_realloc when the free space is too small.
 *
 * Returns 0 on success, or -1 when the reallocation fails; in that case
 * the existing buffer, size and capacity are left untouched.
 */
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len)
{
/* not enough free room left for len more bytes? */
if (len > rex->code.capa - rex->code.size)
{
/* start from twice the current capacity ... */
xp_size_t capa = rex->code.capa * 2;
xp_byte_t* tmp;
if (capa == 0) capa = 1;
/* ... and keep doubling until the new data fits.
 * NOTE(review): nothing guards against capa wrapping around on
 * overflow for very large sizes - confirm whether that matters. */
while (len > capa - rex->code.size) { capa = capa * 2; }
/* realloc into a temporary so the old buffer survives a failure */
tmp = (xp_byte_t*) xp_realloc (rex->code.buf, capa);
if (tmp == XP_NULL)
{
/* TODO: */
/*rex->errnum = XP_AWK_REX_ENOMEM;*/
return -1;
}
rex->code.buf = tmp;
rex->code.capa = capa;
}
/* copy the new bytes in right after the existing code */
xp_memcpy (&rex->code.buf[rex->code.size], data, len);
rex->code.size += len;
return 0;
} }

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.h,v 1.1 2006-07-17 06:21:39 bacon Exp $ * $Id: rex.h,v 1.2 2006-07-19 11:45:23 bacon Exp $
**/ **/
#ifndef _XP_AWK_REX_H_ #ifndef _XP_AWK_REX_H_
@ -9,10 +9,23 @@
#error Never include this file directly. Include <xp/awk/awk.h> instead #error Never include this file directly. Include <xp/awk/awk.h> instead
#endif #endif
typedef struct xp_awk_rex_t xp_awk_rex_t;
struct xp_awk_rex_t struct xp_awk_rex_t
{ {
struct
{
const xp_char_t* ptr;
const xp_char_t* end;
const xp_char_t* curp;
xp_char_t curc;
} ptn;
struct
{
xp_byte_t* buf;
xp_size_t size;
xp_size_t capa;
} code;
xp_bool_t __dynamic; xp_bool_t __dynamic;
}; };
@ -22,7 +35,7 @@ extern "C" {
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex); xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex);
void xp_awk_rex_close (xp_awk_rex_t* rex); void xp_awk_rex_close (xp_awk_rex_t* rex);
int xp_awk_rex_compile (const xp_awk_rex_t* rex, const xp_char_t* ptn); int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -4,13 +4,16 @@ CFLAGS = /nologo /MT /W3 /GR- /D_WIN32_WINNT=0x0400 -I..\..\.. -D__STAND_ALONE
LDFLAGS = /libpath:..\..\bas /libpath:..\..\awk LDFLAGS = /libpath:..\..\bas /libpath:..\..\awk
LIBS = xpawk.lib user32.lib LIBS = xpawk.lib user32.lib
all: awk all: awk rex
awk: awk.obj awk: awk.obj
link /nologo /out:awk.exe $(LDFLAGS) $(LIBS) awk.obj link /nologo /out:awk.exe $(LDFLAGS) $(LIBS) awk.obj
rex: rex.obj
link /nologo /out:rex.exe $(LDFLAGS) $(LIBS) rex.obj
clean: clean:
del $(OBJS) *.obj awk.exe del $(OBJS) *.obj awk.exe rex.exe
.SUFFIXES: .c .obj .SUFFIXES: .c .obj
.c.obj: .c.obj:

36
ase/test/awk/rex.c Normal file
View File

@ -0,0 +1,36 @@
#include <xp/awk/awk.h>
/*
 * When __STAND_ALONE is defined, redirect the xp_printf, xp_strcmp and
 * xp_strlen names used by this test program to the xp_awk_-prefixed
 * functions declared here.
 */
#ifdef __STAND_ALONE
#define xp_printf xp_awk_printf
extern int xp_awk_printf (const xp_char_t* fmt, ...);
#define xp_strcmp xp_awk_strcmp
extern int xp_awk_strcmp (const xp_char_t* s1, const xp_char_t* s2);
#define xp_strlen xp_awk_strlen
/* NOTE(review): declared as returning int here - confirm this matches the library's definition */
extern int xp_awk_strlen (const xp_char_t* s);
#endif
/*
 * Test driver for the regular-expression compiler: opens a
 * dynamically allocated rex object, compiles the fixed pattern
 * "^he.llo" and closes the object again.
 *
 * argc and argv are not used.
 *
 * Returns 0 when the pattern compiles, -1 when the object cannot be
 * opened or the compilation fails.
 */
int xp_main (int argc, const xp_char_t* argv[])
{
	const xp_char_t* ptn = XP_T("^he.llo");
	xp_awk_rex_t* rex;
	int rc = 0;

	rex = xp_awk_rex_open (XP_NULL);
	if (rex == XP_NULL)
	{
		xp_printf (XP_T("rex open failed\n"));
		return -1;
	}

	if (xp_awk_rex_compile (rex, ptn, xp_strlen(ptn)) == -1)
	{
		xp_printf (XP_T("cannot compile pattern...\n"));
		rc = -1;
	}

	/* the object is released on both the success and the failure path */
	xp_awk_rex_close (rex);
	return rc;
}