*** empty log message ***

This commit is contained in:
hyung-hwan 2006-07-20 16:21:54 +00:00
parent 75a3eaeac0
commit 40b4aecca7
3 changed files with 207 additions and 49 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c,v 1.6 2006-07-20 03:41:00 bacon Exp $
* $Id: rex.c,v 1.7 2006-07-20 16:21:54 bacon Exp $
*/
#include <xp/awk/awk_i.h>
@ -26,12 +26,13 @@ enum
enum
{
CMD_BOL_CHAR,
CMD_EOL_CHAR,
CMD_ORD_CHAR,
CMD_BOL,
CMD_EOL,
CMD_ANY_CHAR,
CMD_ORD_CHAR,
CMD_CHAR_RANGE,
CMD_CHAR_CLASS
CMD_CHAR_CLASS,
CMD_GROUP
};
enum
@ -53,10 +54,10 @@ enum
struct __code
{
xp_byte_t cmd;
//xp_byte_t cmd;
int cmd;
xp_size_t lbound;
xp_size_t ubound;
xp_char_t cc; /* optional */
};
#define NEXT_CHAR(rex,level) \
@ -69,11 +70,15 @@ static int __compile_expression (xp_awk_rex_t* rex);
static int __compile_branch (xp_awk_rex_t* rex);
static int __compile_atom (xp_awk_rex_t* rex);
static int __compile_charset (xp_awk_rex_t* rex);
static int __compile_bound (xp_awk_rex_t* rex);
static int __compile_range (xp_awk_rex_t* rex);
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd);
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd);
static int __next_char (xp_awk_rex_t* rex, int level);
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len);
static const xp_byte_t* __print_expression (const xp_byte_t* p);
static const xp_byte_t* __print_branch (const xp_byte_t* p);
static const xp_byte_t* __print_atom (const xp_byte_t* p);
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
{
if (rex == XP_NULL)
@ -104,7 +109,6 @@ void xp_awk_rex_close (xp_awk_rex_t* rex)
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
{
rex->ptn.ptr = ptn;
rex->ptn.end = rex->ptn.ptr + len;
rex->ptn.curp = rex->ptn.ptr;
@ -136,9 +140,12 @@ xp_printf (XP_T("garbage after expression\n"));
static int __compile_expression (xp_awk_rex_t* rex)
{
xp_size_t zero = 0;
xp_size_t* nb, * el, * bl;
xp_size_t* nb, * el;
xp_size_t old_size;
int n;
old_size = rex->code.size;
/* secure space for header and set the header fields to zero */
nb = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
@ -147,7 +154,6 @@ static int __compile_expression (xp_awk_rex_t* rex)
ADD_CODE (rex, &zero, xp_sizeof(zero));
/* handle the first branch */
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
n = __compile_branch (rex);
if (n == -1) return -1;
if (n == 0)
@ -157,7 +163,6 @@ static int __compile_expression (xp_awk_rex_t* rex)
}
(*nb) += 1;
(*el) += *bl + xp_sizeof(*bl);
/* handle subsequent branches if any */
while (rex->ptn.curc.type == __SPECIAL &&
@ -165,7 +170,6 @@ static int __compile_expression (xp_awk_rex_t* rex)
{
NEXT_CHAR (rex, __TOP);
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
n = __compile_branch(rex);
if (n == -1) return -1;
if (n == 0)
@ -179,26 +183,32 @@ static int __compile_expression (xp_awk_rex_t* rex)
}
(*nb) += 1;
(*el) += *bl + xp_sizeof(*bl);
}
*el = rex->code.size - old_size;
return 1;
}
static int __compile_branch (xp_awk_rex_t* rex)
{
int n;
xp_size_t* bl;
xp_size_t* na, * bl;
xp_size_t old_size;
xp_size_t zero = 0;
struct __code* cmd;
old_size = rex->code.size;
na = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
while (1)
{
cmd = (struct __code*)&rex->code.buf[rex->code.size];
n = __compile_atom (rex);
if (n == -1)
{
@ -208,7 +218,7 @@ static int __compile_branch (xp_awk_rex_t* rex)
if (n == 0) break; /* no atom */
n = __compile_bound (rex);
n = __compile_bound (rex, cmd);
if (n == -1)
{
rex->code.size = old_size;
@ -217,27 +227,33 @@ static int __compile_branch (xp_awk_rex_t* rex)
/* n == 0 no bound character. just continue */
/* n == 1 bound has been applied by compile_bound */
(*na) += 1;
}
return 0;
*bl = rex->code.size - old_size;
return ((*na) == 0)? 0: 1;
}
static int __compile_atom (xp_awk_rex_t* rex)
{
int n = 0;
int n;
if (rex->ptn.curc.type == __EOF)
{
/* no atom */
return 0;
}
else if (rex->ptn.curc.type == __SPECIAL)
if (rex->ptn.curc.type == __EOF) return 0;
if (rex->ptn.curc.type == __SPECIAL)
{
if (rex->ptn.curc.value == XP_T('('))
{
// GROUP
struct __code tmp;
tmp.cmd = CMD_GROUP;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
NEXT_CHAR (rex, __TOP);
n = __compile_expression (rex);
if (n == -1) return -1;
@ -254,7 +270,7 @@ static int __compile_atom (xp_awk_rex_t* rex)
{
struct __code tmp;
tmp.cmd = CMD_BOL_CHAR;
tmp.cmd = CMD_BOL;
tmp.lbound = 1;
tmp.ubound = 1;
@ -265,7 +281,7 @@ static int __compile_atom (xp_awk_rex_t* rex)
{
struct __code tmp;
tmp.cmd = CMD_EOL_CHAR;
tmp.cmd = CMD_EOL;
tmp.lbound = 1;
tmp.ubound = 1;
@ -287,30 +303,26 @@ static int __compile_atom (xp_awk_rex_t* rex)
{
if (__compile_charset (rex) == -1) return -1;
}
else
{
/*invalid special character....*/
return -1;
}
else return 0;
return 1;
}
else
{
/* normal characters */
struct __code tmp;
xp_assert (rex->ptn.curc.type == __NORMAL);
tmp.cmd = CMD_ORD_CHAR;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
ADD_CODE (rex, &rex->ptn.curc, xp_sizeof(rex->ptn.curc));
ADD_CODE (rex, &rex->ptn.curc.value, xp_sizeof(rex->ptn.curc.value));
NEXT_CHAR (rex, __TOP);
return 1;
}
}
static int __compile_charset (xp_awk_rex_t* rex)
@ -318,7 +330,7 @@ static int __compile_charset (xp_awk_rex_t* rex)
return -1;
}
static int __compile_bound (xp_awk_rex_t* rex)
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd)
{
if (rex->ptn.curc.type != __SPECIAL) return 0;
@ -326,28 +338,42 @@ static int __compile_bound (xp_awk_rex_t* rex)
{
case XP_T('+'):
{
//__apply_bound (1, MAX);
cmd->lbound = 1;
cmd->ubound = BOUND_MAX;
NEXT_CHAR(rex, __TOP);
return 1;
}
case XP_T('*'):
{
//__apply_bound (0, MAX);
cmd->lbound = 0;
cmd->ubound = BOUND_MAX;
NEXT_CHAR(rex, __TOP);
return 1;
}
case XP_T('?'):
{
//__apply_bound (0, 1);
cmd->lbound = 0;
cmd->ubound = 1;
NEXT_CHAR(rex, __TOP);
return 1;
}
case XP_T('{'):
{
if (__compile_range(rex) == -1) return -1;
NEXT_CHAR (rex, __IN_RANGE);
if (__compile_range(rex, cmd) == -1) return -1;
if (rex->ptn.curc.type != __SPECIAL ||
rex->ptn.curc.value != XP_T('}'))
{
// rex->errnum = XP_AWK_REX_ERBRACE
return -1;
}
NEXT_CHAR (rex, __TOP);
return 1;
}
}
@ -355,9 +381,40 @@ static int __compile_bound (xp_awk_rex_t* rex)
return 0;
}
static int __compile_range (xp_awk_rex_t* rex)
/*
 * Parses the interval found between '{' and '}' and stores the resulting
 * repetition bounds into cmd. The caller consumes the opening '{' before
 * calling and verifies the closing '}' afterwards.
 *
 * Accepted forms:
 *   n     exactly n times       lbound = n, ubound = n
 *   n,    n times or more       lbound = n, ubound = BOUND_MAX
 *   n,m   between n and m       lbound = n, ubound = m
 *   ,m    at most m times       lbound = 0, ubound = m
 *
 * Returns 0 on success. Returns -1 if the interval holds neither a number
 * nor a comma, or if the lower bound is greater than the upper bound.
 */
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd)
{
	xp_size_t bound;
	int has_lower = 0;
	int has_upper = 0;

	/* TODO: should white spaces be allowed in the range? */
	/* TODO: if it is not in the right format, should it be converted
	 *       to ordinary characters instead of being rejected? */

	/* lower bound: a possibly empty run of decimal digits */
	bound = 0;
	while (rex->ptn.curc.type == __NORMAL &&
	       xp_isdigit(rex->ptn.curc.value))
	{
		bound = bound * 10 + rex->ptn.curc.value - XP_T('0');
		has_lower = 1;
		NEXT_CHAR (rex, __IN_RANGE);
	}
	cmd->lbound = bound;

	if (rex->ptn.curc.type == __SPECIAL &&
	    rex->ptn.curc.value == XP_T(','))
	{
		NEXT_CHAR (rex, __IN_RANGE);

		/* upper bound: a possibly empty run of decimal digits */
		bound = 0;
		while (rex->ptn.curc.type == __NORMAL &&
		       xp_isdigit(rex->ptn.curc.value))
		{
			bound = bound * 10 + rex->ptn.curc.value - XP_T('0');
			has_upper = 1;
			NEXT_CHAR (rex, __IN_RANGE);
		}

		/* a comma with nothing after it leaves the upper end open */
		cmd->ubound = (has_upper)? bound: BOUND_MAX;
	}
	else
	{
		/* no comma: a single number means exactly that many times */
		if (!has_lower) return -1;
		cmd->ubound = cmd->lbound;
	}

	if (cmd->lbound > cmd->ubound) return -1;
	return 0;
}
static int __next_char (xp_awk_rex_t* rex, int level)
@ -461,3 +518,101 @@ static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len)
return 0;
}
/*
 * Prints the compiled code held in rex back as regular expression text,
 * followed by a newline. Asserts that printing consumed exactly
 * rex->code.size bytes of rex->code.buf.
 */
void xp_awk_rex_print (xp_awk_rex_t* rex)
{
	const xp_byte_t* stop;

	/* the whole code buffer holds a single top-level expression */
	stop = __print_expression (rex->code.buf);
	xp_printf (XP_T("\n"));

	xp_assert (stop == rex->code.buf + rex->code.size);
}
/*
 * Prints the compiled expression located at p: its branches in order,
 * separated by '|'. The expression begins with two xp_size_t header
 * fields, the branch count and a length field; only the count is
 * needed here.
 *
 * Returns the address just past the last branch printed.
 */
static const xp_byte_t* __print_expression (const xp_byte_t* p)
{
	xp_size_t branch_count, idx;

	branch_count = *(const xp_size_t*)p;
	p += xp_sizeof(branch_count);

	/* step over the length field; it is not needed for printing */
	p += xp_sizeof(xp_size_t);

	idx = 0;
	while (idx < branch_count)
	{
		if (idx > 0) xp_printf (XP_T("|"));
		p = __print_branch (p);
		idx++;
	}

	return p;
}
/*
 * Prints the compiled branch located at p: each of its atoms in order.
 * The branch begins with two xp_size_t header fields, the atom count and
 * a length field; only the count is needed here.
 *
 * Returns the address just past the last atom printed.
 */
static const xp_byte_t* __print_branch (const xp_byte_t* p)
{
	xp_size_t remaining;

	remaining = *(const xp_size_t*)p;
	p += xp_sizeof(remaining);

	/* step over the length field; it is not needed for printing */
	p += xp_sizeof(xp_size_t);

	while (remaining > 0)
	{
		p = __print_atom (p);
		remaining--;
	}

	return p;
}
static const xp_byte_t* __print_atom (const xp_byte_t* p)
{
struct __code* cp = (struct __code*)p;
if (cp->cmd == CMD_BOL)
{
xp_printf (XP_T("^"));
p += xp_sizeof(*cp);
}
else if (cp->cmd == CMD_EOL)
{
xp_printf (XP_T("$"));
p += xp_sizeof(*cp);
}
else if (cp->cmd == CMD_ANY_CHAR)
{
xp_printf (XP_T("."));
p += xp_sizeof(*cp);
}
else if (cp->cmd == CMD_ORD_CHAR)
{
p += xp_sizeof(*cp);
xp_printf (XP_T("%c"), *(xp_char_t*)p);
p += xp_sizeof(xp_char_t);
}
else if (cp->cmd == CMD_GROUP)
{
p += xp_sizeof(*cp);
xp_printf (XP_T("("));
p = __print_expression (p);
xp_printf (XP_T(")"));
}
else
{
xp_printf (XP_T("FUCK FUCK FUCK\n"));
}
if (cp->lbound == 0 && cp->ubound == BOUND_MAX)
xp_printf (XP_T("*"));
else if (cp->lbound == 1 && cp->ubound == BOUND_MAX)
xp_printf (XP_T("+"));
else if (cp->lbound == 0 && cp->ubound == 1)
xp_printf (XP_T("?"));
else if (cp->lbound != 1 || cp->ubound != 1)
{
xp_printf (XP_T("{%lu,%lu}"),
(unsigned long)cp->lbound, (unsigned long)cp->ubound);
}
return p;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.h,v 1.4 2006-07-20 03:41:00 bacon Exp $
* $Id: rex.h,v 1.5 2006-07-20 16:21:54 bacon Exp $
**/
#ifndef _XP_AWK_REX_H_
@ -17,12 +17,13 @@
*
* Compiled form of a regular expression:
*
* | expression |
* | header | branch | branch | branch |
* | nb | el | bl | cmd | arg | cmd | arg | bl | cmd | arg | bl | cmd |
* | expression |
* | header | branch | branch | branch |
* | nb | el | na | bl | cmd | arg | cmd | arg | na | bl | cmd | arg | na | bl | cmd |
*
* nb: the number of branches
* el: the length of an expression excluding the length of nb and el
* na: the number of atoms
* bl: the length of a branch excluding the length of bl
* cmd: The command and repetition info encoded together.
* Some commands require an argument to follow them but others don't.
@ -67,6 +68,7 @@ extern "C" {
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex);
void xp_awk_rex_close (xp_awk_rex_t* rex);
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len);
void xp_awk_rex_print (xp_awk_rex_t* rex);
#ifdef __cplusplus
}

View File

@ -23,7 +23,7 @@ int xp_main (int argc, const xp_char_t* argv[])
}
ptn = XP_T("^he.llo");
ptn = XP_T("^he.llo(jo(in|kk)s|com)+h*e{1,40}abc");
if (xp_awk_rex_compile (rex, ptn, xp_strlen(ptn)) == -1)
{
xp_printf (XP_T("cannot compile pattern...\n"));
@ -31,6 +31,7 @@ int xp_main (int argc, const xp_char_t* argv[])
return -1;
}
xp_awk_rex_print (rex);
xp_awk_rex_close (rex);
return 0;
}