*** empty log message ***

This commit is contained in:
hyung-hwan 2006-07-19 15:58:01 +00:00
parent 25da30c536
commit 24ff585ae8
2 changed files with 85 additions and 15 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c,v 1.4 2006-07-19 11:45:23 bacon Exp $
* $Id: rex.c,v 1.5 2006-07-19 15:58:01 bacon Exp $
*/
#include <xp/awk/awk_i.h>
@ -90,6 +90,7 @@ void xp_awk_rex_close (xp_awk_rex_t* rex)
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
{
rex->ptn.ptr = ptn;
rex->ptn.end = rex->ptn.ptr + len;
rex->ptn.curp = rex->ptn.ptr;
@ -98,7 +99,13 @@ int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
rex->code.size = 0;
NEXT_CHAR (rex);
if (AT_END(rex)) return 0; /* empty pattern */
if (AT_END(rex))
{
xp_size_t tmp = 0;
ADD_CODE (rex, &tmp, xp_sizeof(tmp)); /* nb */
ADD_CODE (rex, &tmp, xp_sizeof(tmp)); /* el */
return 0; /* empty pattern */
}
if (__compile_expression (rex) == -1)
{
@ -120,19 +127,30 @@ xp_printf (XP_T("garbage after expression\n"));
static int __compile_expression (xp_awk_rex_t* rex)
{
xp_size_t zero = 0;
xp_size_t* nb, * el, * bl;
nb = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
el = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
if (__compile_branch (rex) == -1) return -1;
(*nb) += 1;
(*el) += *bl + xp_sizeof(*bl);
while (!AT_END(rex) && rex->ptn.curc == XP_T('|'))
{
NEXT_CHAR (rex);
//branch_base = rex->code_size;
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
if (__compile_branch(rex) == -1) return -1;
/*
rex->code[branch_base]++;
rex->code[len_base] += xxxxx;
*/
(*nb) += 1;
(*el) += *bl + xp_sizeof(*bl);
}
return 0;
@ -141,13 +159,22 @@ static int __compile_expression (xp_awk_rex_t* rex)
static int __compile_branch (xp_awk_rex_t* rex)
{
int n;
xp_size_t* bl;
xp_size_t old_size;
old_size = rex->code.size;
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, zero, xp_sizeof(zero));
while (!AT_END(rex))
{
//atom_base = rex->code_size;
n = __compile_atom (rex);
if (n == -1) return -1;
if (n == -1)
{
rex->code.size = old_size;
return -1;
}
if (n == 1) break;
if (AT_END(rex)) break;
@ -157,27 +184,43 @@ static int __compile_branch (xp_awk_rex_t* rex)
case XP_T('+'):
{
//__apply_bound (1, MAX);
NEXT_CHAR (rex);
if (__next_char(rex) == -1)
{
rex->code.size = old_size;
return -1;
}
break;
}
case XP_T('*'):
{
//__apply_bound (0, MAX);
NEXT_CHAR (rex);
if (__next_char(rex) == -1)
{
rex->code.size = old_size;
return -1;
}
break;
}
case XP_T('?'):
{
//__apply_bound (0, 1);
NEXT_CHAR (rex);
if (__next_char(rex) == -1)
{
rex->code.size = old_size;
return -1;
}
break;
}
case XP_T('{'):
{
if (__compile_bound(rex) == -1) return -1;
if (__compile_bound(rex) == -1)
{
rex->code.size = old_size;
return -1;
}
break;
}
}

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.h,v 1.2 2006-07-19 11:45:23 bacon Exp $
* $Id: rex.h,v 1.3 2006-07-19 15:58:01 bacon Exp $
**/
#ifndef _XP_AWK_REX_H_
@ -9,6 +9,33 @@
#error Never include this file directly. Include <xp/awk/awk.h> instead
#endif
/*
* Regular Expression Syntax
* A regular expression is zero or more branches, separated by '|'.
* ......
* ......
*
* Compiled form of a regular expression:
*
* | expression |
* | header | branch | branch | branch |
* | nb | el | bl | cmd | arg | cmd | arg | bl | cmd | arg | bl | cmd |
*
* nb: the number of branches
* el: the length of an expression excluding the length of nb and el
* bl: the length of a branch excluding the length of bl
* cmd: The command and repetition info encoded together.
* Some commands require an argument to follow them, but others do not.
* It is encoded as follows:
*
* Subexpressions can be nested by having the command "GROUP"
* and a subexpression as its argument.
*
* Examples:
* a.c -> |1|6|5|ORD_CHAR(no bound)|a|ANY_CHAR(no bound)|ORD_CHAR(no bound)|c|
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
*/
struct xp_awk_rex_t
{
struct