*** empty log message ***
This commit is contained in:
parent
25da30c536
commit
24ff585ae8
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.c,v 1.4 2006-07-19 11:45:23 bacon Exp $
|
||||
* $Id: rex.c,v 1.5 2006-07-19 15:58:01 bacon Exp $
|
||||
*/
|
||||
|
||||
#include <xp/awk/awk_i.h>
|
||||
@ -90,6 +90,7 @@ void xp_awk_rex_close (xp_awk_rex_t* rex)
|
||||
|
||||
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
|
||||
{
|
||||
|
||||
rex->ptn.ptr = ptn;
|
||||
rex->ptn.end = rex->ptn.ptr + len;
|
||||
rex->ptn.curp = rex->ptn.ptr;
|
||||
@ -98,7 +99,13 @@ int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
|
||||
rex->code.size = 0;
|
||||
|
||||
NEXT_CHAR (rex);
|
||||
if (AT_END(rex)) return 0; /* empty pattern */
|
||||
if (AT_END(rex))
|
||||
{
|
||||
xp_size_t tmp = 0;
|
||||
ADD_CODE (rex, &tmp, xp_sizeof(tmp)); /* nb */
|
||||
ADD_CODE (rex, &tmp, xp_sizeof(tmp)); /* el */
|
||||
return 0; /* empty pattern */
|
||||
}
|
||||
|
||||
if (__compile_expression (rex) == -1)
|
||||
{
|
||||
@ -120,19 +127,30 @@ xp_printf (XP_T("garbage after expression\n"));
|
||||
|
||||
static int __compile_expression (xp_awk_rex_t* rex)
|
||||
{
|
||||
xp_size_t zero = 0;
|
||||
xp_size_t* nb, * el, * bl;
|
||||
|
||||
nb = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||
|
||||
el = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||
|
||||
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||
if (__compile_branch (rex) == -1) return -1;
|
||||
|
||||
(*nb) += 1;
|
||||
(*el) += *bl + xp_sizeof(*bl);
|
||||
|
||||
while (!AT_END(rex) && rex->ptn.curc == XP_T('|'))
|
||||
{
|
||||
NEXT_CHAR (rex);
|
||||
|
||||
//branch_base = rex->code_size;
|
||||
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||
if (__compile_branch(rex) == -1) return -1;
|
||||
|
||||
/*
|
||||
rex->code[branch_base]++;
|
||||
rex->code[len_base] += xxxxx;
|
||||
*/
|
||||
(*nb) += 1;
|
||||
(*el) += *bl + xp_sizeof(*bl);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -141,13 +159,22 @@ static int __compile_expression (xp_awk_rex_t* rex)
|
||||
static int __compile_branch (xp_awk_rex_t* rex)
|
||||
{
|
||||
int n;
|
||||
xp_size_t* bl;
|
||||
xp_size_t old_size;
|
||||
|
||||
old_size = rex->code.size;
|
||||
|
||||
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||
ADD_CODE (rex, zero, xp_sizeof(zero));
|
||||
|
||||
while (!AT_END(rex))
|
||||
{
|
||||
//atom_base = rex->code_size;
|
||||
|
||||
n = __compile_atom (rex);
|
||||
if (n == -1) return -1;
|
||||
if (n == -1)
|
||||
{
|
||||
rex->code.size = old_size;
|
||||
return -1;
|
||||
}
|
||||
if (n == 1) break;
|
||||
|
||||
if (AT_END(rex)) break;
|
||||
@ -157,27 +184,43 @@ static int __compile_branch (xp_awk_rex_t* rex)
|
||||
case XP_T('+'):
|
||||
{
|
||||
//__apply_bound (1, MAX);
|
||||
NEXT_CHAR (rex);
|
||||
if (__next_char(rex) == -1)
|
||||
{
|
||||
rex->code.size = old_size;
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case XP_T('*'):
|
||||
{
|
||||
//__apply_bound (0, MAX);
|
||||
NEXT_CHAR (rex);
|
||||
if (__next_char(rex) == -1)
|
||||
{
|
||||
rex->code.size = old_size;
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case XP_T('?'):
|
||||
{
|
||||
//__apply_bound (0, 1);
|
||||
NEXT_CHAR (rex);
|
||||
if (__next_char(rex) == -1)
|
||||
{
|
||||
rex->code.size = old_size;
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case XP_T('{'):
|
||||
{
|
||||
if (__compile_bound(rex) == -1) return -1;
|
||||
if (__compile_bound(rex) == -1)
|
||||
{
|
||||
rex->code.size = old_size;
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.h,v 1.2 2006-07-19 11:45:23 bacon Exp $
|
||||
* $Id: rex.h,v 1.3 2006-07-19 15:58:01 bacon Exp $
|
||||
**/
|
||||
|
||||
#ifndef _XP_AWK_REX_H_
|
||||
@ -9,6 +9,33 @@
|
||||
#error Never include this file directly. Include <xp/awk/awk.h> instead
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Regular Expression Syntax
|
||||
* A regular expression is zero or more branches, separated by '|'.
|
||||
* ......
|
||||
* ......
|
||||
*
|
||||
* Compiled form of a regular expression:
|
||||
*
|
||||
* | expression |
|
||||
* | header | branch | branch | branch |
|
||||
* | nb | el | bl | cmd | arg | cmd | arg | bl | cmd | arg | bl | cmd |
|
||||
*
|
||||
* nb: the number of branches
|
||||
* el: the length of an expression excluding the length of nb and el
|
||||
* bl: the length of a branch excluding the length of bl
|
||||
* cmd: The command and repetition info encoded together.
|
||||
* Some commands require an argument to follow them, but others don't.
|
||||
* It is encoded as follows:
|
||||
*
|
||||
* Subexpressions can be nested by having the command "GROUP"
|
||||
* and a subexpression as its argument.
|
||||
*
|
||||
* Examples:
|
||||
* a.c -> |1|6|5|ORD_CHAR(no bound)|a|ANY_CHAR(no bound)|ORD_CHAR(no bound)|c|
|
||||
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
||||
*/
|
||||
|
||||
struct xp_awk_rex_t
|
||||
{
|
||||
struct
|
||||
|
Loading…
Reference in New Issue
Block a user