*** empty log message ***
This commit is contained in:
parent
25da30c536
commit
24ff585ae8
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.c,v 1.4 2006-07-19 11:45:23 bacon Exp $
|
* $Id: rex.c,v 1.5 2006-07-19 15:58:01 bacon Exp $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <xp/awk/awk_i.h>
|
#include <xp/awk/awk_i.h>
|
||||||
@ -90,6 +90,7 @@ void xp_awk_rex_close (xp_awk_rex_t* rex)
|
|||||||
|
|
||||||
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
|
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
|
||||||
{
|
{
|
||||||
|
|
||||||
rex->ptn.ptr = ptn;
|
rex->ptn.ptr = ptn;
|
||||||
rex->ptn.end = rex->ptn.ptr + len;
|
rex->ptn.end = rex->ptn.ptr + len;
|
||||||
rex->ptn.curp = rex->ptn.ptr;
|
rex->ptn.curp = rex->ptn.ptr;
|
||||||
@ -98,7 +99,13 @@ int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
|
|||||||
rex->code.size = 0;
|
rex->code.size = 0;
|
||||||
|
|
||||||
NEXT_CHAR (rex);
|
NEXT_CHAR (rex);
|
||||||
if (AT_END(rex)) return 0; /* empty pattern */
|
if (AT_END(rex))
|
||||||
|
{
|
||||||
|
xp_size_t tmp = 0;
|
||||||
|
ADD_CODE (rex, &tmp, xp_sizeof(tmp)); /* nb */
|
||||||
|
ADD_CODE (rex, &tmp, xp_sizeof(tmp)); /* el */
|
||||||
|
return 0; /* empty pattern */
|
||||||
|
}
|
||||||
|
|
||||||
if (__compile_expression (rex) == -1)
|
if (__compile_expression (rex) == -1)
|
||||||
{
|
{
|
||||||
@ -120,19 +127,30 @@ xp_printf (XP_T("garbage after expression\n"));
|
|||||||
|
|
||||||
static int __compile_expression (xp_awk_rex_t* rex)
|
static int __compile_expression (xp_awk_rex_t* rex)
|
||||||
{
|
{
|
||||||
|
xp_size_t zero = 0;
|
||||||
|
xp_size_t* nb, * el, * bl;
|
||||||
|
|
||||||
|
nb = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||||
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
|
el = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||||
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
|
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||||
if (__compile_branch (rex) == -1) return -1;
|
if (__compile_branch (rex) == -1) return -1;
|
||||||
|
|
||||||
|
(*nb) += 1;
|
||||||
|
(*el) += *bl + xp_sizeof(*bl);
|
||||||
|
|
||||||
while (!AT_END(rex) && rex->ptn.curc == XP_T('|'))
|
while (!AT_END(rex) && rex->ptn.curc == XP_T('|'))
|
||||||
{
|
{
|
||||||
NEXT_CHAR (rex);
|
NEXT_CHAR (rex);
|
||||||
|
|
||||||
//branch_base = rex->code_size;
|
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||||
if (__compile_branch(rex) == -1) return -1;
|
if (__compile_branch(rex) == -1) return -1;
|
||||||
|
|
||||||
/*
|
(*nb) += 1;
|
||||||
rex->code[branch_base]++;
|
(*el) += *bl + xp_sizeof(*bl);
|
||||||
rex->code[len_base] += xxxxx;
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -141,13 +159,22 @@ static int __compile_expression (xp_awk_rex_t* rex)
|
|||||||
static int __compile_branch (xp_awk_rex_t* rex)
|
static int __compile_branch (xp_awk_rex_t* rex)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
|
xp_size_t* bl;
|
||||||
|
xp_size_t old_size;
|
||||||
|
|
||||||
|
old_size = rex->code.size;
|
||||||
|
|
||||||
|
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
||||||
|
ADD_CODE (rex, zero, xp_sizeof(zero));
|
||||||
|
|
||||||
while (!AT_END(rex))
|
while (!AT_END(rex))
|
||||||
{
|
{
|
||||||
//atom_base = rex->code_size;
|
|
||||||
|
|
||||||
n = __compile_atom (rex);
|
n = __compile_atom (rex);
|
||||||
if (n == -1) return -1;
|
if (n == -1)
|
||||||
|
{
|
||||||
|
rex->code.size = old_size;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
if (n == 1) break;
|
if (n == 1) break;
|
||||||
|
|
||||||
if (AT_END(rex)) break;
|
if (AT_END(rex)) break;
|
||||||
@ -157,27 +184,43 @@ static int __compile_branch (xp_awk_rex_t* rex)
|
|||||||
case XP_T('+'):
|
case XP_T('+'):
|
||||||
{
|
{
|
||||||
//__apply_bound (1, MAX);
|
//__apply_bound (1, MAX);
|
||||||
NEXT_CHAR (rex);
|
if (__next_char(rex) == -1)
|
||||||
|
{
|
||||||
|
rex->code.size = old_size;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case XP_T('*'):
|
case XP_T('*'):
|
||||||
{
|
{
|
||||||
//__apply_bound (0, MAX);
|
//__apply_bound (0, MAX);
|
||||||
NEXT_CHAR (rex);
|
if (__next_char(rex) == -1)
|
||||||
|
{
|
||||||
|
rex->code.size = old_size;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case XP_T('?'):
|
case XP_T('?'):
|
||||||
{
|
{
|
||||||
//__apply_bound (0, 1);
|
//__apply_bound (0, 1);
|
||||||
NEXT_CHAR (rex);
|
if (__next_char(rex) == -1)
|
||||||
|
{
|
||||||
|
rex->code.size = old_size;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case XP_T('{'):
|
case XP_T('{'):
|
||||||
{
|
{
|
||||||
if (__compile_bound(rex) == -1) return -1;
|
if (__compile_bound(rex) == -1)
|
||||||
|
{
|
||||||
|
rex->code.size = old_size;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.h,v 1.2 2006-07-19 11:45:23 bacon Exp $
|
* $Id: rex.h,v 1.3 2006-07-19 15:58:01 bacon Exp $
|
||||||
**/
|
**/
|
||||||
|
|
||||||
#ifndef _XP_AWK_REX_H_
|
#ifndef _XP_AWK_REX_H_
|
||||||
@ -9,6 +9,33 @@
|
|||||||
#error Never include this file directly. Include <xp/awk/awk.h> instead
|
#error Never include this file directly. Include <xp/awk/awk.h> instead
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Regular Expression Syntax
|
||||||
|
* A regular expression is zero or more branches, separated by '|'.
|
||||||
|
* ......
|
||||||
|
* ......
|
||||||
|
*
|
||||||
|
* Compiled form of a regular expression:
|
||||||
|
*
|
||||||
|
* | expression |
|
||||||
|
* | header | branch | branch | branch |
|
||||||
|
* | nb | el | bl | cmd | arg | cmd | arg | bl | cmd | arg | bl | cmd |
|
||||||
|
*
|
||||||
|
* nb: the number of branches
|
||||||
|
* el: the length of an expression, excluding the length of nb and el
|
||||||
|
* bl: the length of a branch excluding the length of bl
|
||||||
|
* cmd: The command and repetition info encoded together.
|
||||||
|
* Some commands require an argument to follow them, but others don't.
|
||||||
|
* It is encoded as follows:
|
||||||
|
*
|
||||||
|
* Subexpressions can be nested by having the command "GROUP"
|
||||||
|
* and a subexpression as its argument.
|
||||||
|
*
|
||||||
|
* Examples:
|
||||||
|
* a.c -> |1|6|5|ORD_CHAR(no bound)|a|ANY_CHAR(no bound)|ORD_CHAR(no bound)|c|
|
||||||
|
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
||||||
|
*/
|
||||||
|
|
||||||
struct xp_awk_rex_t
|
struct xp_awk_rex_t
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
|
Loading…
Reference in New Issue
Block a user