*** empty log message ***
This commit is contained in:
parent
3030d45e33
commit
4790c85615
208
ase/awk/rex.c
208
ase/awk/rex.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.c,v 1.9 2006-07-22 16:40:39 bacon Exp $
|
* $Id: rex.c,v 1.10 2006-07-23 16:31:20 bacon Exp $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <xp/awk/awk_i.h>
|
#include <xp/awk/awk_i.h>
|
||||||
@ -62,6 +62,18 @@ struct __code_t
|
|||||||
xp_size_t ubound;
|
xp_size_t ubound;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct __match_t
|
||||||
|
{
|
||||||
|
const xp_char_t* bp; /* base point */
|
||||||
|
const xp_char_t* end;
|
||||||
|
const xp_char_t* mp; /* match point */
|
||||||
|
xp_bool_t matched;
|
||||||
|
xp_size_t match_len;
|
||||||
|
|
||||||
|
const xp_byte_t* branch;
|
||||||
|
const xp_byte_t* branch_end;
|
||||||
|
};
|
||||||
|
|
||||||
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)
|
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)
|
||||||
|
|
||||||
#define NEXT_CHAR(rex,level) \
|
#define NEXT_CHAR(rex,level) \
|
||||||
@ -86,9 +98,21 @@ static const xp_byte_t* __print_expression (const xp_byte_t* p);
|
|||||||
static const xp_byte_t* __print_branch (const xp_byte_t* p);
|
static const xp_byte_t* __print_branch (const xp_byte_t* p);
|
||||||
static const xp_byte_t* __print_atom (const xp_byte_t* p);
|
static const xp_byte_t* __print_atom (const xp_byte_t* p);
|
||||||
|
|
||||||
|
|
||||||
static xp_bool_t __begin_with (
|
static xp_bool_t __begin_with (
|
||||||
const xp_char_t* str, xp_size_t len, const xp_char_t* what);
|
const xp_char_t* str, xp_size_t len, const xp_char_t* what);
|
||||||
|
|
||||||
|
static xp_byte_t* __match_expression (
|
||||||
|
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat);
|
||||||
|
static xp_byte_t* __match_branch (
|
||||||
|
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat);
|
||||||
|
static xp_byte_t* __match_branch_body (
|
||||||
|
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat);
|
||||||
|
static xp_byte_t* __match_atom (
|
||||||
|
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat);
|
||||||
|
static xp_byte_t* __match_any_char (
|
||||||
|
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat);
|
||||||
|
|
||||||
static xp_bool_t __cc_isalnum (xp_char_t c);
|
static xp_bool_t __cc_isalnum (xp_char_t c);
|
||||||
static xp_bool_t __cc_isalpha (xp_char_t c);
|
static xp_bool_t __cc_isalpha (xp_char_t c);
|
||||||
static xp_bool_t __cc_isblank (xp_char_t c);
|
static xp_bool_t __cc_isblank (xp_char_t c);
|
||||||
@ -762,7 +786,7 @@ static const xp_byte_t* __print_branch (const xp_byte_t* p)
|
|||||||
|
|
||||||
static const xp_byte_t* __print_atom (const xp_byte_t* p)
|
static const xp_byte_t* __print_atom (const xp_byte_t* p)
|
||||||
{
|
{
|
||||||
struct __code_t* cp = (struct __code_t*)p;
|
const struct __code_t* cp = (const struct __code_t*)p;
|
||||||
|
|
||||||
if (cp->cmd == CMD_BOL)
|
if (cp->cmd == CMD_BOL)
|
||||||
{
|
{
|
||||||
@ -876,40 +900,200 @@ static xp_bool_t __begin_with (
|
|||||||
|
|
||||||
int xp_awk_rex_match (xp_awk_rex_t* rex,
|
int xp_awk_rex_match (xp_awk_rex_t* rex,
|
||||||
const xp_char_t* str, xp_size_t len,
|
const xp_char_t* str, xp_size_t len,
|
||||||
const xp_char_t** match, xp_size_t* match_len)
|
xp_size_t* match_offset, xp_size_t* match_len)
|
||||||
{
|
{
|
||||||
xp_size_t offset = 0;
|
xp_size_t offset = 0;
|
||||||
|
struct __match_t mat;
|
||||||
|
|
||||||
|
mat.matched = xp_false;
|
||||||
|
|
||||||
while (offset <= len)
|
while (offset <= len)
|
||||||
{
|
{
|
||||||
__match_expression (rex);
|
mat.bp = str;
|
||||||
|
mat.end = str + len;
|
||||||
|
mat.mp = str + offset;
|
||||||
|
|
||||||
|
__match_expression (rex, rex->code.buf, &mat);
|
||||||
|
if (mat.matched)
|
||||||
|
{
|
||||||
|
*match_offset = offset;
|
||||||
|
*match_len = mat.match_len;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return (mat.matched)? 0: -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __match_expression (xp_awk_rex_t* rex)
|
static const xp_byte_t* __match_expression (
|
||||||
|
xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
|
||||||
{
|
{
|
||||||
|
xp_byte_t* p;
|
||||||
xp_size_t nb, el, i;
|
xp_size_t nb, el, i;
|
||||||
|
struct __match_t mat2;
|
||||||
|
|
||||||
|
p = base;
|
||||||
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
|
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
|
||||||
el = *(xp_size_t*)p; p += xp_sizeof(el);
|
el = *(xp_size_t*)p; p += xp_sizeof(el);
|
||||||
|
|
||||||
|
mat->matched = xp_false;
|
||||||
|
mat->match_len = 0;
|
||||||
|
|
||||||
|
mat2.bp = mat->bp;
|
||||||
|
mat2.end = mat->end;
|
||||||
|
|
||||||
for (i = 0; i < nb; i++)
|
for (i = 0; i < nb; i++)
|
||||||
{
|
{
|
||||||
__match_branch (rex);
|
mat2.mp = mat->mp;
|
||||||
|
|
||||||
|
p = __match_branch (rex, p, &mat2);
|
||||||
|
if (mat2.matched)
|
||||||
|
{
|
||||||
|
mat->matched = xp_true;
|
||||||
|
mat->match_len = mat2.match_len;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return base + el;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const xp_byte_t* __match_branch (
|
||||||
|
xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
|
||||||
|
{
|
||||||
|
xp_byte_t* p;
|
||||||
|
xp_size_t na, bl, i;
|
||||||
|
|
||||||
|
p = base;
|
||||||
|
|
||||||
|
na = *(xp_size_t*)p; p += xp_sizeof(na);
|
||||||
|
bl = *(xp_size_t*)p; p += xp_sizeof(bl);
|
||||||
|
|
||||||
|
/* remember the current branch to work on */
|
||||||
|
mat->branch = base;
|
||||||
|
mat->branch_end = base + bl;
|
||||||
|
|
||||||
|
return __match_branch_body (rex, base, mat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const xp_byte_t* __match_branch_body (
|
||||||
|
xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
|
||||||
|
{
|
||||||
|
struct __match_t mat2;
|
||||||
|
const xp_byte_t* p;
|
||||||
|
|
||||||
|
mat->matched = xp_false;
|
||||||
|
mat->match_len = 0;
|
||||||
|
|
||||||
|
mat2.bp = mst->bp;
|
||||||
|
mat2.end = mst->end;
|
||||||
|
mat2.mp = mst->mp;
|
||||||
|
mat2.branch = mst->branch;
|
||||||
|
mat2.branch_end = mst->branch_end;
|
||||||
|
|
||||||
|
p = base;
|
||||||
|
|
||||||
|
while (p < mat->branch_end)
|
||||||
|
{
|
||||||
|
p = __match_atom (rex, p, &mat2);
|
||||||
|
|
||||||
|
if (!mat2.matched)
|
||||||
|
{
|
||||||
|
mat->matched = xp_false;
|
||||||
|
break; /* stop matching */
|
||||||
|
}
|
||||||
|
|
||||||
|
mat->matched = xp_true;
|
||||||
|
mat->match_len += mat2.match_len;
|
||||||
|
|
||||||
|
mat2.mp = mat2.match_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mst->branch_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const xp_byte_t* __match_atom (
|
||||||
|
xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
|
||||||
|
{
|
||||||
|
xp_byte_t* p;
|
||||||
|
const struct __code_t* cp;
|
||||||
|
|
||||||
|
p = base;
|
||||||
|
cp = (struct __match_t*)p;
|
||||||
|
|
||||||
|
p += xp_sizeof(*cp);
|
||||||
|
|
||||||
|
if (cp->cmd == CMD_ANY_CHAR)
|
||||||
|
{
|
||||||
|
p = __match_any_char (rex, p, &mat2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
xp_printf (XP_T("FUCK: __mtach_atom\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __match_branch (xp_awk_rex_t* rex)
|
static xp_byte_t* __match_any_char (
|
||||||
|
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat)
|
||||||
{
|
{
|
||||||
}
|
xp_byte_t* p;
|
||||||
|
xp_size_t si = 0;
|
||||||
|
|
||||||
void __match_atom (xp_awk_rex_t* rex)
|
p = base;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
static const xp_byte_t* __print_branch (const xp_byte_t* p)
|
mat->matched = xp_false;
|
||||||
|
mat->match_len = 0;
|
||||||
|
|
||||||
|
/* find the longest match */
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
if (si >= cp->ubound) break;
|
||||||
|
if (mat->mp[si] == XP_T('\0')) break;
|
||||||
|
si++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (si == cp->ubound)
|
||||||
|
{
|
||||||
|
/* the match found */
|
||||||
|
|
||||||
|
if (cp->lbound == cp->ubound)
|
||||||
|
{
|
||||||
|
/* fixed occurrences requested. returns the match */
|
||||||
|
mat->matched = xp_true;
|
||||||
|
mat->match_len = si;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* otherwise, it checks if the remaining atoms match */
|
||||||
|
while (si >= cp->lbound)
|
||||||
|
{
|
||||||
|
struct __match_t mat2;
|
||||||
|
|
||||||
|
mat2.bp = mat->bp;
|
||||||
|
mat2.end = mat->end;
|
||||||
|
mat2.mp = &mat->mp[si];
|
||||||
|
mat2.branch = mat->branch;
|
||||||
|
mat2.branch_end = mat->branch_end;
|
||||||
|
|
||||||
|
p = match_branch_body (rex, p, &mat2);
|
||||||
|
|
||||||
|
if (mat2.matched)
|
||||||
|
{
|
||||||
|
mat->matched = xp_true;
|
||||||
|
mat->match_len = si + mat2.match_len;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
si--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static xp_bool_t __cc_isalnum (xp_char_t c)
|
static xp_bool_t __cc_isalnum (xp_char_t c)
|
||||||
|
Loading…
Reference in New Issue
Block a user