*** empty log message ***

This commit is contained in:
hyung-hwan 2006-07-23 16:31:20 +00:00
parent 3030d45e33
commit 4790c85615

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c,v 1.9 2006-07-22 16:40:39 bacon Exp $ * $Id: rex.c,v 1.10 2006-07-23 16:31:20 bacon Exp $
*/ */
#include <xp/awk/awk_i.h> #include <xp/awk/awk_i.h>
@ -62,6 +62,18 @@ struct __code_t
xp_size_t ubound; xp_size_t ubound;
}; };
/*
 * State shared by the matching functions: where in the subject string
 * a matching attempt takes place and what the outcome of that attempt is.
 */
struct __match_t
{
/* beginning of the subject string; xp_awk_rex_match sets it to str */
const xp_char_t* bp; /* base point */
/* end of the subject string; xp_awk_rex_match sets it to str + len */
const xp_char_t* end;
/* position in the subject string at which the attempt starts */
const xp_char_t* mp; /* match point */
/* result flag written by the matching functions */
xp_bool_t matched;
/* length of the match; read by the callers only when matched is set */
xp_size_t match_len;
/* start of the compiled branch being worked on; set by __match_branch */
const xp_byte_t* branch;
/* branch plus the length read from its header; set by __match_branch */
const xp_byte_t* branch_end;
};
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp) #define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)
#define NEXT_CHAR(rex,level) \ #define NEXT_CHAR(rex,level) \
@ -86,9 +98,21 @@ static const xp_byte_t* __print_expression (const xp_byte_t* p);
static const xp_byte_t* __print_branch (const xp_byte_t* p); static const xp_byte_t* __print_branch (const xp_byte_t* p);
static const xp_byte_t* __print_atom (const xp_byte_t* p); static const xp_byte_t* __print_atom (const xp_byte_t* p);
static xp_bool_t __begin_with ( static xp_bool_t __begin_with (
const xp_char_t* str, xp_size_t len, const xp_char_t* what); const xp_char_t* str, xp_size_t len, const xp_char_t* what);
/*
 * Forward declarations of the matching functions.
 *
 * The first four walk the compiled code read-only and are defined with
 * a const base pointer and a const return pointer, so they are declared
 * the same way here; a declaration that differs from the definition in
 * qualifiers is a conflicting declaration.
 * __match_any_char is defined with non-const pointers and is declared
 * to agree with that definition.
 */
static const xp_byte_t* __match_expression (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat);
static const xp_byte_t* __match_branch (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat);
static const xp_byte_t* __match_branch_body (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat);
static const xp_byte_t* __match_atom (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat);
static xp_byte_t* __match_any_char (
	xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat);
static xp_bool_t __cc_isalnum (xp_char_t c); static xp_bool_t __cc_isalnum (xp_char_t c);
static xp_bool_t __cc_isalpha (xp_char_t c); static xp_bool_t __cc_isalpha (xp_char_t c);
static xp_bool_t __cc_isblank (xp_char_t c); static xp_bool_t __cc_isblank (xp_char_t c);
@ -762,7 +786,7 @@ static const xp_byte_t* __print_branch (const xp_byte_t* p)
static const xp_byte_t* __print_atom (const xp_byte_t* p) static const xp_byte_t* __print_atom (const xp_byte_t* p)
{ {
struct __code_t* cp = (struct __code_t*)p; const struct __code_t* cp = (const struct __code_t*)p;
if (cp->cmd == CMD_BOL) if (cp->cmd == CMD_BOL)
{ {
@ -876,40 +900,200 @@ static xp_bool_t __begin_with (
/*
 * Searches the subject string str of len characters for a match of the
 * compiled expression held in rex->code.buf.
 *
 * Matching is attempted at offset 0, 1, ... len (inclusive) and stops at
 * the first offset where __match_expression reports a match. On success
 * *match_offset receives that offset, *match_len receives the length of
 * the match and 0 is returned. -1 is returned when no offset matches, in
 * which case the two output parameters are left untouched.
 *
 * NOTE(review): lines that show a statement twice appear to come from a
 * side-by-side diff rendering (old column followed by new column).
 */
int xp_awk_rex_match (xp_awk_rex_t* rex, int xp_awk_rex_match (xp_awk_rex_t* rex,
const xp_char_t* str, xp_size_t len, const xp_char_t* str, xp_size_t len,
const xp_char_t** match, xp_size_t* match_len) xp_size_t* match_offset, xp_size_t* match_len)
{ {
xp_size_t offset = 0; xp_size_t offset = 0;
struct __match_t mat;
mat.matched = xp_false;
/* offset == len is tried as well, i.e. the position just past the last character */
while (offset <= len) while (offset <= len)
{ {
__match_expression (rex); mat.bp = str;
mat.end = str + len;
mat.mp = str + offset;
/* the returned code pointer is not needed here; only mat is inspected */
__match_expression (rex, rex->code.buf, &mat);
if (mat.matched)
{
*match_offset = offset;
*match_len = mat.match_len;
break;
}
offset++;
} }
return (mat.matched)? 0: -1;
} }
/*
 * Matches the compiled expression located at base against the subject
 * described by mat (bp, end, mp).
 *
 * The expression starts with two xp_size_t fields that are read into nb
 * and el. nb bounds the loop over the branches; el is added to base to
 * form the return value. The branches are tried in order, each from the
 * same match point, and the first one that matches decides the result:
 * mat->matched becomes xp_true and mat->match_len takes the length
 * reported by that branch. Otherwise mat->matched stays xp_false and
 * mat->match_len stays 0.
 *
 * NOTE(review): p is a non-const pointer that is assigned the const
 * base and the const result of __match_branch - confirm whether it
 * should be const xp_byte_t*.
 * NOTE(review): lines that show a statement twice appear to come from a
 * side-by-side diff rendering (old column followed by new column).
 */
void __match_expression (xp_awk_rex_t* rex) static const xp_byte_t* __match_expression (
xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
{ {
xp_byte_t* p;
xp_size_t nb, el, i; xp_size_t nb, el, i;
struct __match_t mat2;
p = base;
nb = *(xp_size_t*)p; p += xp_sizeof(nb); nb = *(xp_size_t*)p; p += xp_sizeof(nb);
el = *(xp_size_t*)p; p += xp_sizeof(el); el = *(xp_size_t*)p; p += xp_sizeof(el);
mat->matched = xp_false;
mat->match_len = 0;
/* mat2 is a scratch copy so that a failing branch cannot disturb mat */
mat2.bp = mat->bp;
mat2.end = mat->end;
for (i = 0; i < nb; i++) for (i = 0; i < nb; i++)
{ {
/* every branch starts over from the caller's match point */
__match_branch (rex); mat2.mp = mat->mp;
p = __match_branch (rex, p, &mat2);
if (mat2.matched)
{
mat->matched = xp_true;
mat->match_len = mat2.match_len;
break;
}
}
/* the return value does not depend on how many branches were tried */
return base + el;
}
/*
 * Matches the single branch located at base against the subject
 * described by mat.
 *
 * The branch starts with a header of two xp_size_t fields. The first one
 * (presumably the number of atoms) is not needed because the atoms are
 * walked until branch_end is reached. The second one is the branch
 * length, which is added to base to find the end of the branch.
 *
 * The branch boundaries are recorded in mat->branch and mat->branch_end
 * and the atoms following the header are handed to __match_branch_body,
 * whose return value is passed through. The outcome is reported in
 * mat->matched and mat->match_len.
 */
static const xp_byte_t* __match_branch (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
{
	const xp_byte_t* p = base;
	xp_size_t bl;

	/* step over the first header field */
	p += xp_sizeof(xp_size_t);
	bl = *(const xp_size_t*)p; p += xp_sizeof(bl);

	/* remember the current branch to work on */
	mat->branch = base;
	mat->branch_end = base + bl;

	/* the atoms begin right after the header. matching must start
	 * at p. starting at base would decode the header as an atom */
	return __match_branch_body (rex, p, mat);
}
/*
 * Matches the atoms from base up to mat->branch_end one after another
 * against the subject, starting at mat->mp.
 *
 * Every atom must match for the sequence to match. mat->match_len
 * accumulates the lengths reported by the atoms and mat->matched is
 * xp_true only if at least one atom was processed and none failed.
 * mat->mp itself is not changed; a private copy is advanced instead.
 *
 * Returns mat->branch_end regardless of the outcome.
 *
 * NOTE(review): when base already equals mat->branch_end no atom is
 * processed and the result is "not matched" - confirm that this is
 * intended for the call made from __match_any_char.
 */
static const xp_byte_t* __match_branch_body (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
{
	struct __match_t mat2;
	const xp_byte_t* p = base;

	mat->matched = xp_false;
	mat->match_len = 0;

	mat2.bp = mat->bp;
	mat2.end = mat->end;
	mat2.mp = mat->mp;
	/* start from a defined state in case an atom reports nothing */
	mat2.matched = xp_false;
	mat2.match_len = 0;
	mat2.branch = mat->branch;
	mat2.branch_end = mat->branch_end;

	while (p < mat->branch_end)
	{
		p = __match_atom (rex, p, &mat2);

		if (!mat2.matched)
		{
			mat->matched = xp_false;
			break; /* stop matching */
		}

		mat->matched = xp_true;
		mat->match_len += mat2.match_len;

		/* the next atom continues where this one stopped */
		mat2.mp += mat2.match_len;
	}

	return mat->branch_end;
}
/*
 * Matches the single atom located at base against the subject at
 * mat->mp.
 *
 * The atom starts with a struct __code_t header whose cmd field selects
 * the matcher. Only CMD_ANY_CHAR is handled; any other command prints a
 * diagnostic and is reported as "not matched".
 *
 * The outcome is stored in mat->matched and mat->match_len. For
 * CMD_ANY_CHAR the return value is whatever __match_any_char returns,
 * otherwise it is the position right after the atom header.
 */
static const xp_byte_t* __match_atom (
	xp_awk_rex_t* rex, const xp_byte_t* base, struct __match_t* mat)
{
	const struct __code_t* cp = (const struct __code_t*)base;
	const xp_byte_t* p = base + xp_sizeof(*cp);

	/* a command that is not handled below must not leave a stale
	 * result of a previous atom behind */
	mat->matched = xp_false;
	mat->match_len = 0;

	if (cp->cmd == CMD_ANY_CHAR)
	{
		/* __match_any_char is declared with a non-const base
		 * pointer, hence the cast */
		p = __match_any_char (rex, (xp_byte_t*)p, mat);
	}
	else
	{
		xp_printf (XP_T("unsupported command in __match_atom\n"));
	}

	return p;
}
/*
 * Matches an "any character" atom against the subject at mat->mp.
 *
 * Characters are counted from mat->mp until cp->ubound of them have
 * been seen or a null character is met. Nothing matches unless the
 * count reaches cp->ubound. With equal bounds that count is the match.
 * Otherwise the count is reduced step by step while it is not below
 * cp->lbound, and at every step the code at p is matched against the
 * subject that follows the counted characters; the first success
 * yields a match of the counted characters plus the remainder.
 *
 * The outcome is stored in mat->matched and mat->match_len and p is
 * returned.
 *
 * NOTE(review): cp is used but not declared in this function, and base
 * is handed over by __match_atom after the atom header - confirm how
 * the bounds are meant to be reached.
 * NOTE(review): match_branch_body lacks the leading underscores of
 * __match_branch_body - presumably a typo.
 * NOTE(review): the end of the subject is detected by a null character
 * and mat->end is not consulted - confirm against subjects that are
 * passed with an explicit length.
 * NOTE(review): if xp_size_t is unsigned and cp->lbound is 0, si-- at 0
 * wraps around and si >= cp->lbound stays true - verify.
 * NOTE(review): lines that show two statements appear to come from a
 * side-by-side diff rendering (old column followed by new column).
 */
void __match_branch (xp_awk_rex_t* rex) static xp_byte_t* __match_any_char (
xp_awk_rex_t* rex, xp_byte_t* base, struct __match_t* mat)
{ {
} xp_byte_t* p;
xp_size_t si = 0;
void __match_atom (xp_awk_rex_t* rex) p = base;
{
}
static const xp_byte_t* __print_branch (const xp_byte_t* p) mat->matched = xp_false;
mat->match_len = 0;
/* find the longest match */
while (1)
{
if (si >= cp->ubound) break;
if (mat->mp[si] == XP_T('\0')) break;
si++;
}
/* fewer characters than the upper bound means no match at all */
if (si == cp->ubound)
{
/* the match found */
if (cp->lbound == cp->ubound)
{
/* fixed occurrences requested. returns the match */
mat->matched = xp_true;
mat->match_len = si;
}
else
{
/* otherwise, it checks if the remaining atoms match */
while (si >= cp->lbound)
{
struct __match_t mat2;
mat2.bp = mat->bp;
mat2.end = mat->end;
/* the remainder is tried right after the si characters taken so far */
mat2.mp = &mat->mp[si];
mat2.branch = mat->branch;
mat2.branch_end = mat->branch_end;
p = match_branch_body (rex, p, &mat2);
if (mat2.matched)
{
mat->matched = xp_true;
mat->match_len = si + mat2.match_len;
break;
}
/* give one character back and try the remainder again */
si--;
}
}
}
return p;
} }
static xp_bool_t __cc_isalnum (xp_char_t c) static xp_bool_t __cc_isalnum (xp_char_t c)