*** empty log message ***

This commit is contained in:
hyung-hwan 2007-03-01 04:31:27 +00:00
parent 53e48d3c59
commit 6c7776f0c7

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c,v 1.73 2007-02-28 14:46:08 bacon Exp $ * $Id: rex.c,v 1.74 2007-03-01 04:31:27 bacon Exp $
* *
* {License} * {License}
*/ */
@ -56,19 +56,12 @@ enum
typedef struct builder_t builder_t; typedef struct builder_t builder_t;
typedef struct matcher_t matcher_t; typedef struct matcher_t matcher_t;
typedef struct match_t match_t; typedef struct match_t match_t;
typedef struct code_t code_t;
typedef struct cshdr_t cshdr_t; typedef struct cshdr_t cshdr_t;
#include <ase/pack.h> struct builder_t
{
ASE_BEGIN_PACKED_STRUCT (code_t)
/*ase_byte_t cmd;*/
short cmd;
short negate; /* only for CMD_CHARSET */
ase_size_t lbound;
ase_size_t ubound;
ASE_END_PACKED_STRUCT ()
ASE_BEGIN_PACKED_STRUCT (builder_t)
ase_awk_t* awk; ase_awk_t* awk;
struct struct
@ -97,9 +90,10 @@ ASE_BEGIN_PACKED_STRUCT (builder_t)
} depth; } depth;
int errnum; int errnum;
ASE_END_PACKED_STRUCT () };
ASE_BEGIN_PACKED_STRUCT (matcher_t) struct matcher_t
{
ase_awk_t* awk; ase_awk_t* awk;
struct struct
@ -119,9 +113,10 @@ ASE_BEGIN_PACKED_STRUCT (matcher_t)
int ignorecase; int ignorecase;
int errnum; int errnum;
ASE_END_PACKED_STRUCT () };
ASE_BEGIN_PACKED_STRUCT (match_t) struct match_t
{
const ase_char_t* match_ptr; const ase_char_t* match_ptr;
ase_bool_t matched; ase_bool_t matched;
@ -129,11 +124,21 @@ ASE_BEGIN_PACKED_STRUCT (match_t)
const ase_byte_t* branch; const ase_byte_t* branch;
const ase_byte_t* branch_end; const ase_byte_t* branch_end;
};
#include <ase/pack.h>
ASE_BEGIN_PACKED_STRUCT (code_t)
/*ase_byte_t cmd;*/
short cmd;
short negate; /* only for CMD_CHARSET */
ase_size_t lbound;
ase_size_t ubound;
ASE_END_PACKED_STRUCT () ASE_END_PACKED_STRUCT ()
ASE_BEGIN_PACKED_STRUCT (cshdr_t) ASE_BEGIN_PACKED_STRUCT (cshdr_t)
ase_size_t csc; ase_size_t csc; /* count */
ase_size_t csl; ase_size_t csl; /* length */
ASE_END_PACKED_STRUCT () ASE_END_PACKED_STRUCT ()
#include <ase/unpack.h> #include <ase/unpack.h>
@ -166,10 +171,10 @@ static int __build_pattern (builder_t* rex);
static int __build_pattern0 (builder_t* rex); static int __build_pattern0 (builder_t* rex);
static int __build_branch (builder_t* rex); static int __build_branch (builder_t* rex);
static int __build_atom (builder_t* rex); static int __build_atom (builder_t* rex);
static int __build_charset (builder_t* rex, struct code_t* cmd); static int __build_charset (builder_t* rex, code_t* cmd);
static int __build_occurrences (builder_t* rex, struct code_t* cmd); static int __build_occurrences (builder_t* rex, code_t* cmd);
static int __build_cclass (builder_t* rex, ase_char_t* cc); static int __build_cclass (builder_t* rex, ase_char_t* cc);
static int __build_range (builder_t* rex, struct code_t* cmd); static int __build_range (builder_t* rex, code_t* cmd);
static int __next_char (builder_t* rex, int level); static int __next_char (builder_t* rex, int level);
static int __add_code (builder_t* rex, void* data, ase_size_t len); static int __add_code (builder_t* rex, void* data, ase_size_t len);
@ -514,7 +519,7 @@ static int __build_branch (builder_t* builder)
ase_size_t zero = 0; ase_size_t zero = 0;
ase_size_t old_size; ase_size_t old_size;
ase_size_t pos_na, pos_bl; ase_size_t pos_na, pos_bl;
struct code_t* cmd; code_t* cmd;
old_size = builder->code.size; old_size = builder->code.size;
@ -526,7 +531,7 @@ static int __build_branch (builder_t* builder)
while (1) while (1)
{ {
cmd = (struct code_t*)&builder->code.buf[builder->code.size]; cmd = (code_t*)&builder->code.buf[builder->code.size];
n = __build_atom (builder); n = __build_atom (builder);
if (n == -1) if (n == -1)
@ -558,7 +563,7 @@ static int __build_branch (builder_t* builder)
static int __build_atom (builder_t* builder) static int __build_atom (builder_t* builder)
{ {
int n; int n;
struct code_t tmp; code_t tmp;
if (builder->ptn.curc.type == CT_EOF) return 0; if (builder->ptn.curc.type == CT_EOF) return 0;
@ -610,10 +615,9 @@ static int __build_atom (builder_t* builder)
} }
else if (builder->ptn.curc.value == ASE_T('[')) else if (builder->ptn.curc.value == ASE_T('['))
{ {
struct code_t* cmd; code_t* cmd;
cmd = (struct code_t*) cmd = (code_t*)&builder->code.buf[builder->code.size];
&builder->code.buf[builder->code.size];
tmp.cmd = CMD_CHARSET; tmp.cmd = CMD_CHARSET;
tmp.negate = 0; tmp.negate = 0;
@ -661,7 +665,7 @@ static int __build_atom (builder_t* builder)
} }
} }
static int __build_charset (builder_t* builder, struct code_t* cmd) static int __build_charset (builder_t* builder, code_t* cmd)
{ {
ase_size_t zero = 0; ase_size_t zero = 0;
ase_size_t old_size; ase_size_t old_size;
@ -821,7 +825,7 @@ static int __build_cclass (builder_t* builder, ase_char_t* cc)
return 1; return 1;
} }
static int __build_occurrences (builder_t* builder, struct code_t* cmd) static int __build_occurrences (builder_t* builder, code_t* cmd)
{ {
if (builder->ptn.curc.type != CT_SPECIAL) return 0; if (builder->ptn.curc.type != CT_SPECIAL) return 0;
@ -872,7 +876,7 @@ static int __build_occurrences (builder_t* builder, struct code_t* cmd)
return 0; return 0;
} }
static int __build_range (builder_t* builder, struct code_t* cmd) static int __build_range (builder_t* builder, code_t* cmd)
{ {
ase_size_t bound; ase_size_t bound;
@ -1189,19 +1193,19 @@ static const ase_byte_t* __match_atom (
}; };
ASE_AWK_ASSERT (matcher->awk, ASE_AWK_ASSERT (matcher->awk,
((struct code_t*)base)->cmd >= 0 && ((code_t*)base)->cmd >= 0 &&
((struct code_t*)base)->cmd < ASE_COUNTOF(matchers)); ((code_t*)base)->cmd < ASE_COUNTOF(matchers));
return matchers[((struct code_t*)base)->cmd] (matcher, base, mat); return matchers[((code_t*)base)->cmd] (matcher, base, mat);
} }
static const ase_byte_t* __match_bol ( static const ase_byte_t* __match_bol (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
const struct code_t* cp; const code_t* cp;
cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_BOL); ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_BOL);
mat->matched = (mat->match_ptr == matcher->match.str.ptr || mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
@ -1215,9 +1219,9 @@ static const ase_byte_t* __match_eol (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
const struct code_t* cp; const code_t* cp;
cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_EOL); ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_EOL);
mat->matched = (mat->match_ptr == matcher->match.str.end || mat->matched = (mat->match_ptr == matcher->match.str.end ||
@ -1231,10 +1235,10 @@ static const ase_byte_t* __match_any_char (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
const struct code_t* cp; const code_t* cp;
ase_size_t si = 0, lbound, ubound; ase_size_t si = 0, lbound, ubound;
cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_ANY_CHAR); ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_ANY_CHAR);
lbound = cp->lbound; lbound = cp->lbound;
@ -1245,10 +1249,10 @@ static const ase_byte_t* __match_any_char (
/* merge the same consecutive codes */ /* merge the same consecutive codes */
while (p < mat->branch_end && while (p < mat->branch_end &&
cp->cmd == ((const struct code_t*)p)->cmd) cp->cmd == ((const code_t*)p)->cmd)
{ {
lbound += ((const struct code_t*)p)->lbound; lbound += ((const code_t*)p)->lbound;
ubound += ((const struct code_t*)p)->ubound; ubound += ((const code_t*)p)->ubound;
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
} }
@ -1283,11 +1287,11 @@ static const ase_byte_t* __match_ord_char (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
const struct code_t* cp; const code_t* cp;
ase_size_t si = 0, lbound, ubound; ase_size_t si = 0, lbound, ubound;
ase_char_t cc; ase_char_t cc;
cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_ORD_CHAR); ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_ORD_CHAR);
lbound = cp->lbound; lbound = cp->lbound;
@ -1301,12 +1305,12 @@ static const ase_byte_t* __match_ord_char (
if (matcher->ignorecase) if (matcher->ignorecase)
{ {
while (p < mat->branch_end && while (p < mat->branch_end &&
cp->cmd == ((const struct code_t*)p)->cmd) cp->cmd == ((const code_t*)p)->cmd)
{ {
if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break; if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break;
lbound += ((const struct code_t*)p)->lbound; lbound += ((const code_t*)p)->lbound;
ubound += ((const struct code_t*)p)->ubound; ubound += ((const code_t*)p)->ubound;
p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc); p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc);
} }
@ -1314,12 +1318,12 @@ static const ase_byte_t* __match_ord_char (
else else
{ {
while (p < mat->branch_end && while (p < mat->branch_end &&
cp->cmd == ((const struct code_t*)p)->cmd) cp->cmd == ((const code_t*)p)->cmd)
{ {
if (*(ase_char_t*)(p+ASE_SIZEOF(*cp)) != cc) break; if (*(ase_char_t*)(p+ASE_SIZEOF(*cp)) != cc) break;
lbound += ((const struct code_t*)p)->lbound; lbound += ((const code_t*)p)->lbound;
ubound += ((const struct code_t*)p)->ubound; ubound += ((const code_t*)p)->ubound;
p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc); p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc);
} }
@ -1382,73 +1386,52 @@ static const ase_byte_t* __match_charset (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
const struct code_t* cp; ase_size_t si = 0;
ase_size_t si = 0, lbound, ubound, csc, csl;
ase_bool_t n; ase_bool_t n;
ase_char_t c; ase_char_t c;
code_t* cp;
cshdr_t* cshdr; cshdr_t* cshdr;
cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); cp = (code_t*)p; p += ASE_SIZEOF(*cp);
ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_CHARSET); ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_CHARSET);
lbound = cp->lbound; cshdr = (cshdr_t*)p; p += ASE_SIZEOF(*cshdr);
ubound = cp->ubound;
cshdr = (cshdr_t*)p;
csc = cshdr->csc;
csl = cshdr->csl;
p += ASE_SIZEOF(*cshdr);
/*
#if defined(__i386) || defined(__i386__)
csc = *(ase_size_t*)p;
#else
ase_memcpy (&csc, p, ASE_SIZEOF(csc));
#endif
p += ASE_SIZEOF(csc);
#if defined(__i386) || defined(__i386__)
csl = *(ase_size_t*)p;
#else
ase_memcpy (&csl, p, ASE_SIZEOF(csl));
#endif
p += ASE_SIZEOF(csl);
*/
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: lbound = %u, ubound = %u\n"), ASE_T("__match_charset: lbound = %u, ubound = %u\n"),
(unsigned int)lbound, (unsigned int)ubound); (unsigned int)cp->lbound, (unsigned int)cp->ubound);
#endif #endif
mat->matched = ase_false; mat->matched = ase_false;
mat->match_len = 0; mat->match_len = 0;
while (si < ubound) while (si < cp->ubound)
{ {
if (&mat->match_ptr[si] >= matcher->match.str.end) break; if (&mat->match_ptr[si] >= matcher->match.str.end) break;
c = mat->match_ptr[si]; c = mat->match_ptr[si];
if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c); if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c);
n = __test_charset (matcher, p, csc, c); n = __test_charset (matcher, p, cshdr->csc, c);
if (cp->negate) n = !n; if (cp->negate) n = !n;
if (!n) break; if (!n) break;
si++; si++;
} }
p = p + csl - (ASE_SIZEOF(csc) + ASE_SIZEOF(csl)); p = p + cshdr->csl - ASE_SIZEOF(*cshdr);
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"), ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"),
(unsigned)si, (unsigned)lbound, (unsigned)ubound); (unsigned)si, (unsigned)cp->lbound, (unsigned)cp->ubound);
#endif #endif
if (si >= lbound && si <= ubound)
if (si >= cp->lbound && si <= cp->ubound)
{ {
p = __match_occurrences (matcher, si, p, lbound, ubound, mat); p = __match_occurrences (matcher, si, p, cp->lbound, cp->ubound, mat);
} }
return p; return p;
@ -1458,11 +1441,11 @@ static const ase_byte_t* __match_group (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
const struct code_t* cp; const code_t* cp;
match_t mat2; match_t mat2;
ase_size_t si = 0, grp_len_static[16], * grp_len; ase_size_t si = 0, grp_len_static[16], * grp_len;
cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_GROUP); ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_GROUP);
mat->matched = ase_false; mat->matched = ase_false;
@ -1855,7 +1838,7 @@ static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p)
static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p) static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p)
{ {
const struct code_t* cp = (const struct code_t*)p; const code_t* cp = (const code_t*)p;
if (cp->cmd == CMD_BOL) if (cp->cmd == CMD_BOL)
{ {
@ -1880,27 +1863,16 @@ static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p)
} }
else if (cp->cmd == CMD_CHARSET) else if (cp->cmd == CMD_CHARSET)
{ {
ase_size_t csc, csl, i; ase_size_t i;
cshdr_t* cshdr;
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
DPRINTF (DCUSTOM, ASE_T("[")); DPRINTF (DCUSTOM, ASE_T("["));
if (cp->negate) DPRINTF (DCUSTOM, ASE_T("^")); if (cp->negate) DPRINTF (DCUSTOM, ASE_T("^"));
#if defined(__i386) || defined(__i386__) cshdr = (cshdr_t*)p; p += ASE_SIZEOF(*cshdr);
csc = *(ase_size_t*)p;
#else
ase_memcpy (&csc, p, ASE_SIZEOF(csc));
#endif
p += ASE_SIZEOF(csc);
#if defined(__i386) || defined(__i386__) for (i = 0; i < cshdr->csc; i++)
csl = *(ase_size_t*)p;
#else
ase_memcpy (&csl, p, ASE_SIZEOF(csl));
#endif
p += ASE_SIZEOF(csl);
for (i = 0; i < csc; i++)
{ {
ase_char_t c0, c1, c2; ase_char_t c0, c1, c2;