*** empty log message ***

This commit is contained in:
hyung-hwan 2007-02-28 11:00:32 +00:00
parent b8fb83234a
commit ff3823788f

View File

@ -1,11 +1,15 @@
/* /*
* $Id: rex.c,v 1.68 2007-02-23 10:57:09 bacon Exp $ * $Id: rex.c,v 1.69 2007-02-28 11:00:32 bacon Exp $
* *
* {License} * {License}
*/ */
#include <ase/awk/awk_i.h> #include <ase/awk/awk_i.h>
#ifdef DEBUG_REX
#include <ase/utl/stdio.h>
#endif
enum enum
{ {
CT_EOF, CT_EOF,
@ -220,11 +224,9 @@ static ase_bool_t __cc_isspace (ase_awk_t* awk, ase_char_t c);
static ase_bool_t __cc_isupper (ase_awk_t* awk, ase_char_t c); static ase_bool_t __cc_isupper (ase_awk_t* awk, ase_char_t c);
static ase_bool_t __cc_isxdigit (ase_awk_t* awk, ase_char_t c); static ase_bool_t __cc_isxdigit (ase_awk_t* awk, ase_char_t c);
#ifdef DEBUG_REX static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_pattern (const ase_byte_t* p); static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_branch (const ase_byte_t* p); static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_atom (const ase_byte_t* p);
#endif
struct __char_class_t struct __char_class_t
{ {
@ -402,8 +404,19 @@ ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
ASE_AWK_ASSERT (awk, p != ASE_NULL); ASE_AWK_ASSERT (awk, p != ASE_NULL);
nb = *(ase_size_t*)p; p += ASE_SIZEOF(nb); #if defined(__sparc) || defined(__sparc__)
el = *(ase_size_t*)p; p += ASE_SIZEOF(el); ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#else
nb = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(nb);
#if defined(__sparc) || defined(__sparc__)
ase_memcpy (&el, p, ASE_SIZEOF(el));
#else
el = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(el);
/* an empty regular expression look like: /* an empty regular expression look like:
* | expression | * | expression |
@ -727,7 +740,7 @@ static int __build_charset (__builder_t* builder, struct __code_t* cmd)
{ {
/* invalid range */ /* invalid range */
#ifdef DEBUG_REX #ifdef DEBUG_REX
builder->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__build_charset: invalid character set range\n")); ASE_T("__build_charset: invalid character set range\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXCRANGE; builder->errnum = ASE_AWK_EREXCRANGE;
@ -759,8 +772,7 @@ static int __build_cclass (__builder_t* builder, ase_char_t* cc)
{ {
/* wrong class name */ /* wrong class name */
#ifdef DEBUG_REX #ifdef DEBUG_REX
builder->awk->prmfns.dprintf ( ase_dprintf (ASE_T("__build_cclass: wrong class name\n"));
ASE_T("__build_cclass: wrong class name\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXCCLASS; builder->errnum = ASE_AWK_EREXCCLASS;
return -1; return -1;
@ -772,9 +784,8 @@ static int __build_cclass (__builder_t* builder, ase_char_t* cc)
if (builder->ptn.curc.type != CT_NORMAL || if (builder->ptn.curc.type != CT_NORMAL ||
builder->ptn.curc.value != ASE_T(':')) builder->ptn.curc.value != ASE_T(':'))
{ {
#ifdef BUILD_REX #ifdef DEBUG_REX
builder->awk->prmfns.dprintf ( ase_dprintf (ASE_T("__build_cclass: a colon(:) expected\n"));
ASE_T("__build_cclass: a colon(:) expected\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXCOLON; builder->errnum = ASE_AWK_EREXCOLON;
return -1; return -1;
@ -787,8 +798,7 @@ static int __build_cclass (__builder_t* builder, ase_char_t* cc)
builder->ptn.curc.value != ASE_T(']')) builder->ptn.curc.value != ASE_T(']'))
{ {
#ifdef DEBUG_REX #ifdef DEBUG_REX
builder->awk->prmfns.dprintf ( ase_dprintf (ASE_T("__build_cclass: ] expected\n"));
ASE_T("__build_cclass: ] expected\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXRBRACKET; builder->errnum = ASE_AWK_EREXRBRACKET;
return -1; return -1;
@ -1029,14 +1039,26 @@ static const ase_byte_t* __match_pattern (
ase_size_t nb, el, i; ase_size_t nb, el, i;
p = base; p = base;
nb = *(ase_size_t*)p; p += ASE_SIZEOF(nb); #if defined(__sparc) || defined(__sparc__)
el = *(ase_size_t*)p; p += ASE_SIZEOF(el); ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#else
nb = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(nb);
#ifdef BUILD_REX #if defined(__sparc) || defined(__sparc__)
matcher->awk->prmfns.dprintf ( ase_memcpy (&el, p, ASE_SIZEOF(el));
#else
el = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(el);
#ifdef DEBUG_REX
ase_dprintf (
ASE_T("__match_pattern: NB = %u, EL = %u\n"), ASE_T("__match_pattern: NB = %u, EL = %u\n"),
(unsigned)nb, (unsigned)el); (unsigned)nb, (unsigned)el);
#endif #endif
mat->matched = ase_false; mat->matched = ase_false;
mat->match_len = 0; mat->match_len = 0;
@ -1220,8 +1242,8 @@ static const ase_byte_t* __match_any_char (
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
} }
#ifdef BUILD_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_any_char: lbound = %u, ubound = %u\n"), ASE_T("__match_any_char: lbound = %u, ubound = %u\n"),
(unsigned int)lbound, (unsigned int)ubound); (unsigned int)lbound, (unsigned int)ubound);
#endif #endif
@ -1233,8 +1255,8 @@ static const ase_byte_t* __match_any_char (
si++; si++;
} }
#ifdef BUILD_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_any_char: max si = %u\n"), (unsigned)si); ASE_T("__match_any_char: max si = %u\n"), (unsigned)si);
#endif #endif
@ -1292,8 +1314,8 @@ static const ase_byte_t* __match_ord_char (
} }
} }
#ifdef BUILD_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_ord_char: cc = %c, lbound = %u, ubound = %u\n"), ASE_T("__match_ord_char: cc = %c, lbound = %u, ubound = %u\n"),
cc, (unsigned int)lbound, (unsigned int)ubound); cc, (unsigned int)lbound, (unsigned int)ubound);
#endif #endif
@ -1308,7 +1330,7 @@ static const ase_byte_t* __match_ord_char (
{ {
if (&mat->match_ptr[si] >= matcher->match.str.end) break; if (&mat->match_ptr[si] >= matcher->match.str.end) break;
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_ord_char: <ignorecase> %c %c\n"), ASE_T("__match_ord_char: <ignorecase> %c %c\n"),
cc, mat->match_ptr[si]); cc, mat->match_ptr[si]);
#endif #endif
@ -1322,7 +1344,7 @@ static const ase_byte_t* __match_ord_char (
{ {
if (&mat->match_ptr[si] >= matcher->match.str.end) break; if (&mat->match_ptr[si] >= matcher->match.str.end) break;
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_ord_char: %c %c\n"), ASE_T("__match_ord_char: %c %c\n"),
cc, mat->match_ptr[si]); cc, mat->match_ptr[si]);
#endif #endif
@ -1332,7 +1354,7 @@ static const ase_byte_t* __match_ord_char (
} }
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"), ASE_T("__match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"),
(unsigned)si, (unsigned)lbound, (unsigned)ubound); (unsigned)si, (unsigned)lbound, (unsigned)ubound);
#endif #endif
@ -1360,11 +1382,22 @@ static const ase_byte_t* __match_charset (
lbound = cp->lbound; lbound = cp->lbound;
ubound = cp->ubound; ubound = cp->ubound;
csc = *(ase_size_t*)p; p += ASE_SIZEOF(csc); #if defined(__sparc) || defined(__sparc__)
csl = *(ase_size_t*)p; p += ASE_SIZEOF(csl); ase_memcpy (&csc, p, ASE_SIZEOF(csc));
#else
csc = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(csc);
#ifdef BUILD_REX #if defined(__sparc) || defined(__sparc__)
matcher->awk->prmfns.dprintf ( ase_memcpy (&csl, p, ASE_SIZEOF(csl));
#else
csl = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(csl);
#ifdef DEBUG_REX
ase_dprintf (
ASE_T("__match_charset: lbound = %u, ubound = %u\n"), ASE_T("__match_charset: lbound = %u, ubound = %u\n"),
(unsigned int)lbound, (unsigned int)ubound); (unsigned int)lbound, (unsigned int)ubound);
#endif #endif
@ -1389,7 +1422,7 @@ static const ase_byte_t* __match_charset (
p = p + csl - (ASE_SIZEOF(csc) + ASE_SIZEOF(csl)); p = p + csl - (ASE_SIZEOF(csc) + ASE_SIZEOF(csl));
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"), ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"),
(unsigned)si, (unsigned)lbound, (unsigned)ubound); (unsigned)si, (unsigned)lbound, (unsigned)ubound);
#endif #endif
@ -1502,7 +1535,7 @@ static const ase_byte_t* __match_group (
mat2.branch_end = mat->branch_end; mat2.branch_end = mat->branch_end;
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_group: GROUP si=%d [%s]\n"), ASE_T("__match_group: GROUP si=%d [%s]\n"),
(unsigned)si, mat->match_ptr); (unsigned)si, mat->match_ptr);
#endif #endif
@ -1603,7 +1636,7 @@ static const ase_byte_t* __match_occurrences (
mat2.branch_end = mat->branch_end; mat2.branch_end = mat->branch_end;
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match occurrences: si=%u [%s]\n"), ASE_T("__match occurrences: si=%u [%s]\n"),
(unsigned)si, mat->match_ptr); (unsigned)si, mat->match_ptr);
#endif #endif
@ -1643,7 +1676,7 @@ static ase_bool_t __test_charset (
if (matcher->ignorecase) if (matcher->ignorecase)
c1 = ASE_AWK_TOUPPER(matcher->awk, c1); c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_charset: <one> %c %c\n"), c, c1); ASE_T("__match_charset: <one> %c %c\n"), c, c1);
#endif #endif
if (c == c1) return ase_true; if (c == c1) return ase_true;
@ -1660,7 +1693,7 @@ static ase_bool_t __test_charset (
c2 = ASE_AWK_TOUPPER(matcher->awk, c2); c2 = ASE_AWK_TOUPPER(matcher->awk, c2);
} }
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_charset: <range> %c %c-%c\n"), c, c1, c2); ASE_T("__match_charset: <range> %c %c-%c\n"), c, c1, c2);
#endif #endif
if (c >= c1 && c <= c2) return ase_true; if (c >= c1 && c <= c2) return ase_true;
@ -1669,7 +1702,7 @@ static ase_bool_t __test_charset (
{ {
c1 = *(const ase_char_t*)p; c1 = *(const ase_char_t*)p;
#ifdef DEBUG_REX #ifdef DEBUG_REX
matcher->awk->prmfns.dprintf ( ase_dprintf (
ASE_T("__match_charset: <class> %c %s\n"), ASE_T("__match_charset: <class> %c %s\n"),
c, __char_class[c1].name); c, __char_class[c1].name);
#endif #endif
@ -1749,75 +1782,80 @@ static ase_bool_t __cc_isxdigit (ase_awk_t* awk, ase_char_t c)
return ASE_AWK_ISXDIGIT (awk, c); return ASE_AWK_ISXDIGIT (awk, c);
} }
#ifdef DEBUG_REX #define DPRINTF awk->prmfns.misc.dprintf
#define DCUSTOM awk->prmfns.misc.custom_data
void ase_awk_printrex (ase_awk_t* awk, void* rex) void ase_awk_dprintrex (ase_awk_t* awk, void* rex)
{ {
__print_pattern (rex); __print_pattern (awk, rex);
awk->prmfns.dprintf (ASE_T("\n")); DPRINTF (DCUSTOM, awk->prmfns.misc.custom_data, ASE_T("\n"));
} }
static const ase_byte_t* __print_pattern (const ase_byte_t* p) static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p)
{ {
ase_size_t nb, el, i; ase_size_t nb, el, i;
nb = *(ase_size_t*)p; p += ASE_SIZEOF(nb); #if defined(__sparc) || defined(__sparc__)
el = *(ase_size_t*)p; p += ASE_SIZEOF(el); ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#else
#ifdef DEBUG_REX nb = *(ase_size_t*)p;
ase_printf (ASE_T("__print_pattern: NB = %u, EL = %u\n"), (unsigned int)nb, (unsigned int)el);
#endif #endif
p += ASE_SIZEOF(nb);
#if defined(__sparc) || defined(__sparc__)
ase_memcpy (&el, p, ASE_SIZEOF(el));
#else
el = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(el);
for (i = 0; i < nb; i++) for (i = 0; i < nb; i++)
{ {
if (i != 0) ase_printf (ASE_T("|")); if (i != 0) DPRINTF (DCUSTOM, ASE_T("|"));
p = __print_branch (p); p = __print_branch (awk, p);
} }
return p; return p;
} }
static const ase_byte_t* __print_branch (const ase_byte_t* p) static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p)
{ {
ase_size_t na, bl, i; ase_size_t na, bl, i;
na = *(ase_size_t*)p; p += ASE_SIZEOF(na); na = *(ase_size_t*)p; p += ASE_SIZEOF(na);
bl = *(ase_size_t*)p; p += ASE_SIZEOF(bl); bl = *(ase_size_t*)p; p += ASE_SIZEOF(bl);
#ifdef DEBUG_REX
ase_printf (ASE_T("__print_branch: NA = %u, BL = %u\n"), (unsigned int) na, (unsigned int)bl);
#endif
for (i = 0; i < na; i++) for (i = 0; i < na; i++)
{ {
p = __print_atom (p); p = __print_atom (awk, p);
} }
return p; return p;
} }
static const ase_byte_t* __print_atom (const ase_byte_t* p) static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p)
{ {
const struct __code_t* cp = (const struct __code_t*)p; const struct __code_t* cp = (const struct __code_t*)p;
if (cp->cmd == CMD_BOL) if (cp->cmd == CMD_BOL)
{ {
ase_printf (ASE_T("^")); DPRINTF (DCUSTOM, ASE_T("^"));
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
} }
else if (cp->cmd == CMD_EOL) else if (cp->cmd == CMD_EOL)
{ {
ase_printf (ASE_T("$")); DPRINTF (DCUSTOM, ASE_T("$"));
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
} }
else if (cp->cmd == CMD_ANY_CHAR) else if (cp->cmd == CMD_ANY_CHAR)
{ {
ase_printf (ASE_T(".")); DPRINTF (DCUSTOM, ASE_T("."));
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
} }
else if (cp->cmd == CMD_ORD_CHAR) else if (cp->cmd == CMD_ORD_CHAR)
{ {
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
ase_printf (ASE_T("%c"), *(ase_char_t*)p); DPRINTF (DCUSTOM, ASE_T("%c"), *(ase_char_t*)p);
p += ASE_SIZEOF(ase_char_t); p += ASE_SIZEOF(ase_char_t);
} }
else if (cp->cmd == CMD_CHARSET) else if (cp->cmd == CMD_CHARSET)
@ -1825,11 +1863,22 @@ static const ase_byte_t* __print_atom (const ase_byte_t* p)
ase_size_t csc, csl, i; ase_size_t csc, csl, i;
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
ase_printf (ASE_T("[")); DPRINTF (DCUSTOM, ASE_T("["));
if (cp->negate) ase_printf (ASE_T("^")); if (cp->negate) DPRINTF (DCUSTOM, ASE_T("^"));
csc = *(ase_size_t*)p; p += ASE_SIZEOF(csc); #if defined(__sparc) || defined(__sparc__)
csl = *(ase_size_t*)p; p += ASE_SIZEOF(csl); ase_memcpy (&csc, p, ASE_SIZEOF(csc));
#else
csc = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(csc);
#if defined(__sparc) || defined(__sparc__)
ase_memcpy (&csl, p, ASE_SIZEOF(csl));
#else
csl = *(ase_size_t*)p;
#endif
p += ASE_SIZEOF(csl);
for (i = 0; i < csc; i++) for (i = 0; i < csc; i++)
{ {
@ -1841,55 +1890,54 @@ static const ase_byte_t* __print_atom (const ase_byte_t* p)
if (c0 == CHARSET_ONE) if (c0 == CHARSET_ONE)
{ {
c1 = *(ase_char_t*)p; c1 = *(ase_char_t*)p;
ase_printf (ASE_T("%c"), c1); DPRINTF (DCUSTOM, ASE_T("%c"), c1);
} }
else if (c0 == CHARSET_RANGE) else if (c0 == CHARSET_RANGE)
{ {
c1 = *(ase_char_t*)p; c1 = *(ase_char_t*)p;
p += ASE_SIZEOF(c1); p += ASE_SIZEOF(c1);
c2 = *(ase_char_t*)p; c2 = *(ase_char_t*)p;
ase_printf (ASE_T("%c-%c"), c1, c2); DPRINTF (DCUSTOM, ASE_T("%c-%c"), c1, c2);
} }
else if (c0 == CHARSET_CLASS) else if (c0 == CHARSET_CLASS)
{ {
c1 = *(ase_char_t*)p; c1 = *(ase_char_t*)p;
ase_printf (ASE_T("[:%s:]"), __char_class[c1].name); DPRINTF (DCUSTOM, ASE_T("[:%s:]"), __char_class[c1].name);
} }
else else
{ {
ase_printf (ASE_T("should never happen - invalid charset code\n")); DPRINTF (DCUSTOM, ASE_T("should never happen - invalid charset code\n"));
} }
p += ASE_SIZEOF(c1); p += ASE_SIZEOF(c1);
} }
ase_printf (ASE_T("]")); DPRINTF (DCUSTOM, ASE_T("]"));
} }
else if (cp->cmd == CMD_GROUP) else if (cp->cmd == CMD_GROUP)
{ {
p += ASE_SIZEOF(*cp); p += ASE_SIZEOF(*cp);
ase_printf (ASE_T("(")); DPRINTF (DCUSTOM, ASE_T("("));
p = __print_pattern (p); p = __print_pattern (awk, p);
ase_printf (ASE_T(")")); DPRINTF (DCUSTOM, ASE_T(")"));
} }
else else
{ {
ase_printf (ASE_T("should never happen - invalid atom code\n")); DPRINTF (DCUSTOM, ASE_T("should never happen - invalid atom code\n"));
} }
if (cp->lbound == 0 && cp->ubound == BOUND_MAX) if (cp->lbound == 0 && cp->ubound == BOUND_MAX)
ase_printf (ASE_T("*")); DPRINTF (DCUSTOM, ASE_T("*"));
else if (cp->lbound == 1 && cp->ubound == BOUND_MAX) else if (cp->lbound == 1 && cp->ubound == BOUND_MAX)
ase_printf (ASE_T("+")); DPRINTF (DCUSTOM, ASE_T("+"));
else if (cp->lbound == 0 && cp->ubound == 1) else if (cp->lbound == 0 && cp->ubound == 1)
ase_printf (ASE_T("?")); DPRINTF (DCUSTOM, ASE_T("?"));
else if (cp->lbound != 1 || cp->ubound != 1) else if (cp->lbound != 1 || cp->ubound != 1)
{ {
ase_printf (ASE_T("{%lu,%lu}"), DPRINTF (DCUSTOM, ASE_T("{%lu,%lu}"),
(unsigned long)cp->lbound, (unsigned long)cp->ubound); (unsigned long)cp->lbound, (unsigned long)cp->ubound);
} }
return p; return p;
} }
#endif