Fixed a bug in merging consecutive codes that carry occurrence bounds: the merged lower and upper bounds now saturate at BOUND_MAX instead of overflowing

This commit is contained in:
hyung-hwan 2009-09-07 07:18:08 +00:00
parent f14f01864d
commit f9d4ccc50a

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c 207 2009-06-22 13:01:28Z hyunghwan.chung $ * $Id: rex.c 279 2009-09-06 13:18:08Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -1483,16 +1483,25 @@ static const qse_byte_t* match_any_char (
while (p < mat->branch_end && while (p < mat->branch_end &&
cp->cmd == ((const atom_t*)p)->cmd) cp->cmd == ((const atom_t*)p)->cmd)
{ {
lbound += ((const atom_t*)p)->lbound; qse_size_t lb, ub;
ubound += ((const atom_t*)p)->ubound;
lb = ((const atom_t*)p)->lbound;
ub = ((const atom_t*)p)->ubound;
/* perform minimal overflow check as this implementation
* uses the maximum value to mean infinite.
* consider the upper bound of '+' and '*'. */
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
ubound = (BOUND_MAX-ub >= ubound)? (ubound + ub): BOUND_MAX;
p += QSE_SIZEOF(*cp); p += QSE_SIZEOF(*cp);
} }
#ifdef DEBUG_REX #ifdef DEBUG_REX
qse_dprintf ( qse_dprintf (
QSE_T("match_any_char: lbound = %u, ubound = %u\n"), QSE_T("match_any_char: lbound = %lu, ubound = %lu\n"),
(unsigned int)lbound, (unsigned int)ubound); (unsigned long)lbound, (unsigned long)ubound);
#endif #endif
/* find the longest match */ /* find the longest match */
@ -1504,7 +1513,7 @@ static const qse_byte_t* match_any_char (
#ifdef DEBUG_REX #ifdef DEBUG_REX
qse_dprintf ( qse_dprintf (
QSE_T("match_any_char: max si = %u\n"), (unsigned)si); QSE_T("match_any_char: max si = %lu\n"), (unsigned long)si);
#endif #endif
if (si >= lbound && si <= ubound) if (si >= lbound && si <= ubound)
@ -1529,42 +1538,45 @@ static const qse_byte_t* match_ord_char (
lbound = cp->lbound; lbound = cp->lbound;
ubound = cp->ubound; ubound = cp->ubound;
#ifdef DEBUG_REX
qse_dprintf (
QSE_T("match_ord_char: cc=%c, lbound=%lu, ubound=%lu\n"),
cc, (unsigned long)lbound, (unsigned long)ubound);
#endif
cc = *(qse_char_t*)p; p += QSE_SIZEOF(cc); cc = *(qse_char_t*)p; p += QSE_SIZEOF(cc);
if (matcher->option & QSE_REX_MATCH_IGNORECASE) cc = QSE_TOUPPER(cc); if (matcher->option & QSE_REX_MATCH_IGNORECASE) cc = QSE_TOUPPER(cc);
/* merge the same consecutive codes /* merge the same consecutive codes
* for example, a{1,10}a{0,10} is shortened to a{1,20} */ * for example, a{1,10}a{0,10} is shortened to a{1,20} */
while (p < mat->branch_end &&
cp->cmd == ((const atom_t*)p)->cmd)
{
qse_size_t lb, ub;
qse_char_t xc;
xc = *(qse_char_t*)(p+QSE_SIZEOF(*cp));
if (matcher->option & QSE_REX_MATCH_IGNORECASE) if (matcher->option & QSE_REX_MATCH_IGNORECASE)
{ xc = QSE_TOUPPER(xc);
while (p < mat->branch_end &&
cp->cmd == ((const atom_t*)p)->cmd)
{
if (QSE_TOUPPER (*(qse_char_t*)(p+QSE_SIZEOF(*cp))) != cc) break;
lbound += ((const atom_t*)p)->lbound; if (xc != cc) break;
ubound += ((const atom_t*)p)->ubound;
lb = ((const atom_t*)p)->lbound;
ub = ((const atom_t*)p)->ubound;
/* perform minimal overflow check as this implementation
* uses the maximum value to mean infinite.
* consider the upper bound of '+' and '*'. */
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
ubound = (BOUND_MAX-ub >= ubound)? (ubound + ub): BOUND_MAX;
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc); p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
} }
}
else
{
while (p < mat->branch_end &&
cp->cmd == ((const atom_t*)p)->cmd)
{
if (*(qse_char_t*)(p+QSE_SIZEOF(*cp)) != cc) break;
lbound += ((const atom_t*)p)->lbound;
ubound += ((const atom_t*)p)->ubound;
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
}
}
#ifdef DEBUG_REX #ifdef DEBUG_REX
qse_dprintf ( qse_dprintf (
QSE_T("match_ord_char: cc = %c, lbound = %u, ubound = %u\n"), QSE_T("match_ord_char(after merging): cc=%c, lbound=%lu, ubound=%lu\n"),
cc, (unsigned int)lbound, (unsigned int)ubound); cc, (unsigned long)lbound, (unsigned long)ubound);
#endif #endif
mat->matched = QSE_FALSE; mat->matched = QSE_FALSE;
@ -1602,8 +1614,8 @@ static const qse_byte_t* match_ord_char (
#ifdef DEBUG_REX #ifdef DEBUG_REX
qse_dprintf ( qse_dprintf (
QSE_T("match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"), QSE_T("match_ord_char: cc=%c, max occ=%lu, lbound=%lu, ubound=%lu\n"),
(unsigned)si, (unsigned)lbound, (unsigned)ubound); cc, (unsigned long)si, (unsigned long)lbound, (unsigned long)ubound);
#endif #endif
if (si >= lbound && si <= ubound) if (si >= lbound && si <= ubound)
@ -1632,8 +1644,8 @@ static const qse_byte_t* match_charset (
#ifdef DEBUG_REX #ifdef DEBUG_REX
qse_dprintf ( qse_dprintf (
QSE_T("match_charset: lbound = %u, ubound = %u\n"), QSE_T("match_charset: lbound=%lu, ubound=%lu\n"),
(unsigned int)cp->lbound, (unsigned int)cp->ubound); (unsigned long)cp->lbound, (unsigned long)cp->ubound);
#endif #endif
mat->matched = QSE_FALSE; mat->matched = QSE_FALSE;