Fixed a bug in merging consecutive identical codes: their occurrence bounds now saturate at BOUND_MAX instead of overflowing
This commit is contained in:
parent
f14f01864d
commit
f9d4ccc50a
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
|
* $Id: rex.c 279 2009-09-06 13:18:08Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -1483,16 +1483,25 @@ static const qse_byte_t* match_any_char (
|
|||||||
while (p < mat->branch_end &&
|
while (p < mat->branch_end &&
|
||||||
cp->cmd == ((const atom_t*)p)->cmd)
|
cp->cmd == ((const atom_t*)p)->cmd)
|
||||||
{
|
{
|
||||||
lbound += ((const atom_t*)p)->lbound;
|
qse_size_t lb, ub;
|
||||||
ubound += ((const atom_t*)p)->ubound;
|
|
||||||
|
lb = ((const atom_t*)p)->lbound;
|
||||||
|
ub = ((const atom_t*)p)->ubound;
|
||||||
|
|
||||||
|
/* perform minimal overflow check as this implementation
|
||||||
|
* uses the maximum value to mean infinite.
|
||||||
|
* consider the upper bound of '+' and '*'. */
|
||||||
|
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
|
||||||
|
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
|
||||||
|
ubound = (BOUND_MAX-ub >= ubound)? (ubound + ub): BOUND_MAX;
|
||||||
|
|
||||||
p += QSE_SIZEOF(*cp);
|
p += QSE_SIZEOF(*cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
qse_dprintf (
|
qse_dprintf (
|
||||||
QSE_T("match_any_char: lbound = %u, ubound = %u\n"),
|
QSE_T("match_any_char: lbound = %lu, ubound = %lu\n"),
|
||||||
(unsigned int)lbound, (unsigned int)ubound);
|
(unsigned long)lbound, (unsigned long)ubound);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* find the longest match */
|
/* find the longest match */
|
||||||
@ -1504,7 +1513,7 @@ static const qse_byte_t* match_any_char (
|
|||||||
|
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
qse_dprintf (
|
qse_dprintf (
|
||||||
QSE_T("match_any_char: max si = %u\n"), (unsigned)si);
|
QSE_T("match_any_char: max si = %lu\n"), (unsigned long)si);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (si >= lbound && si <= ubound)
|
if (si >= lbound && si <= ubound)
|
||||||
@ -1529,42 +1538,45 @@ static const qse_byte_t* match_ord_char (
|
|||||||
lbound = cp->lbound;
|
lbound = cp->lbound;
|
||||||
ubound = cp->ubound;
|
ubound = cp->ubound;
|
||||||
|
|
||||||
|
#ifdef DEBUG_REX
|
||||||
|
qse_dprintf (
|
||||||
|
QSE_T("match_ord_char: cc=%c, lbound=%lu, ubound=%lu\n"),
|
||||||
|
cc, (unsigned long)lbound, (unsigned long)ubound);
|
||||||
|
#endif
|
||||||
|
|
||||||
cc = *(qse_char_t*)p; p += QSE_SIZEOF(cc);
|
cc = *(qse_char_t*)p; p += QSE_SIZEOF(cc);
|
||||||
if (matcher->option & QSE_REX_MATCH_IGNORECASE) cc = QSE_TOUPPER(cc);
|
if (matcher->option & QSE_REX_MATCH_IGNORECASE) cc = QSE_TOUPPER(cc);
|
||||||
|
|
||||||
/* merge the same consecutive codes
|
/* merge the same consecutive codes
|
||||||
* for example, a{1,10}a{0,10} is shortened to a{1,20} */
|
* for example, a{1,10}a{0,10} is shortened to a{1,20} */
|
||||||
|
while (p < mat->branch_end &&
|
||||||
|
cp->cmd == ((const atom_t*)p)->cmd)
|
||||||
|
{
|
||||||
|
qse_size_t lb, ub;
|
||||||
|
qse_char_t xc;
|
||||||
|
|
||||||
|
xc = *(qse_char_t*)(p+QSE_SIZEOF(*cp));
|
||||||
if (matcher->option & QSE_REX_MATCH_IGNORECASE)
|
if (matcher->option & QSE_REX_MATCH_IGNORECASE)
|
||||||
{
|
xc = QSE_TOUPPER(xc);
|
||||||
while (p < mat->branch_end &&
|
|
||||||
cp->cmd == ((const atom_t*)p)->cmd)
|
|
||||||
{
|
|
||||||
if (QSE_TOUPPER (*(qse_char_t*)(p+QSE_SIZEOF(*cp))) != cc) break;
|
|
||||||
|
|
||||||
lbound += ((const atom_t*)p)->lbound;
|
if (xc != cc) break;
|
||||||
ubound += ((const atom_t*)p)->ubound;
|
|
||||||
|
lb = ((const atom_t*)p)->lbound;
|
||||||
|
ub = ((const atom_t*)p)->ubound;
|
||||||
|
|
||||||
|
/* perform minimal overflow check as this implementation
|
||||||
|
* uses the maximum value to mean infinite.
|
||||||
|
* consider the upper bound of '+' and '*'. */
|
||||||
|
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
|
||||||
|
ubound = (BOUND_MAX-ub >= ubound)? (ubound + ub): BOUND_MAX;
|
||||||
|
|
||||||
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
|
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
while (p < mat->branch_end &&
|
|
||||||
cp->cmd == ((const atom_t*)p)->cmd)
|
|
||||||
{
|
|
||||||
if (*(qse_char_t*)(p+QSE_SIZEOF(*cp)) != cc) break;
|
|
||||||
|
|
||||||
lbound += ((const atom_t*)p)->lbound;
|
|
||||||
ubound += ((const atom_t*)p)->ubound;
|
|
||||||
|
|
||||||
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
qse_dprintf (
|
qse_dprintf (
|
||||||
QSE_T("match_ord_char: cc = %c, lbound = %u, ubound = %u\n"),
|
QSE_T("match_ord_char(after merging): cc=%c, lbound=%lu, ubound=%lu\n"),
|
||||||
cc, (unsigned int)lbound, (unsigned int)ubound);
|
cc, (unsigned long)lbound, (unsigned long)ubound);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mat->matched = QSE_FALSE;
|
mat->matched = QSE_FALSE;
|
||||||
@ -1602,8 +1614,8 @@ static const qse_byte_t* match_ord_char (
|
|||||||
|
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
qse_dprintf (
|
qse_dprintf (
|
||||||
QSE_T("match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"),
|
QSE_T("match_ord_char: cc=%c, max occ=%lu, lbound=%lu, ubound=%lu\n"),
|
||||||
(unsigned)si, (unsigned)lbound, (unsigned)ubound);
|
cc, (unsigned long)si, (unsigned long)lbound, (unsigned long)ubound);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (si >= lbound && si <= ubound)
|
if (si >= lbound && si <= ubound)
|
||||||
@ -1632,8 +1644,8 @@ static const qse_byte_t* match_charset (
|
|||||||
|
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
qse_dprintf (
|
qse_dprintf (
|
||||||
QSE_T("match_charset: lbound = %u, ubound = %u\n"),
|
QSE_T("match_charset: lbound=%lu, ubound=%lu\n"),
|
||||||
(unsigned int)cp->lbound, (unsigned int)cp->ubound);
|
(unsigned long)cp->lbound, (unsigned long)cp->ubound);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mat->matched = QSE_FALSE;
|
mat->matched = QSE_FALSE;
|
||||||
|
Loading…
Reference in New Issue
Block a user