2007-05-02 01:07:00 +00:00
/*
2012-08-16 03:47:55 +00:00
* $ Id $
2009-09-16 04:01:02 +00:00
*
2013-12-31 10:24:12 +00:00
Copyright 2006 - 2014 Chung , Hyung - Hwan .
2009-09-16 04:01:02 +00:00
This file is part of QSE .
2009-01-06 04:40:25 +00:00
2009-09-16 04:01:02 +00:00
QSE is free software : you can redistribute it and / or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation , either version 3 of
the License , or ( at your option ) any later version .
2009-01-06 04:40:25 +00:00
2009-09-16 04:01:02 +00:00
QSE is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU Lesser General Public License for more details .
2009-01-06 04:40:25 +00:00
2009-09-16 04:01:02 +00:00
You should have received a copy of the GNU Lesser General Public
License along with QSE . If not , see < http : //www.gnu.org/licenses/>.
2007-05-02 01:07:00 +00:00
*/
2008-12-21 21:35:07 +00:00
# include <qse/cmn/rex.h>
2009-05-08 07:15:04 +00:00
# include <qse/cmn/chr.h>
2009-12-11 07:03:54 +00:00
# include <qse/cmn/str.h>
# include <qse/cmn/lda.h>
2008-08-19 05:21:48 +00:00
# include "mem.h"
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Marker for "no upper limit" on an occurrence count. It is stored in
 * occ.max for '*', '+' and for '{n,}' bounds. QSE_TYPE_MAX(qse_size_t) is
 * presumably the largest qse_size_t value - confirm against the QSE headers. */
# define OCC_MAX QSE_TYPE_MAX(qse_size_t)
2007-05-02 01:07:00 +00:00
2010-05-10 07:44:39 +00:00
/*#define XTRA_DEBUG*/
2009-12-11 07:03:54 +00:00
typedef struct comp_t comp_t ;
struct comp_t
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_rex_t * rex ;
2007-05-02 01:07:00 +00:00
2014-07-08 14:30:42 +00:00
qse_cstr_t re ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
const qse_char_t * ptr ;
const qse_char_t * end ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
struct
{
qse_cint_t value ;
int escaped ;
} c ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
qse_size_t gdepth ; /* group depth */
qse_rex_node_t * start ;
2007-05-02 01:07:00 +00:00
} ;
2009-12-11 07:03:54 +00:00
typedef struct exec_t exec_t ;
struct exec_t
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_rex_t * rex ;
2007-05-02 01:07:00 +00:00
struct
{
2008-12-21 21:35:07 +00:00
const qse_char_t * ptr ;
const qse_char_t * end ;
2009-12-11 07:03:54 +00:00
} str ;
2007-05-02 01:07:00 +00:00
struct
{
2009-12-11 07:03:54 +00:00
const qse_char_t * ptr ;
const qse_char_t * end ;
} sub ;
2007-05-02 01:07:00 +00:00
struct
{
2009-12-11 07:03:54 +00:00
int active ;
int pending ;
qse_lda_t set [ 2 ] ; /* candidate arrays */
} cand ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
qse_size_t nmatches ;
const qse_char_t * matchend ; /* 1 character past the match end */
2007-05-02 01:07:00 +00:00
} ;
2009-12-11 07:03:54 +00:00
typedef struct pair_t pair_t ;
struct pair_t
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_rex_node_t * head ;
qse_rex_node_t * tail ;
} ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* group_t records the nested traces of subgroups. The traces are kept
 * as a stack of singly linked group_t elements. The head element is a
 * management element: its occ field serves as a reference count and its
 * node field is always QSE_NULL.
 */
typedef struct group_t group_t;
struct group_t
{
	qse_rex_node_t* node; /* QSE_NULL in the head (management) element */
	qse_size_t occ;       /* reference count in the head element */
	group_t* next;        /* next element down the stack */
};
2009-05-16 07:31:43 +00:00
2009-12-11 07:03:54 +00:00
typedef struct cand_t cand_t ;
struct cand_t
{
qse_rex_node_t * node ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* occurrence */
qse_size_t occ ;
/* the stack of groups that this candidate belongs to.
* it is in the singliy linked list form */
group_t * group ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* match pointer. the number of character advancement
* differs across various node types . BOL and EOL don ' t advance to
* the next character on match while ANY and CHAR do on match .
* therefore , the match pointer is managed per candidate basis . */
const qse_char_t * mptr ;
2007-05-02 01:07:00 +00:00
} ;
2011-09-01 09:43:46 +00:00
/* Initialize a caller-provided rex object.
 *
 * rex  - object to initialize; all of its fields are zeroed first
 * mmgr - memory manager remembered for later allocations
 * code - QSE_NULL, or an already compiled expression that must start with
 *        a QSE_REX_NODE_START node
 *
 * Handing over a compiled expression delegates it to this object:
 * qse_rex_fini() destroys whatever code the object holds at that time.
 *
 * Always returns 0. */
int qse_rex_init (qse_rex_t* rex, qse_mmgr_t* mmgr, qse_rex_node_t* code)
{
	QSE_ASSERT (code == QSE_NULL || code->id == QSE_REX_NODE_START);

	QSE_MEMSET (rex, 0, QSE_SIZEOF(*rex));
	rex->mmgr = mmgr;
	rex->code = code;

	return 0;
}
2007-05-02 01:07:00 +00:00
2014-07-11 14:17:00 +00:00
/* Allocate and initialize a rex object followed by an extension area of
 * xtnsize bytes. The extension area is zero-filled.
 *
 * Returns the new object, or QSE_NULL if the allocation or the
 * initialization fails. */
qse_rex_t* qse_rex_open (qse_mmgr_t* mmgr, qse_size_t xtnsize, qse_rex_node_t* code)
{
	qse_rex_t* rex;

	rex = (qse_rex_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_rex_t) + xtnsize);
	if (rex != QSE_NULL)
	{
		if (qse_rex_init (rex, mmgr, code) >= 0)
		{
			QSE_MEMSET (QSE_XTN(rex), 0, xtnsize);
		}
		else
		{
			/* initialization failed; give the memory back */
			QSE_MMGR_FREE (mmgr, rex);
			rex = QSE_NULL;
		}
	}

	return rex;
}
2009-12-11 07:03:54 +00:00
/* Release a single node. A character set node also owns its member
 * string, which is closed before the node itself is freed. */
static void freenode (qse_rex_node_t* node, qse_mmgr_t* mmgr)
{
	if (node->id == QSE_REX_NODE_CSET && node->u.cset.member != QSE_NULL)
	{
		qse_str_close (node->u.cset.member);
	}

	QSE_MMGR_FREE (mmgr, node);
}
2009-12-11 07:03:54 +00:00
/* Destroy a compiled expression. Every node chained from the START node
 * through the link field is freed, and finally the START node itself,
 * using the memory manager recorded in the START node. */
static void freeallnodes (qse_rex_node_t* start)
{
	qse_rex_node_t* cur;
	qse_rex_node_t* following;
	qse_mmgr_t* mmgr;

	QSE_ASSERT (start->id == QSE_REX_NODE_START);

	mmgr = start->u.s.mmgr;
	for (cur = start->u.s.link; cur != QSE_NULL; cur = following)
	{
		/* fetch the successor before the node is gone */
		following = cur->link;
		freenode (cur, mmgr);
	}

	QSE_MMGR_FREE (mmgr, start);
}
2009-12-11 07:03:54 +00:00
/* Finalize a rex object: destroy the compiled expression it holds, if
 * any. The object itself is not freed. */
void qse_rex_fini (qse_rex_t* rex)
{
	if (rex->code == QSE_NULL) return;

	freeallnodes (rex->code);
	rex->code = QSE_NULL;
}
2009-12-11 07:03:54 +00:00
/* Finalize a rex object and return its memory to the memory manager
 * recorded in it. */
void qse_rex_close (qse_rex_t* rex)
{
	qse_rex_fini (rex);
	QSE_MMGR_FREE (rex->mmgr, rex);
}
2012-12-01 13:13:13 +00:00
/* Return the memory manager recorded in the rex object. */
qse_mmgr_t* qse_rex_getmmgr (qse_rex_t* rex)
{
	return rex->mmgr;
}
/* Return the extension area of the rex object as given by QSE_XTN(). */
void* qse_rex_getxtn (qse_rex_t* rex)
{
	return QSE_XTN (rex);
}
2009-12-11 07:03:54 +00:00
/* Detach the compiled expression from the rex object and return it.
 * The object no longer holds the code afterwards, so qse_rex_fini() will
 * not destroy it. Returns QSE_NULL if no code was held. */
qse_rex_node_t* qse_rex_yield (qse_rex_t* rex)
{
	qse_rex_node_t* detached;

	detached = rex->code;
	rex->code = QSE_NULL;

	return detached;
}
2012-02-25 14:52:26 +00:00
int qse_rex_getoption ( const qse_rex_t * rex )
2009-05-08 07:15:04 +00:00
{
2009-12-11 07:03:54 +00:00
return rex - > option ;
2009-05-08 07:15:04 +00:00
}
2009-12-11 07:03:54 +00:00
/* Replace the option bits of the rex object with opts. */
void qse_rex_setoption (qse_rex_t* rex, int opts)
{
	rex->option = opts;
}
2012-02-25 14:52:26 +00:00
qse_rex_errnum_t qse_rex_geterrnum ( const qse_rex_t * rex )
2009-05-08 07:15:04 +00:00
{
2009-12-11 07:03:54 +00:00
return rex - > errnum ;
2009-05-08 07:15:04 +00:00
}
2012-02-25 14:52:26 +00:00
/* Return a message describing the error number recorded in the rex object.
 * The returned string has static storage. An error number outside the
 * table yields the "unknown error" text. */
const qse_char_t * qse_rex_geterrmsg ( const qse_rex_t * rex )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
/* indexed by rex->errnum. NOTE(review): the entries presumably follow the
 * declaration order of qse_rex_errnum_t - confirm against rex.h */
static const qse_char_t * errstr [ ] =
{
QSE_T ( " no error " ) ,
2013-07-22 13:27:00 +00:00
QSE_T ( " other error " ) ,
QSE_T ( " not implemented " ) ,
QSE_T ( " subsystem error " ) ,
QSE_T ( " internal error that should never have happened " ) ,
2009-12-11 07:03:54 +00:00
QSE_T ( " no sufficient memory available " ) ,
QSE_T ( " no expression compiled " ) ,
QSE_T ( " recursion too deep " ) ,
QSE_T ( " right parenthesis expected " ) ,
QSE_T ( " right bracket expected " ) ,
QSE_T ( " right brace expected " ) ,
QSE_T ( " colon expected " ) ,
QSE_T ( " invalid character range " ) ,
QSE_T ( " invalid character class " ) ,
QSE_T ( " invalid occurrence bound " ) ,
QSE_T ( " special character at wrong position " ) ,
QSE_T ( " premature expression end " )
} ;
/* guard the table access against an out-of-range error number */
return ( rex - > errnum > = 0 & & rex - > errnum < QSE_COUNTOF ( errstr ) ) ?
errstr [ rex - > errnum ] : QSE_T ( " unknown error " ) ;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Allocate a zero-filled node of the given type.
 *
 * If the compiler already has a START node, the new node is pushed onto
 * the front of the list hanging off that START node so that
 * freeallnodes() can find it later.
 *
 * Returns the node, or QSE_NULL with errnum set to QSE_REX_ENOMEM. */
static qse_rex_node_t* newnode (comp_t* c, qse_rex_node_id_t id)
{
	qse_rex_node_t* fresh;

	/* TODO: performance optimization.
	 *   carve nodes out of a large preallocated chunk and grow the
	 *   chunk when it runs out instead of allocating one by one.
	 */
	fresh = (qse_rex_node_t*) QSE_MMGR_ALLOC (c->rex->mmgr, QSE_SIZEOF(qse_rex_node_t));
	if (fresh == QSE_NULL)
	{
		c->rex->errnum = QSE_REX_ENOMEM;
		return QSE_NULL;
	}

	QSE_MEMSET (fresh, 0, QSE_SIZEOF(*fresh));
	fresh->id = id;

	/* nothing to chain to while the START node does not exist yet */
	if (c->start == QSE_NULL) return fresh;

	QSE_ASSERT (c->start->id == QSE_REX_NODE_START);
	fresh->link = c->start->u.s.link;
	c->start->u.s.link = fresh;

	return fresh;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Create a START node. It records the memory manager used for the nodes
 * and heads an initially empty list of allocated nodes.
 * Returns QSE_NULL on allocation failure. */
static qse_rex_node_t* newstartnode (comp_t* c)
{
	qse_rex_node_t* start;

	start = newnode (c, QSE_REX_NODE_START);
	if (start == QSE_NULL) return QSE_NULL;

	start->u.s.mmgr = c->rex->mmgr;
	start->u.s.link = QSE_NULL;
	return start;
}
2009-12-11 07:03:54 +00:00
/* Create an END node. Returns QSE_NULL on allocation failure. */
static qse_rex_node_t* newendnode (comp_t* c)
{
	return newnode (c, QSE_REX_NODE_END);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Create a NOP node. Returns QSE_NULL on allocation failure. */
static qse_rex_node_t* newnopnode (comp_t* c)
{
	return newnode (c, QSE_REX_NODE_NOP);
}
2009-12-11 07:03:54 +00:00
/* Create a GROUP node. Returns QSE_NULL on allocation failure. */
static qse_rex_node_t* newgroupnode (comp_t* c)
{
	return newnode (c, QSE_REX_NODE_GROUP);
}
2009-12-11 07:03:54 +00:00
/* Create a GROUPEND node that points back to the GROUP node it closes.
 * Returns QSE_NULL on allocation failure. */
static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group)
{
	qse_rex_node_t* ge;

	ge = newnode (c, QSE_REX_NODE_GROUPEND);
	if (ge == QSE_NULL) return QSE_NULL;

	ge->u.ge.group = group;
	return ge;
}
2009-12-11 07:03:54 +00:00
/* Create a CHAR node holding the literal character ch.
 * Returns QSE_NULL on allocation failure. */
static qse_rex_node_t* newcharnode (comp_t* c, qse_char_t ch)
{
	qse_rex_node_t* chnode;

	chnode = newnode (c, QSE_REX_NODE_CHAR);
	if (chnode == QSE_NULL) return QSE_NULL;

	chnode->u.c = ch;
	return chnode;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
static qse_rex_node_t * newbranchnode (
2010-05-10 07:44:39 +00:00
comp_t * c , qse_rex_node_t * left , qse_rex_node_t * alter )
2009-12-11 07:03:54 +00:00
{
qse_rex_node_t * n = newnode ( c , QSE_REX_NODE_BRANCH ) ;
if ( n ! = QSE_NULL )
2007-05-02 01:07:00 +00:00
{
2010-05-10 07:44:39 +00:00
/*n->u.b.left = left; */
n - > next = left ;
n - > u . b . alter = alter ;
2007-05-02 01:07:00 +00:00
}
return n ;
}
2009-12-11 07:03:54 +00:00
/* Bail out of the calling function when no input is left.
 *
 * If com->ptr has reached com->end, the error number is set to
 * QSE_REX_EPREEND and the macro executes 'return -1' in the function that
 * uses it, so it may only be used in functions returning int.
 * The argument is evaluated more than once; it is parenthesized so that
 * any pointer-valued expression may be passed. */
#define CHECK_END(com) \
	do { \
		if ((com)->ptr >= (com)->end) \
		{ \
			(com)->rex->errnum = QSE_REX_EPREEND; \
			return -1; \
		} \
	} while (0)
/* IS_HEX(c) is non-zero if c is one of 0-9, A-F or a-f.
 * HEX_TO_NUM(c) converts such a character to its value 0..15; the result
 * is meaningless for any other character.
 *
 * The parameter and each result arm are parenthesized so that an
 * arbitrary expression can be passed. Both macros still evaluate the
 * argument several times, so it must be free of side effects. */
#define IS_HEX(c) \
	(((c) >= QSE_T('0') && (c) <= QSE_T('9')) || \
	 ((c) >= QSE_T('A') && (c) <= QSE_T('F')) || \
	 ((c) >= QSE_T('a') && (c) <= QSE_T('f')))
#define HEX_TO_NUM(c) \
	(((c) >= QSE_T('0') && (c) <= QSE_T('9'))? ((c) - QSE_T('0')): \
	 ((c) >= QSE_T('A') && (c) <= QSE_T('F'))? ((c) - QSE_T('A') + 10): \
	                                           ((c) - QSE_T('a') + 10))
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Tests on the character most recently fetched by getc():
 * IS_SPE - it equals ch and did not come from a backslash escape, so it
 *          may be interpreted as a special character
 * IS_ESC - it came from a backslash escape
 * IS_EOF - the end of the expression has been reached */
# define IS_SPE(com,ch) ((com)->c.value == (ch) && !(com)->c.escaped)
# define IS_ESC(com) ((com)->c.escaped)
# define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF)
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* getc_noesc - fetch the next character as is; a backslash is not decoded
 * getc_esc   - fetch the next character, decoding a backslash escape */
# define getc_noesc(c) getc(c,1)
# define getc_esc(c) getc(c,0)
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
static int getc ( comp_t * com , int noesc )
{
qse_char_t c ;
if ( com - > ptr > = com - > end )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
com - > c . value = QSE_CHAR_EOF ;
com - > c . escaped = 0 ;
2007-05-02 01:07:00 +00:00
return 0 ;
}
2009-12-11 07:03:54 +00:00
com - > c . value = * com - > ptr + + ;
com - > c . escaped = 0 ;
if ( noesc | | com - > c . value ! = QSE_T ( ' \\ ' ) ) return 0 ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
CHECK_END ( com ) ;
c = * com - > ptr + + ;
if ( c = = QSE_T ( ' n ' ) ) c = QSE_T ( ' \n ' ) ;
else if ( c = = QSE_T ( ' r ' ) ) c = QSE_T ( ' \r ' ) ;
else if ( c = = QSE_T ( ' t ' ) ) c = QSE_T ( ' \t ' ) ;
else if ( c = = QSE_T ( ' f ' ) ) c = QSE_T ( ' \f ' ) ;
else if ( c = = QSE_T ( ' b ' ) ) c = QSE_T ( ' \b ' ) ;
else if ( c = = QSE_T ( ' v ' ) ) c = QSE_T ( ' \v ' ) ;
else if ( c = = QSE_T ( ' a ' ) ) c = QSE_T ( ' \a ' ) ;
2010-07-09 00:58:44 +00:00
#if 0
/* backrefernce conflicts with octal notation */
2009-12-11 07:03:54 +00:00
else if ( c > = QSE_T ( ' 0 ' ) & & c < = QSE_T ( ' 7 ' ) )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_char_t cx ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
c = c - QSE_T ( ' 0 ' ) ;
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
if ( cx > = QSE_T ( ' 0 ' ) & & cx < = QSE_T ( ' 7 ' ) )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
c = c * 8 + cx - QSE_T ( ' 0 ' ) ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
if ( cx > = QSE_T ( ' 0 ' ) & & cx < = QSE_T ( ' 7 ' ) )
{
c = c * 8 + cx - QSE_T ( ' 0 ' ) ;
}
}
2007-05-02 01:07:00 +00:00
}
2010-07-09 00:58:44 +00:00
# endif
2009-12-11 07:03:54 +00:00
else if ( c = = QSE_T ( ' x ' ) )
{
qse_char_t cx ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
if ( IS_HEX ( cx ) )
{
c = HEX_TO_NUM ( cx ) ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
if ( IS_HEX ( cx ) )
{
c = c * 16 + HEX_TO_NUM ( cx ) ;
}
}
}
2014-11-14 02:44:20 +00:00
# if defined(QSE_CHAR_IS_WCHAR)
2009-12-11 07:03:54 +00:00
else if ( c = = QSE_T ( ' u ' ) & & QSE_SIZEOF ( qse_char_t ) > = 2 )
{
qse_char_t cx ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
if ( IS_HEX ( cx ) )
{
qse_size_t i ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
c = HEX_TO_NUM ( cx ) ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
for ( i = 0 ; i < 3 ; i + + )
{
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( ! IS_HEX ( cx ) ) break ;
c = c * 16 + HEX_TO_NUM ( cx ) ;
}
}
}
else if ( c = = QSE_T ( ' U ' ) & & QSE_SIZEOF ( qse_char_t ) > = 4 )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_char_t cx ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
if ( IS_HEX ( cx ) )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_size_t i ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
c = HEX_TO_NUM ( cx ) ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
for ( i = 0 ; i < 7 ; i + + )
{
CHECK_END ( com ) ;
cx = * com - > ptr + + ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( ! IS_HEX ( cx ) ) break ;
c = c * 16 + HEX_TO_NUM ( cx ) ;
}
}
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
# endif
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
com - > c . value = c ;
com - > c . escaped = QSE_TRUE ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
#if 0
com - > c = ( com - > ptr < com - > end ) ? * com - > ptr + + : QSE_CHAR_EOF ;
if ( com - > c = = QSE_CHAR_EOF )
qse_printf ( QSE_T ( " getc => <EOF> \n " ) ) ;
else qse_printf ( QSE_T ( " getc => %c \n " ) , com - > c ) ;
# endif
return 0 ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
struct ccinfo_t
2007-05-02 01:07:00 +00:00
{
2014-07-08 14:30:42 +00:00
qse_cstr_t name ;
2009-12-11 07:03:54 +00:00
int ( * func ) ( exec_t * e , qse_char_t c ) ;
} ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Non-zero if c is a space or a horizontal tab.
 * NOTE(review): cc_isblank() below uses QSE_ISBLANK, not this macro; no
 * use of it appears in this part of the file - check whether it is needed. */
# define ISBLANK(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t'))
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Character class predicates. Each returns non-zero if c belongs to the
 * class. The execution context e is not consulted by these six. */
static int cc_isalnum (exec_t* e, qse_char_t c)
{
	return QSE_ISALNUM (c);
}

static int cc_isalpha (exec_t* e, qse_char_t c)
{
	return QSE_ISALPHA (c);
}

static int cc_isblank (exec_t* e, qse_char_t c)
{
	return QSE_ISBLANK (c);
}

static int cc_iscntrl (exec_t* e, qse_char_t c)
{
	return QSE_ISCNTRL (c);
}

static int cc_isdigit (exec_t* e, qse_char_t c)
{
	return QSE_ISDIGIT (c);
}

static int cc_isgraph (exec_t* e, qse_char_t c)
{
	return QSE_ISGRAPH (c);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* [:lower:] predicate. Returns non-zero if c is a lowercase letter.
 *
 * With QSE_REX_IGNORECASE set, the case of a letter must not matter, so
 * an uppercase letter is accepted as well. A character that is not a
 * letter at all (a digit, punctuation, ...) never belongs to the class,
 * with or without the option. */
static int cc_islower (exec_t* e, qse_char_t c)
{
	if (e->rex->option & QSE_REX_IGNORECASE)
	{
		return QSE_ISLOWER(c) || QSE_ISUPPER(c);
	}

	return QSE_ISLOWER(c);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Character class predicates for [:print:], [:punct:] and [:space:].
 * Each returns non-zero if c belongs to the class; e is not consulted. */
static int cc_isprint (exec_t* e, qse_char_t c)
{
	return QSE_ISPRINT (c);
}

static int cc_ispunct (exec_t* e, qse_char_t c)
{
	return QSE_ISPUNCT (c);
}

static int cc_isspace (exec_t* e, qse_char_t c)
{
	return QSE_ISSPACE (c);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* [:upper:] predicate. Returns non-zero if c is an uppercase letter.
 *
 * With QSE_REX_IGNORECASE set, the case of a letter must not matter, so
 * a lowercase letter is accepted as well. A character that is not a
 * letter at all (a digit, punctuation, ...) never belongs to the class,
 * with or without the option. */
static int cc_isupper (exec_t* e, qse_char_t c)
{
	if (e->rex->option & QSE_REX_IGNORECASE)
	{
		return QSE_ISUPPER(c) || QSE_ISLOWER(c);
	}

	return QSE_ISUPPER(c);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* [:xdigit:] predicate: non-zero if c is a hexadecimal digit. */
static int cc_isxdigit (exec_t* e, qse_char_t c)
{
	return QSE_ISXDIGIT (c);
}
2007-05-02 01:07:00 +00:00
2010-07-09 00:58:44 +00:00
/* [:word:] predicate: non-zero if c is alphanumeric or an underscore. */
static int cc_isword (exec_t* e, qse_char_t c)
{
	if (c == QSE_T('_')) return 1;
	return QSE_ISALNUM(c) ? 1 : 0;
}
2009-12-11 07:03:54 +00:00
/* Table of the supported character classes: name, name length and the
 * predicate implementing the class. charclass() searches it in order and
 * returns the index of the matching entry, so the position of an entry is
 * significant. The entry with a QSE_NULL name terminates the table. */
static struct ccinfo_t ccinfo [ ] =
{
{ { QSE_T ( " alnum " ) , 5 } , cc_isalnum } ,
{ { QSE_T ( " alpha " ) , 5 } , cc_isalpha } ,
{ { QSE_T ( " blank " ) , 5 } , cc_isblank } ,
{ { QSE_T ( " cntrl " ) , 5 } , cc_iscntrl } ,
{ { QSE_T ( " digit " ) , 5 } , cc_isdigit } ,
{ { QSE_T ( " graph " ) , 5 } , cc_isgraph } ,
{ { QSE_T ( " lower " ) , 5 } , cc_islower } ,
{ { QSE_T ( " print " ) , 5 } , cc_isprint } ,
{ { QSE_T ( " punct " ) , 5 } , cc_ispunct } ,
{ { QSE_T ( " space " ) , 5 } , cc_isspace } ,
{ { QSE_T ( " upper " ) , 5 } , cc_isupper } ,
{ { QSE_T ( " xdigit " ) , 6 } , cc_isxdigit } ,
2010-07-09 00:58:44 +00:00
{ { QSE_T ( " word " ) , 4 } , cc_isword } ,
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/*
{ { QSE_T ( " arabic " ) , 6 } , cc_isarabic } ,
{ { QSE_T ( " chinese " ) , 7 } , cc_ischinese } ,
{ { QSE_T ( " english " ) , 7 } , cc_isenglish } ,
{ { QSE_T ( " japanese " ) , 8 } , cc_isjapanese } ,
{ { QSE_T ( " korean " ) , 6 } , cc_iskorean } ,
{ { QSE_T ( " thai " ) , 4 } , cc_isthai } ,
*/
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* end-of-table sentinel */
{ { QSE_NULL , 0 } , QSE_NULL }
} ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Parse the remainder of a character class specification such as
 * "alpha:]". It is entered with com->ptr positioned on the first
 * character of the class name; the name is matched directly against the
 * input instead of being read character by character.
 *
 * On success the character following the closing ']' has been fetched
 * and the index of the class in ccinfo[] is returned.
 * On failure -1 is returned with errnum set to QSE_REX_ECCLASS (unknown
 * name, or no ':' after the name) or QSE_REX_ERBRACK (no ']' after the
 * ':'). */
static int charclass (comp_t* com)
{
	const struct ccinfo_t* cc;
	qse_size_t remaining = com->end - com->ptr;

	for (cc = ccinfo; cc->name.ptr != QSE_NULL; cc++)
	{
		if (qse_strxbeg (com->ptr, remaining, cc->name.ptr) != QSE_NULL) break;
	}

	if (cc->name.ptr == QSE_NULL)
	{
		/* no known class name begins here */
		com->rex->errnum = QSE_REX_ECCLASS;
		return -1;
	}

	/* step over the name, then expect ":]" */
	com->ptr += cc->name.len;

	if (getc_noesc(com) <= -1) return -1;
	if (com->c.value != QSE_T(':'))
	{
		com->rex->errnum = QSE_REX_ECCLASS;
		return -1;
	}

	if (getc_noesc(com) <= -1) return -1;
	if (com->c.value != QSE_T(']'))
	{
		com->rex->errnum = QSE_REX_ERBRACK;
		return -1;
	}

	/* fetch what follows the class for the caller */
	if (getc_esc(com) <= -1) return -1;

	return (int)(cc - ccinfo);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Append len member codes to the set node via add_cset_code().
 * Beware of the hidden control flow: on failure the macro executes
 * 'return -1' in the function that uses it. */
# define ADD_CSET_CODE(com,node,code,len) \
do { if ( add_cset_code ( com , node , code , len ) < = - 1 ) return - 1 ; } while ( 0 )
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Append l member codes starting at c to the member string of a
 * character set node.
 * Returns 0 on success, or -1 with errnum set to QSE_REX_ENOMEM if the
 * string cannot be extended. */
static int add_cset_code (
	comp_t* com, qse_rex_node_t* node, const qse_char_t* c, qse_size_t l)
{
	if (qse_str_ncat (node->u.cset.member, c, l) != (qse_size_t)-1) return 0;

	com->rex->errnum = QSE_REX_ENOMEM;
	return -1;
}
2009-12-11 07:03:54 +00:00
/* Compile a bracket expression into a character set node.
 *
 * It is entered with the character following '[' already fetched and
 * leaves with the character following the closing ']' fetched.
 * node must be a fresh CSET node (not negated, no member string yet).
 *
 * The members are encoded into node->u.cset.member as a sequence of
 *   QSE_REX_CSET_CHAR  c         - a single character
 *   QSE_REX_CSET_RANGE lo hi     - a character range
 *   QSE_REX_CSET_CLASS index     - a class; index refers to ccinfo[]
 *
 * Returns 0 on success. Returns -1 with errnum set on failure:
 * QSE_REX_EPREEND if the expression ends inside the set,
 * QSE_REX_ECRANGE if a range ends below its start, QSE_REX_ENOMEM,
 * or whatever charclass() reports. */
static int charset (comp_t* com, qse_rex_node_t* node)
{
	QSE_ASSERT (node->id == QSE_REX_NODE_CSET);
	QSE_ASSERT (node->u.cset.negated == 0);
	QSE_ASSERT (node->u.cset.member == QSE_NULL);

	/* an unescaped '^' in the first position negates the set */
	if (IS_SPE(com,QSE_T('^')))
	{
		node->u.cset.negated = 1;
		if (getc_noesc(com) <= -1) return -1;
	}

	/* create the member string */
	node->u.cset.member = qse_str_open (com->rex->mmgr, 0, 64);
	if (node->u.cset.member == QSE_NULL)
	{
		com->rex->errnum = QSE_REX_ENOMEM;
		return -1;
	}

	/* the closing ']' is looked for only after a member has been taken.
	 * so ']' in the first position, or in the second position after '^',
	 * is an ordinary member. */
	for (;;)
	{
		qse_char_t code[4];
		qse_size_t ncodes;
		int last = 0;     /* the member just encoded ends the set */
		int advance = 0;  /* fetch one more character after encoding */
		int esc1, esc2;
		qse_char_t ch1, ch2;

		esc1 = com->c.escaped;
		ch1 = com->c.value;
		if (ch1 == QSE_CHAR_EOF)
		{
			com->rex->errnum = QSE_REX_EPREEND;
			return -1;
		}

		if (getc_esc(com) <= -1) return -1;
		esc2 = com->c.escaped;
		ch2 = com->c.value;

		if (!esc1 && ch1 == QSE_T('[') && !esc2 && ch2 == QSE_T(':'))
		{
			/* "[:" opens a class name. no further character is
			 * fetched here because charclass() matches the name
			 * straight from the input. */
			int cls;

			cls = charclass (com);
			if (cls <= -1) return -1;
			QSE_ASSERT (cls < QSE_TYPE_MAX(qse_char_t));

			code[0] = QSE_REX_CSET_CLASS;
			code[1] = cls;
			ncodes = 2;
		}
		else if (esc2 || ch2 != QSE_T('-'))
		{
			/* a plain member */
			code[0] = QSE_REX_CSET_CHAR;
			code[1] = ch1;
			ncodes = 2;
		}
		else
		{
			/* ch1 is followed by an unescaped '-' */
			if (getc_esc(com) <= -1) return -1;

			if (IS_SPE(com,QSE_T(']')))
			{
				/* '-' is the last character of the set.
				 * both ch1 and '-' are ordinary members */
				code[0] = QSE_REX_CSET_CHAR;
				code[1] = ch1;
				code[2] = QSE_REX_CSET_CHAR;
				code[3] = ch2;
				ncodes = 4;
				last = 1;
			}
			else
			{
				if (ch1 > com->c.value)
				{
					/* a range must not end below its start */
					com->rex->errnum = QSE_REX_ECRANGE;
					return -1;
				}

				if (ch1 == com->c.value)
				{
					/* a range of one character is just
					 * that character */
					code[0] = QSE_REX_CSET_CHAR;
					code[1] = ch1;
					ncodes = 2;
				}
				else
				{
					code[0] = QSE_REX_CSET_RANGE;
					code[1] = ch1;
					code[2] = com->c.value;
					ncodes = 3;
				}

				/* the range end has been used up */
				advance = 1;
			}
		}

		if (add_cset_code (com, node, code, ncodes) <= -1) return -1;
		if (last) break;

		if (advance && getc_esc(com) <= -1) return -1;
		if (IS_SPE(com,QSE_T(']'))) break;
	}

	/* step over the closing ']' */
	if (getc_esc(com) <= -1) return -1;
	return 0;
}
2009-12-11 07:03:54 +00:00
static int occbound ( comp_t * com , qse_rex_node_t * n )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
qse_size_t bound ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
bound = 0 ;
while ( com - > c . value > = QSE_T ( ' 0 ' ) & & com - > c . value < = QSE_T ( ' 9 ' ) )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
bound = bound * 10 + com - > c . value - QSE_T ( ' 0 ' ) ;
if ( getc_noesc ( com ) < = - 1 ) return - 1 ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
n - > occ . min = bound ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( com - > c . value = = QSE_T ( ' , ' ) )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
if ( getc_noesc ( com ) < = - 1 ) return - 1 ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( com - > c . value > = QSE_T ( ' 0 ' ) & & com - > c . value < = QSE_T ( ' 9 ' ) )
2007-05-02 01:07:00 +00:00
{
2009-06-18 06:43:50 +00:00
bound = 0 ;
2007-05-02 01:07:00 +00:00
2009-06-18 06:43:50 +00:00
do
{
2009-12-11 07:03:54 +00:00
bound = bound * 10 + com - > c . value - QSE_T ( ' 0 ' ) ;
if ( getc_noesc ( com ) < = - 1 ) return - 1 ;
2009-06-18 06:43:50 +00:00
}
2009-12-11 07:03:54 +00:00
while ( com - > c . value > = QSE_T ( ' 0 ' ) & &
com - > c . value < = QSE_T ( ' 9 ' ) ) ;
2009-06-18 06:43:50 +00:00
2009-12-11 07:03:54 +00:00
n - > occ . max = bound ;
2009-06-18 06:43:50 +00:00
}
2009-12-11 07:03:54 +00:00
else n - > occ . max = OCC_MAX ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
else n - > occ . max = n - > occ . min ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( n - > occ . min > n - > occ . min )
2007-05-02 01:07:00 +00:00
{
/* invalid occurrences range */
2009-12-11 07:03:54 +00:00
com - > rex - > errnum = QSE_REX_EBOUND ;
return - 1 ;
}
if ( com - > c . value ! = QSE_T ( ' } ' ) )
{
com - > rex - > errnum = QSE_REX_ERBRACE ;
2007-05-02 01:07:00 +00:00
return - 1 ;
}
2009-12-11 07:03:54 +00:00
if ( getc_esc ( com ) < = - 1 ) return - 1 ;
2007-05-02 01:07:00 +00:00
return 0 ;
}
2010-05-10 07:44:39 +00:00
/* forward declaration: comp_group() below calls comp_branches() to compile
 * the body of a subgroup, passing the node the body must link to (ge). */
static qse_rex_node_t * comp_branches ( comp_t * com , qse_rex_node_t * ge ) ;
2007-12-05 21:18:15 +00:00
2010-05-10 07:44:39 +00:00
/* Compile a parenthesized subgroup.
 *
 * It is entered with the current character being the opening '(' and
 * leaves with the character following the closing ')' fetched.
 *
 * Returns the GROUP node, whose u.g.head is the compiled body and whose
 * u.g.end is the matching GROUPEND node. Returns QSE_NULL on failure;
 * errnum is QSE_REX_ERPAREN if the closing ')' is missing. */
static qse_rex_node_t* comp_group (comp_t* com)
{
	qse_rex_node_t* grp;
	qse_rex_node_t* grpend;
	qse_rex_node_t* inner;

	grp = newgroupnode (com);
	if (grp == QSE_NULL) return QSE_NULL;

	grpend = newgroupendnode (com, grp);
	if (grpend == QSE_NULL) return QSE_NULL;

	/* step over '(' */
	if (getc_esc(com) <= -1) return QSE_NULL;

	com->gdepth++;

	/* the GROUPEND node is handed down so that the last node
	 * of the body links to it */
	inner = comp_branches (com, grpend);
	if (inner == QSE_NULL) return QSE_NULL;

	if (!IS_SPE(com,QSE_T(')')))
	{
		com->rex->errnum = QSE_REX_ERPAREN;
		return QSE_NULL;
	}

	com->gdepth--;

	/* step over ')' */
	if (getc_esc(com) <= -1) return QSE_NULL;

	grp->u.g.head = inner;
	grp->u.g.end = grpend;
	return grp;
}
2007-12-05 21:18:15 +00:00
2010-05-10 07:44:39 +00:00
/* Apply the occurrence specifier at the current character, if there is
 * one, to atom.
 *
 *   ?  0 to 1        *  0 to unlimited        +  1 to unlimited
 *   {  an explicit bound parsed by occbound(), unless QSE_REX_NOBOUND
 *      is set, in which case '{' is left alone
 *
 * Any other character leaves atom and the input untouched.
 * Returns atom, or QSE_NULL on failure. */
static qse_rex_node_t* comp_occ (comp_t* com, qse_rex_node_t* atom)
{
	if (com->c.value == QSE_T('?') ||
	    com->c.value == QSE_T('*') ||
	    com->c.value == QSE_T('+'))
	{
		atom->occ.min = (com->c.value == QSE_T('+'))? 1: 0;
		atom->occ.max = (com->c.value == QSE_T('?'))? 1: OCC_MAX;

		/* step over the specifier */
		if (getc_esc(com) <= -1) return QSE_NULL;
	}
	else if (com->c.value == QSE_T('{'))
	{
		if (!(com->rex->option & QSE_REX_NOBOUND))
		{
			if (getc_noesc(com) <= -1) return QSE_NULL;
			if (occbound (com, atom) <= -1) return QSE_NULL;
		}
	}

	return atom;
}
/* Compile one atom together with its occurrence specifier.
 *
 * An unescaped '(' starts a subgroup, '.' gives an ANY node, '^' a BOL
 * node, '$' an EOL node and '[' a character set. An escaped character
 * and any other character give a CHAR node.
 *
 * With QSE_REX_STRICT set, an unescaped ')', '?', '*' or '+' found where
 * an atom must begin is rejected with QSE_REX_ESPCAWP, and so is '{'
 * unless QSE_REX_NOBOUND is set.
 *
 * The atom starts out occurring exactly once. If the character after it
 * is not escaped, comp_occ() may change that.
 * Returns the atom, or QSE_NULL on failure. */
static qse_rex_node_t* comp_atom (comp_t* com)
{
	qse_rex_node_t* atom = QSE_NULL;
	int literal = IS_ESC(com);

	if (!literal)
	{
		qse_cint_t ch = com->c.value;

		if (ch == QSE_T('('))
		{
			atom = comp_group (com);
			if (atom == QSE_NULL) return QSE_NULL;
		}
		else if (ch == QSE_T('.') || ch == QSE_T('^') || ch == QSE_T('$'))
		{
			qse_rex_node_id_t id;

			if (ch == QSE_T('.')) id = QSE_REX_NODE_ANY;
			else if (ch == QSE_T('^')) id = QSE_REX_NODE_BOL;
			else id = QSE_REX_NODE_EOL;

			atom = newnode (com, id);
			if (atom == QSE_NULL) return QSE_NULL;
			if (getc_esc(com) <= -1) return QSE_NULL;
		}
		else if (ch == QSE_T('['))
		{
			atom = newnode (com, QSE_REX_NODE_CSET);
			if (atom == QSE_NULL) return QSE_NULL;
			if (getc_esc(com) <= -1) return QSE_NULL;
			if (charset (com, atom) <= -1) return QSE_NULL;
		}
		else
		{
			if (com->rex->option & QSE_REX_STRICT)
			{
				/* a special character must not sit where
				 * a normal character is required. '{' is
				 * a normal character when bound is disabled */
				if (ch == QSE_T(')') || ch == QSE_T('?') ||
				    ch == QSE_T('*') || ch == QSE_T('+') ||
				    (ch == QSE_T('{') &&
				     !(com->rex->option & QSE_REX_NOBOUND)))
				{
					com->rex->errnum = QSE_REX_ESPCAWP;
					return QSE_NULL;
				}
			}

			literal = 1;
		}
	}

	if (literal)
	{
		/* normal character */
		atom = newcharnode (com, com->c.value);
		if (atom == QSE_NULL) return QSE_NULL;
		if (getc_esc(com) <= -1) return QSE_NULL;
	}

	atom->occ.min = 1;
	atom->occ.max = 1;

	/* an escaped character can not be an occurrence specifier */
	if (!IS_ESC(com) && comp_occ (com, atom) == QSE_NULL) return QSE_NULL;

	return atom;
}
2007-05-02 01:07:00 +00:00
2010-05-11 07:15:55 +00:00
/* The region below is compiled out by '#if 0'. It holds two experimental
 * helpers that attach a branch node directly to an atom. one_or_more()
 * is unfinished: it does not check the result of newbranchnode() and it
 * still contains a free-text TODO line that is not valid C, so the region
 * cannot be re-enabled as it stands. */
#if 0
static qse_rex_node_t * zero_or_more ( comp_t * c , qse_rex_node_t * atom )
{
qse_rex_node_t * b ;
b = newbranchnode ( c , QSE_NULL , atom ) ;
if ( b = = QSE_NULL ) return QSE_NULL ;
atom - > occ . min = 1 ;
atom - > occ . max = 1 ;
atom - > next = b ;
return b ;
}
static qse_rex_node_t * one_or_more ( comp_t * c , qse_rex_node_t * atom )
{
qse_rex_node_t * b ;
b = newbranchnode ( c , atom , QSE_NULL ) ;
atom - > occ . min = 1 ;
atom - > occ . max = 1 ;
atom - > next = b ;
TODO : return b as the tail . . . .
return atom ;
}
# endif
2010-05-10 07:44:39 +00:00
/* Encloses 'atom', whose minimum occurrence is zero, in a pseudo group.
 *
 * Three nodes are created in this order: a group node, its group-end node
 * and a branch node built from 'atom' and the group-end node. The branch
 * becomes the head of the group and the atom is linked to the group-end
 * node. The atom's minimum occurrence is raised to 1; the group itself
 * occurs exactly once and both the group and the group-end are flagged
 * as pseudo.
 *
 * Returns the group node, or QSE_NULL as soon as one of the nodes cannot
 * be created. */
static qse_rex_node_t* pseudo_group (comp_t* c, qse_rex_node_t* atom)
{
	qse_rex_node_t* grp;
	qse_rex_node_t* grpend;
	qse_rex_node_t* br;

	QSE_ASSERT (atom->occ.min <= 0);

	grp = newgroupnode (c);
	if (grp == QSE_NULL) return QSE_NULL;

	grpend = newgroupendnode (c, grp);
	if (grpend == QSE_NULL) return QSE_NULL;

	br = newbranchnode (c, atom, grpend);
	if (br == QSE_NULL) return QSE_NULL;

	/* the atom inside the group is mandatory from now on and
	 * leads to the end of the group */
	atom->occ.min = 1;
	atom->next = grpend;
	QSE_ASSERT (atom->occ.max >= atom->occ.min);

	/* the pseudo group occurs exactly once */
	grp->occ.min = 1;
	grp->occ.max = 1;
	grp->u.g.head = br;
	grp->u.g.end = grpend;
	grp->u.g.pseudo = 1;
	grpend->u.ge.pseudo = 1;

	return grp;
}
2009-12-11 07:03:54 +00:00
/* compile a list of atoms at the outermost level and/or
* within a subgroup */
2010-05-10 07:44:39 +00:00
static qse_rex_node_t * comp_branch ( comp_t * c , pair_t * pair )
2007-05-02 01:07:00 +00:00
{
2010-05-10 07:44:39 +00:00
# define REACHED_END(c) \
( IS_EOF ( c ) | | IS_SPE ( c , QSE_T ( ' | ' ) ) | | \
( c - > gdepth > 0 & & IS_SPE ( c , QSE_T ( ' ) ' ) ) ) )
2007-05-02 01:07:00 +00:00
2010-05-10 07:44:39 +00:00
if ( REACHED_END ( c ) )
2007-05-02 01:07:00 +00:00
{
2010-05-10 07:44:39 +00:00
qse_rex_node_t * nop = newnopnode ( c ) ;
if ( nop = = QSE_NULL ) return QSE_NULL ;
nop - > occ . min = 1 ; nop - > occ . max = 1 ;
pair - > head = nop ; pair - > tail = nop ;
}
else
{
pair - > head = QSE_NULL ; pair - > tail = QSE_NULL ;
2007-05-02 01:07:00 +00:00
2010-05-10 07:44:39 +00:00
do
2007-05-02 01:07:00 +00:00
{
2010-05-10 07:44:39 +00:00
qse_rex_node_t * atom = comp_atom ( c ) ;
if ( atom = = QSE_NULL ) return QSE_NULL ;
2010-05-11 07:15:55 +00:00
2010-05-10 07:44:39 +00:00
if ( atom - > occ . min < = 0 )
2009-12-11 07:03:54 +00:00
{
2010-05-11 07:15:55 +00:00
#if 0
if ( atom - > occ . max > = OCC_MAX )
{
/*
* + - - - - - - - - - - - next - - +
* v |
* BR - - alter - - - - > ORG ( atom )
* |
* + - - - - next - - - - - - - - - - - - - - - - - - - >
*
*/
atom = zero_or_more ( c , atom ) ;
}
else
{
# endif
/*
* Given an atom , enclose it with a
* pseudogroup head and a psuedogroup
* tail . the head is followed by a
* branch that conntects to the tail
* and the atom given . The atom given
* gets connected to the tail .
* Head - > BR - > Tail
* - > ORG ( atom ) - > Tail
*/
atom = pseudo_group ( c , atom ) ;
2013-07-24 04:10:02 +00:00
#if 0
2010-05-11 07:15:55 +00:00
}
2013-07-24 04:10:02 +00:00
# endif
2010-05-10 07:44:39 +00:00
if ( atom = = QSE_NULL ) return QSE_NULL ;
2009-12-11 07:03:54 +00:00
}
2010-05-10 07:44:39 +00:00
if ( pair - > tail = = QSE_NULL )
{
QSE_ASSERT ( pair - > head = = QSE_NULL ) ;
pair - > head = atom ;
}
else pair - > tail - > next = atom ;
pair - > tail = atom ;
2007-05-02 01:07:00 +00:00
}
2010-05-10 07:44:39 +00:00
while ( ! REACHED_END ( c ) ) ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
return pair - > head ;
2010-05-10 07:44:39 +00:00
# undef REACHED_END
2007-05-02 01:07:00 +00:00
}
2010-05-10 07:44:39 +00:00
/* Compiles one or more sequences separated by '|'.
 *
 * The tail of every compiled sequence is linked to 'ge'. Each further
 * sequence is combined with what has been compiled so far through a new
 * branch node created by newbranchnode().
 *
 * Returns the resulting top node, or QSE_NULL on failure. */
static qse_rex_node_t* comp_branches (comp_t* c, qse_rex_node_t* ge)
{
	pair_t seq;
	qse_rex_node_t* tree;

	tree = comp_branch (c, &seq);
	if (tree == QSE_NULL) return QSE_NULL;
	seq.tail->next = ge;

	while (IS_SPE(c,QSE_T('|')))
	{
		qse_rex_node_t* alt;
		qse_rex_node_t* joint;

		/* move past the '|' */
		if (getc_esc (c) <= -1) return QSE_NULL;

		alt = comp_branch (c, &seq);
		if (alt == QSE_NULL) return QSE_NULL;
		seq.tail->next = ge;

		joint = newbranchnode (c, tree, alt);
		if (joint == QSE_NULL) return QSE_NULL;
		tree = joint;
	}

	return tree;
}
2009-12-11 07:03:54 +00:00
/* Compiles the pattern of 'len' characters at 'ptr' for 'rex'.
 *
 * A START node and an END node are created around the compiled body.
 * On success any code previously held in rex->code is released, rex->code
 * is set to the new START node and that node is returned. On failure
 * QSE_NULL is returned and rex->code is left as it was. */
qse_rex_node_t* qse_rex_comp (
	qse_rex_t* rex, const qse_char_t* ptr, qse_size_t len)
{
	comp_t cc;
	qse_rex_node_t* tail;
	qse_rex_node_t* tree;

	cc.rex = rex;
	cc.re.ptr = ptr;
	cc.re.len = len;
	cc.ptr = ptr;
	cc.end = ptr + len;
	cc.c.value = QSE_CHAR_EOF;
	cc.gdepth = 0;
	cc.start = QSE_NULL;

	/* fetch the first character of the pattern */
	if (getc_esc (&cc) <= -1) return QSE_NULL;

	cc.start = newstartnode (&cc);
	if (cc.start == QSE_NULL) return QSE_NULL;

	tail = newendnode (&cc);
	if (tail == QSE_NULL)
	{
		freenode (cc.start, cc.rex->mmgr);
		return QSE_NULL;
	}

	tree = comp_branches (&cc, tail);
	if (tree == QSE_NULL)
	{
		freeallnodes (cc.start);
		return QSE_NULL;
	}

	cc.start->next = tree;

	/* replace the code compiled previously, if any */
	if (rex->code != QSE_NULL) freeallnodes (rex->code);
	rex->code = cc.start;

	return rex->code;
}
2009-12-11 07:03:54 +00:00
/* Releases every element of the list starting at 'gs' by following the
 * 'next' links. 'gs' may be QSE_NULL, in which case nothing is done. */
static void freegroupstackmembers (group_t* gs, qse_mmgr_t* mmgr)
{
	group_t* following;

	for (; gs != QSE_NULL; gs = following)
	{
		/* read the link before the element is gone */
		following = gs->next;
		QSE_MMGR_FREE (mmgr, gs);
	}
}
2009-12-11 07:03:54 +00:00
/* Releases a whole group stack: the management head 'gs' and all the
 * elements linked after it. 'gs' must be a stack head, that is, a
 * non-null element whose 'node' field is QSE_NULL. */
static void freegroupstack (group_t* gs, qse_mmgr_t* mmgr)
{
	QSE_ASSERT (gs != QSE_NULL);
	QSE_ASSERTX (gs->node == QSE_NULL,
		" The head of a group stack must point to QSE_NULL for management purpose. ");

	/* the head is the first element of the list */
	freegroupstackmembers (gs, mmgr);
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Increments the reference count kept in the 'occ' field of the stack
 * head 'gs'. A QSE_NULL stack is ignored. */
static void refupgroupstack (group_t* gs)
{
	if (gs == QSE_NULL) return;

	QSE_ASSERTX (gs->node == QSE_NULL,
		" The head of a group stack must point to QSE_NULL for management purpose. ");
	gs->occ++;
}
2009-12-11 07:03:54 +00:00
/* Decrements the reference count kept in the 'occ' field of the stack
 * head 'gs' and releases the whole stack once the count is no longer
 * positive. A QSE_NULL stack is ignored. */
static void refdowngroupstack (group_t* gs, qse_mmgr_t* mmgr)
{
	if (gs == QSE_NULL) return;

	QSE_ASSERTX (gs->node == QSE_NULL,
		" The head of a group stack must point to QSE_NULL for management purpose. ");

	gs->occ--;
	if (gs->occ <= 0) freegroupstack (gs, mmgr);
}
2009-12-11 07:03:54 +00:00
/* Duplicates the list of group stack elements starting at 'g'.
 *
 * Each element is copied as a whole (so 'node' and 'occ' are preserved)
 * and the copies are linked in the same order as the originals.
 * 'g' must not be QSE_NULL. It may be a stack head or a data element.
 *
 * The list is walked iteratively, so the native stack usage does not grow
 * with the depth of the group stack.
 *
 * Returns the first element of the copy. If memory runs out, the partial
 * copy is released, e->rex->errnum is set to QSE_REX_ENOMEM and QSE_NULL
 * is returned. */
static group_t* dupgroupstackmembers (exec_t* e, group_t* g)
{
	group_t* first = QSE_NULL;
	group_t** link = &first; /* where the next copy is to be attached */
	group_t* src;

	QSE_ASSERT (g != QSE_NULL);

	for (src = g; src != QSE_NULL; src = src->next)
	{
		group_t* copy;

		copy = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*copy));
		if (copy == QSE_NULL)
		{
			/* the partial copy may begin with a data element
			 * rather than a stack head. it must be released with
			 * freegroupstackmembers() as freegroupstack() accepts
			 * a stack head only. */
			freegroupstackmembers (first, e->rex->mmgr);
			e->rex->errnum = QSE_REX_ENOMEM;
			return QSE_NULL;
		}

		QSE_MEMCPY (copy, src, QSE_SIZEOF(*copy));
		/* keep the partial copy properly terminated at all times */
		copy->next = QSE_NULL;

		*link = copy;
		link = &copy->next;
	}

	return first;
}
2009-12-11 07:03:54 +00:00
/* Duplicates the whole group stack whose management head is 'gs'.
 * The reference count of the duplicate starts from 0.
 * Returns the head of the duplicate, or QSE_NULL if duplication fails. */
static group_t* dupgroupstack (exec_t* e, group_t* gs)
{
	group_t* copy;

	QSE_ASSERT (gs != QSE_NULL);
	QSE_ASSERTX (gs->node == QSE_NULL,
		" The head of a group stack must point to QSE_NULL for management purpose. ");

	copy = dupgroupstackmembers (e, gs);
	if (copy != QSE_NULL)
	{
		QSE_ASSERTX (
			copy->node == QSE_NULL &&
			copy->node == gs->node &&
			copy->occ == gs->occ,
			" The duplicated stack head must not be corrupted "
		);

		/* nobody refers to the duplicate yet */
		copy->occ = 0;
	}

	return copy;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Pushes the group node 'gn' onto the group stack 'gs'.
 *
 * - If 'gs' is QSE_NULL, a new stack is created: a management head with
 *   a QSE_NULL node, a zero reference count and no elements.
 * - Otherwise, if 'dup' is non-zero, 'gs' is duplicated and the push is
 *   made on the duplicate; if 'dup' is zero, 'gs' itself is modified.
 *
 * Returns the head of the stack pushed onto. On memory shortage, a head
 * or duplicate created here is released, e->rex->errnum is set to
 * QSE_REX_ENOMEM when an allocation made here fails, and QSE_NULL is
 * returned. */
static group_t* __groupstackpush (
	exec_t* e, group_t* gs, qse_rex_node_t* gn, int dup)
{
	group_t* stack;
	group_t* item;

	QSE_ASSERT (gn->id == QSE_REX_NODE_GROUP);

	if (gs != QSE_NULL)
	{
		/* work on a duplicate if requested, or on the stack given */
		stack = dup? dupgroupstack (e, gs): gs;
		if (stack == QSE_NULL) return QSE_NULL;
	}
	else
	{
		/* no stack exists yet. create the head that carries
		 * management information only. */
		stack = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*stack));
		if (stack == QSE_NULL)
		{
			e->rex->errnum = QSE_REX_ENOMEM;
			return QSE_NULL;
		}

		/* the head refers to no group node. its occ field serves
		 * as the reference count maintained by refupgroupstack()
		 * and refdowngroupstack(). */
		stack->node = QSE_NULL;
		stack->occ = 0;
		stack->next = QSE_NULL;
	}

	item = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*item));
	if (item == QSE_NULL)
	{
		/* undo what has been created in this call */
		if (gs == QSE_NULL)
		{
			QSE_MMGR_FREE (e->rex->mmgr, stack);
		}
		else if (dup)
		{
			freegroupstack (stack, e->rex->mmgr);
		}

		e->rex->errnum = QSE_REX_ENOMEM;
		return QSE_NULL;
	}

	item->node = gn;
	item->occ = 0;

	/* the new element becomes the top of the stack */
	item->next = stack->next;
	stack->next = item;

	return stack;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* convenience wrappers over __groupstackpush():
 * dupgroupstackpush() passes 1 as 'dup' so that the push is made on a
 * duplicate of the stack; groupstackpush() passes 0 so that the push is
 * made on the given stack itself. */
# define dupgroupstackpush(e,gs,gn) __groupstackpush(e,gs,gn,1)
# define groupstackpush(e,gs,gn) __groupstackpush(e,gs,gn,0)
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Duplicates the group stack 'gs' leaving out its top data element.
 *
 * 'gs' must be a stack head followed by at least two data elements.
 * The elements below the top are copied and placed under a fresh
 * management head whose reference count is 0.
 *
 * Returns the new head, or QSE_NULL on failure. */
static group_t* dupgroupstackpop (exec_t* e, group_t* gs)
{
	group_t* rest;
	group_t* newhead;

	QSE_ASSERT (gs != QSE_NULL);
	QSE_ASSERTX (gs->node == QSE_NULL,
		" The head of a group stack must point to QSE_NULL for management purpose. ");
	QSE_ASSERTX (gs->next != QSE_NULL && gs->next->next != QSE_NULL,
		" dupgroupstackpop() needs at least two data elements ");

	/* copy everything below the top data element */
	rest = dupgroupstackmembers (e, gs->next->next);
	if (rest == QSE_NULL) return QSE_NULL;

	newhead = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*newhead));
	if (newhead == QSE_NULL)
	{
		/* 'rest' is known to be non-null at this point */
		freegroupstackmembers (rest, e->rex->mmgr);
		e->rex->errnum = QSE_REX_ENOMEM;
		return QSE_NULL;
	}

	newhead->node = QSE_NULL;
	newhead->occ = 0;
	newhead->next = rest;

	return newhead;
}
2009-12-11 07:03:54 +00:00
/* Removes and releases the top data element of the group stack 'gs'.
 * 'gs' must be a stack head followed by at least two data elements.
 * Always returns 'gs' itself. */
static group_t* groupstackpop (exec_t* e, group_t* gs)
{
	group_t* victim;

	QSE_ASSERT (gs != QSE_NULL);
	QSE_ASSERTX (gs->node == QSE_NULL,
		" The head of a group stack must point to QSE_NULL for management purpose. ");
	QSE_ASSERTX (gs->next != QSE_NULL && gs->next->next != QSE_NULL,
		" groupstackpop() needs at least two data elements ");

	/* unlink the top element before releasing it */
	victim = gs->next;
	gs->next = victim->next;
	QSE_MMGR_FREE (e->rex->mmgr, victim);

	return gs;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* Adds a candidate for the simple node 'node' to the pending set.
 *
 * The candidate records the node, the occurrence count 'occ', the group
 * stack 'group' and the match pointer 'mptr'. If the pending set already
 * holds an entry that qse_lda_search() reports as equal, nothing is added.
 * When a candidate is actually inserted, a reference to 'group' is taken;
 * it is to be dropped by the freeer of the set.
 *
 * Returns 0 on success (added or already present) and -1 with
 * e->rex->errnum set to QSE_REX_ENOMEM if insertion fails. */
static int addsimplecand (
	exec_t* e, group_t* group, qse_rex_node_t* node,
	qse_size_t occ, const qse_char_t* mptr)
{
	qse_lda_t* pending = &e->cand.set[e->cand.pending];
	cand_t entry;

	QSE_ASSERT (
		node->id == QSE_REX_NODE_NOP ||
		node->id == QSE_REX_NODE_BOL ||
		node->id == QSE_REX_NODE_EOL ||
		node->id == QSE_REX_NODE_ANY ||
		node->id == QSE_REX_NODE_CHAR ||
		node->id == QSE_REX_NODE_CSET
	);

	entry.node = node;
	entry.occ = occ;
	entry.group = group;
	entry.mptr = mptr;

	/* skip an entry that exists in the set already. the search is
	 * linear and could become a bottleneck for a large set, but
	 * practical patterns are not expected to produce many candidates
	 * for a single match point. */
	if (qse_lda_search (pending, 0, &entry, 1) != QSE_LDA_NIL) return 0;

	/* append the candidate to the end of the set */
	if (qse_lda_insert (pending, QSE_LDA_SIZE(pending), &entry, 1) == QSE_LDA_NIL)
	{
		e->rex->errnum = QSE_REX_ENOMEM;
		return -1;
	}

	/* the reference taken here is dropped by the freeer of the set */
	refupgroupstack (group);
	return 0;
}
/* addcands() adds candidates reachable from 'candnode' to the pending set.
 * If 'candnode' is not a simple node, the function traverses further
 * until it reaches simple nodes. 'prevnode' is the last GROUPEND node
 * visited during traversal. If no GROUPEND has been visited yet, it can
 * be any starting node. 'group' is the group stack in effect (QSE_NULL if
 * none) and 'mptr' is the match pointer recorded in the candidates added.
 *
 * Returns 0 on success and -1 if a nested call, a stack operation or
 * addsimplecand() fails. */
static int addcands (
exec_t * e , group_t * group , qse_rex_node_t * prevnode ,
qse_rex_node_t * candnode , const qse_char_t * mptr )
{
2010-05-11 07:15:55 +00:00
qse_rex_node_t * curcand = candnode ;
2009-12-11 07:03:54 +00:00
/* every 'goto warpback' restarts the dispatch below with an
 * updated 'curcand' (and possibly 'group' or 'prevnode') */
warpback :
/* skip all NOP nodes */
2010-05-11 07:15:55 +00:00
while ( curcand ! = QSE_NULL & & curcand - > id = = QSE_REX_NODE_NOP )
curcand = curcand - > next ;
2009-12-11 07:03:54 +00:00
/* nothing to add */
2010-05-11 07:15:55 +00:00
if ( curcand = = QSE_NULL ) return 0 ;
2007-05-02 01:07:00 +00:00
2010-05-11 07:15:55 +00:00
switch ( curcand - > id )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
case QSE_REX_NODE_END :
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
/* the end of the pattern is reached. remember the match end
 * unless an earlier one lies farther, and count the match. */
if ( e - > matchend = = QSE_NULL | | mptr > = e - > matchend )
e - > matchend = mptr ;
e - > nmatches + + ;
break ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
case QSE_REX_NODE_BRANCH :
2009-06-23 07:01:28 +00:00
{
2009-12-11 07:03:54 +00:00
group_t * gx = group ;
int n ;
2009-06-23 07:01:28 +00:00
2009-12-11 07:03:54 +00:00
/* the alternative path is explored recursively with its own
 * copy of the group stack, if there is a stack at all */
if ( group ! = QSE_NULL )
2009-06-23 07:01:28 +00:00
{
2009-12-11 07:03:54 +00:00
gx = dupgroupstack ( e , group ) ;
if ( gx = = QSE_NULL ) return - 1 ;
2009-06-23 07:01:28 +00:00
}
2010-05-10 07:44:39 +00:00
refupgroupstack ( gx ) ;
n = addcands ( e , gx ,
2010-05-11 07:15:55 +00:00
prevnode , curcand - > u . b . alter , mptr ) ;
2010-05-10 07:44:39 +00:00
refdowngroupstack ( gx , e - > rex - > mmgr ) ;
if ( n < = - 1 ) return - 1 ;
2010-05-11 07:15:55 +00:00
/* then continue along the 'next' link with the stack given */
curcand = curcand - > next ;
2010-05-10 07:44:39 +00:00
goto warpback ;
2009-10-20 07:33:40 +00:00
}
2009-12-11 07:03:54 +00:00
case QSE_REX_NODE_GROUP :
{
qse_rex_node_t * front ;
2010-05-10 07:44:39 +00:00
group_t * gx ;
2009-12-11 07:03:54 +00:00
2010-05-10 07:44:39 +00:00
# ifdef XTRA_DEBUG
qse_printf ( QSE_T ( " DEBUG: GROUP %p(pseudo=%d) PREV %p \n " ) ,
2010-05-11 07:15:55 +00:00
curcand , curcand - > u . g . pseudo , prevnode ) ;
2010-05-10 07:44:39 +00:00
# endif
2010-05-11 07:15:55 +00:00
/* a pseudo group is entered without touching the group stack */
if ( curcand - > u . g . pseudo )
2010-05-10 07:44:39 +00:00
{
2010-05-11 07:15:55 +00:00
curcand = curcand - > u . g . head ;
2010-05-10 07:44:39 +00:00
goto warpback ;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* skip all NOP nodes */
2010-05-11 07:15:55 +00:00
front = curcand - > u . g . head ;
2010-05-10 07:44:39 +00:00
2009-12-11 07:03:54 +00:00
while ( front - > id = = QSE_REX_NODE_NOP )
front = front - > next ;
if ( front - > id = = QSE_REX_NODE_GROUPEND )
{
/* if GROUPEND is reached, the group
* is empty . jump to the next node
* regardless of its occurrence .
* however , this will never be reached
* as it has been removed in comp ( ) */
2010-05-11 07:15:55 +00:00
curcand = curcand - > next ;
2009-12-11 07:03:54 +00:00
goto warpback ;
}
2007-05-02 01:07:00 +00:00
2010-05-11 07:15:55 +00:00
/* push the group onto the stack in place (no duplication).
 * a new stack is created if 'group' is QSE_NULL. */
gx = groupstackpush ( e , group , curcand ) ;
2010-05-10 07:44:39 +00:00
if ( gx = = QSE_NULL ) return - 1 ;
2009-12-11 07:03:54 +00:00
2010-05-10 07:44:39 +00:00
/* add the first node in the group to
* the candidate array */
2010-05-11 07:15:55 +00:00
group = gx ;
curcand = front ;
goto warpback ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
case QSE_REX_NODE_GROUPEND :
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
int n ;
group_t * top ;
qse_rex_node_t * node ;
qse_size_t occ ;
2009-06-23 07:01:28 +00:00
2010-05-10 07:44:39 +00:00
# ifdef XTRA_DEBUG
qse_printf ( QSE_T ( " DEBUG: GROUPEND %p(pseudo=%d) PREV %p \n " ) ,
2010-05-11 07:15:55 +00:00
curcand , curcand - > u . ge . pseudo , prevnode ) ;
2010-05-10 07:44:39 +00:00
# endif
2010-05-11 07:15:55 +00:00
/* the end of a pseudo group simply leads to the node that
 * follows the paired pseudo group */
if ( curcand - > u . ge . pseudo )
2010-05-10 07:44:39 +00:00
{
2010-05-11 07:15:55 +00:00
curcand = curcand - > u . ge . group - > next ;
2010-05-10 07:44:39 +00:00
goto warpback ;
}
2009-12-11 07:03:54 +00:00
QSE_ASSERTX (
group ! = QSE_NULL & & group - > next ! = QSE_NULL ,
" GROUPEND must be paired up with GROUP " ) ;
2007-05-02 01:07:00 +00:00
2010-05-11 07:15:55 +00:00
if ( prevnode = = curcand )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
/* consider a pattern like (x*)*.
* when GROUPEND is reached , an ' if ' block
* below tries to add the first node
* ( node - > u . g . head ) in the group again .
* however , it ( ' x ' ) is optional , a possible
* path reach GROUPEND directly without
* adding a candidate . this check is needed to
* avoid the infinite loop , which otherwise is
* not avoidable . */
break ;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/* count one more completed pass over the group on top of
 * the stack */
top = group - > next ;
top - > occ + + ;
occ = top - > occ ;
node = top - > node ;
2010-05-11 07:15:55 +00:00
QSE_ASSERTX ( node = = curcand - > u . ge . group ,
2012-12-13 13:07:16 +00:00
" The GROUP node in the group stack must be the one pairing up with the GROUPEND node. "
2009-12-11 07:03:54 +00:00
) ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( occ > = node - > occ . min )
{
group_t * gx ;
/* the lower bound has been met.
* for a pattern ( abc ) { 3 , 4 } , ' abc ' has been
* repeated 3 times . in this case , the next
* node can be added to the candiate array .
* it is actually a branch case . move on . */
if ( top - > next = = QSE_NULL )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
/* only one element in the stack.
* falls back to QSE_NULL regardless
* of the need to reuse it */
gx = QSE_NULL ;
}
else if ( occ < node - > occ . max )
{
/* check if the group will be repeated.
* if so , duplicate the group stack
* excluding the top . it goes along a
* different path and hence requires
* duplication . */
gx = dupgroupstackpop ( e , group ) ;
if ( gx = = QSE_NULL ) return - 1 ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
else
{
/* reuse the group stack. pop the top
* data element off the stack */
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
gx = groupstackpop ( e , group ) ;
/* this function always succeeds and
* returns the same head */
QSE_ASSERT ( gx = = group ) ;
}
refupgroupstack ( gx ) ;
2010-05-10 07:44:39 +00:00
2009-12-11 07:03:54 +00:00
/* leave the group. an already known GROUPEND is kept as
 * the previous node; otherwise this GROUPEND takes the role. */
if ( prevnode ! = QSE_NULL & &
prevnode - > id = = QSE_REX_NODE_GROUPEND )
n = addcands ( e , gx , prevnode , node - > next , mptr ) ;
else
2010-05-11 07:15:55 +00:00
n = addcands ( e , gx , curcand , node - > next , mptr ) ;
2010-05-10 07:44:39 +00:00
2009-12-11 07:03:54 +00:00
refdowngroupstack ( gx , e - > rex - > mmgr ) ;
if ( n < = - 1 ) return - 1 ;
}
if ( occ < node - > occ . max )
{
/* repeat itself. */
2010-05-11 07:15:55 +00:00
prevnode = curcand ;
curcand = node - > u . g . head ;
2009-12-11 07:03:54 +00:00
goto warpback ;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
break ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
default :
{
int n ;
2010-05-10 07:44:39 +00:00
/* a simple node is reached. add it as a candidate with an
 * occurrence count of 1. the temporary reference lets the
 * stack be released here if no candidate ends up holding it
 * and nobody else refers to it. */
if ( group ) refupgroupstack ( group ) ;
2010-05-11 07:15:55 +00:00
n = addsimplecand ( e , group , curcand , 1 , mptr ) ;
2010-05-10 07:44:39 +00:00
if ( group ) refdowngroupstack ( group , e - > rex - > mmgr ) ;
2009-12-11 07:03:54 +00:00
2010-05-10 07:44:39 +00:00
if ( n < = - 1 ) return - 1 ;
2009-12-11 07:03:54 +00:00
break ;
}
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
return 0 ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
/* Tests whether the character 'c' belongs to the character set of 'node'.
 *
 * The member string of the set is a sequence of items, each introduced by
 * a code: QSE_REX_CSET_CHAR is followed by one character,
 * QSE_REX_CSET_RANGE by the two ends of an inclusive range, and
 * QSE_REX_CSET_CLASS by an index into ccinfo. Characters and ranges are
 * compared in upper case if QSE_REX_IGNORECASE is set.
 *
 * Returns non-zero if 'c' matches, taking negation of the set into
 * account, and 0 otherwise. 0 is returned straight away if an unknown
 * item code is met. */
static int charset_matched (exec_t* e, qse_rex_node_t* node, qse_char_t c)
{
	const qse_char_t* ptr;
	const qse_char_t* end;
	int icase;
	int matched = 0;

	QSE_ASSERT (node->u.cset.member != QSE_NULL);

	icase = (e->rex->option & QSE_REX_IGNORECASE) != 0;
	end = QSE_STR_PTR(node->u.cset.member) + QSE_STR_LEN(node->u.cset.member);

	/* 'ptr' sits on an item code at the top of each iteration. each case
	 * leaves it on the last character of the item and the loop step
	 * moves it to the next item code. */
	for (ptr = QSE_STR_PTR(node->u.cset.member); ptr < end && !matched; ptr++)
	{
		switch (*ptr)
		{
			case QSE_REX_CSET_CHAR:
			{
				qse_char_t member = *++ptr;

				if (icase)
				{
					if (QSE_TOUPPER(c) == QSE_TOUPPER(member)) matched = 1;
				}
				else if (c == member) matched = 1;

				break;
			}

			case QSE_REX_CSET_RANGE:
			{
				qse_char_t lo = *++ptr;
				qse_char_t hi = *++ptr;

				if (icase)
				{
					qse_char_t probe;

					lo = QSE_TOUPPER(lo);
					hi = QSE_TOUPPER(hi);
					probe = QSE_TOUPPER(c);
					if (probe >= lo && probe <= hi) matched = 1;
				}
				else if (c >= lo && c <= hi) matched = 1;

				break;
			}

			case QSE_REX_CSET_CLASS:
			{
				qse_char_t cls = *++ptr;

				QSE_ASSERT (cls < QSE_COUNTOF(ccinfo));
				if (ccinfo[cls].func (e, c)) matched = 1;
				break;
			}

			default:
			{
				QSE_ASSERTX (0,
					" SHOUL NEVER HAPPEN - membership code for a character set must be one of QSE_REX_CSET_CHAR, QSE_REX_CSET_RANGE, QSE_REX_CSET_CLASS ");

				/* something is totally wrong if this part
				 * is reached. report no match. */
				return 0;
			}
		}
	}

	if (node->u.cset.negated) matched = !matched;
	return matched;
}
2010-05-11 07:15:55 +00:00
/* Walker applied to each candidate of the active set.
 *
 * The candidate's node is tried at the candidate's match pointer. ^ and $
 * match a position and leave the match pointer where it is; the other
 * nodes consume one character. If the node matches:
 *  - when the candidate's occurrence count has reached the node's minimum,
 *    the nodes following it are added through addcands();
 *  - when the count is still below the node's maximum, the node itself is
 *    added again with the count incremented.
 *
 * 'ctx' is the exec_t in use. Returns QSE_LDA_WALK_FORWARD to go on to
 * the next candidate and QSE_LDA_WALK_STOP if an error occurs. */
static qse_lda_walk_t walk_cands_for_match (
	qse_lda_t* lda, qse_size_t index, void* ctx)
{
	exec_t* e = (exec_t*)ctx;
	cand_t* cand = QSE_LDA_DPTR(lda,index);
	qse_rex_node_t* node = cand->node;
	const qse_char_t* after = QSE_NULL; /* match pointer after a match */
	int n;

	switch (node->id)
	{
		case QSE_REX_NODE_BOL:
			if (cand->mptr == e->str.ptr)
			{
				/* ^ matches a position. nothing is consumed */
				after = cand->mptr;
			#ifdef XTRA_DEBUG
				qse_printf (QSE_T(" DEBUG: matched <^> \n "));
			#endif
			}
			break;

		case QSE_REX_NODE_EOL:
			if (cand->mptr >= e->str.end)
			{
				/* $ matches a position. nothing is consumed */
				after = cand->mptr;
			#ifdef XTRA_DEBUG
				qse_printf (QSE_T(" DEBUG: matched <$> \n "));
			#endif
			}
			break;

		case QSE_REX_NODE_ANY:
			if (cand->mptr < e->sub.end)
			{
				/* one character is consumed */
				after = cand->mptr + 1;
			#ifdef XTRA_DEBUG
				qse_printf (QSE_T(" DEBUG: matched <.> \n "));
			#endif
			}
			break;

		case QSE_REX_NODE_CHAR:
			if (cand->mptr < e->sub.end)
			{
				int same;

				if (e->rex->option & QSE_REX_IGNORECASE)
					same = (QSE_TOUPPER(node->u.c) == QSE_TOUPPER(*cand->mptr));
				else
					same = (node->u.c == *cand->mptr);

				/* one character is consumed */
				if (same) after = cand->mptr + 1;
			#ifdef XTRA_DEBUG
				qse_printf (QSE_T(" DEBUG: matched %c \n "), node->u.c);
			#endif
			}
			break;

		case QSE_REX_NODE_CSET:
			if (cand->mptr < e->sub.end &&
			    charset_matched (e, node, *cand->mptr))
			{
				/* one character is consumed */
				after = cand->mptr + 1;
			}
			break;

		default:
			QSE_ASSERTX (0,
				" SHOULD NEVER HAPPEN - node ID must be one of QSE_REX_NODE_BOL, QSE_REX_NODE_EOL, QSE_REX_NODE_ANY, QSE_REX_NODE_CHAR, QSE_REX_NODE_CSET, QSE_REX_NODE_NOP ");
			break;
	}

	/* the candidate has not matched. drop it */
	if (after == QSE_NULL) return QSE_LDA_WALK_FORWARD;

	if (cand->occ >= node->occ.min)
	{
		group_t* gx;

		/* the path leaving the node needs its own group stack
		 * if the node may also repeat itself below */
		if (cand->occ < node->occ.max && cand->group != QSE_NULL)
		{
			gx = dupgroupstack (e, cand->group);
			if (gx == QSE_NULL) return QSE_LDA_WALK_STOP;
		}
		else
		{
			gx = cand->group;
		}

		/* move on to the nodes that follow */
		refupgroupstack (gx);
		n = addcands (e, gx, node, node->next, after);
		refdowngroupstack (gx, e->rex->mmgr);
		if (n <= -1) return QSE_LDA_WALK_STOP;
	}

	if (cand->occ < node->occ.max)
	{
		/* the node may occur once more */
		refupgroupstack (cand->group);
		n = addsimplecand (e, cand->group, node, cand->occ + 1, after);
		refdowngroupstack (cand->group, e->rex->mmgr);
		if (n <= -1) return QSE_LDA_WALK_STOP;
	}

	return QSE_LDA_WALK_FORWARD;
}
/* Runs the compiled code of e->rex starting at e->sub.ptr.
 *
 * Two candidate sets are used in turn. The initial candidates are
 * collected into the pending set. In each round the sets are swapped,
 * the new pending set is emptied and every candidate of the active set
 * is tried by walk_cands_for_match(), which fills the pending set for
 * the next round. The rounds end when the active set is empty.
 *
 * Returns 1 if at least one match has been found, 0 if none, and -1 on
 * error. */
static int exec (exec_t* e)
{
	int rc;

	e->nmatches = 0;
	e->matchend = QSE_NULL;

	e->cand.pending = 0;
	e->cand.active = 1;

	/* the pending set must be empty before the initial candidates
	 * are collected */
	qse_lda_clear (&e->cand.set[e->cand.pending]);

	/* the code always begins with the START node */
	QSE_ASSERT (e->rex->code->id == QSE_REX_NODE_START);

	/* collect the initial candidates. no group is entered yet, the
	 * START node serves as a dummy previous node and traversal begins
	 * with the node following it at the current match pointer. */
	rc = addcands (e, QSE_NULL, e->rex->code, e->rex->code->next, e->sub.ptr);
	if (rc <= -1) return -1;

	for (;;)
	{
		qse_size_t nactive;
		int swap;

		/* what has been pending becomes active. new candidates
		 * found in this round go to the other set. */
		swap = e->cand.pending;
		e->cand.pending = e->cand.active;
		e->cand.active = swap;

		nactive = QSE_LDA_SIZE(&e->cand.set[e->cand.active]);
		if (nactive <= 0) break; /* no candidates left to try */

		qse_lda_clear (&e->cand.set[e->cand.pending]);

	#ifdef XTRA_DEBUG
		{
			int i;
			qse_printf (QSE_T(" SET= "));
			for (i = 0; i < nactive; i++)
			{
				cand_t* cand = QSE_LDA_DPTR(&e->cand.set[e->cand.active],i);
				qse_rex_node_t* node = cand->node;

				switch (node->id)
				{
					case QSE_REX_NODE_CHAR:
						qse_printf (QSE_T(" %c "), node->u.c);
						break;
					case QSE_REX_NODE_ANY:
						qse_printf (QSE_T(" . "), node->u.c);
						break;
					case QSE_REX_NODE_BOL:
						qse_printf (QSE_T(" ^ "));
						break;
					case QSE_REX_NODE_EOL:
						qse_printf (QSE_T(" $ "));
						break;
					default:
						break;
				}
			}
			qse_printf (QSE_T(" \n "));
		}
	#endif

		/* the walk must visit every active candidate. a smaller
		 * number of visits means that it has been aborted for
		 * an error. */
		if (qse_lda_walk (
			&e->cand.set[e->cand.active],
			walk_cands_for_match, e) != nactive) return -1;
	}

#ifdef XTRA_DEBUG
	if (e->nmatches > 0)
	{
		qse_printf (QSE_T(" MATCH: %d [%.*s] \n "),
			(int)(e->matchend - e->sub.ptr),
			(int)(e->matchend - e->sub.ptr), e->sub.ptr);
	}
	qse_printf (QSE_T(" TOTAL MATCHES FOUND... %d \n "), e->nmatches);
#endif

	if (e->nmatches > 0) return 1;
	return 0;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/*
 * Element-release callback for the candidate arrays (it is installed
 * with qse_lda_setfreeer() in init_exec_dds()).
 *
 * dptr points to the cand_t being discarded; its group stack is handed
 * to refdowngroupstack() together with the array's memory manager.
 * NOTE(review): refdowngroupstack() presumably drops one reference on
 * the group stack - confirm against its definition.
 */
static void refdowngroupstack_incand (qse_lda_t* lda, void* dptr, qse_size_t dlen)
{
	cand_t* cand = (cand_t*)dptr;

	/* the callback is expected to be invoked for one element at a time */
	QSE_ASSERT (dlen == 1);

	refdowngroupstack (cand->group, lda->mmgr);
}
2009-12-11 07:03:54 +00:00
/*
 * Comparison callback for the candidate arrays (installed with
 * qse_lda_setcomper() in init_exec_dds()).
 *
 * Two candidates are considered equal only when they refer to the same
 * node, the same match pointer and the same occurrence count.
 *
 * Returns 0 if the two candidates are equal, 1 otherwise. The lda and
 * length arguments are not used.
 */
static int comp_cand (qse_lda_t* lda,
	const void* dptr1, qse_size_t dlen1,
	const void* dptr2, qse_size_t dlen2)
{
	const cand_t* lhs = (const cand_t*)dptr1;
	const cand_t* rhs = (const cand_t*)dptr2;

	if (lhs->node != rhs->node) return 1;
	if (lhs->mptr != rhs->mptr) return 1;
	if (lhs->occ != rhs->occ) return 1;

	return 0;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
/*
 * Initializes the dynamic data structures of an execution context:
 * the two candidate arrays e->cand.set[0] and e->cand.set[1].
 *
 * Each array is created with an initial capacity of 100 and then
 * configured to hold cand_t elements inline, with
 * refdowngroupstack_incand() as the element freeer and comp_cand()
 * as the element comparer.
 *
 * Returns 0 on success. On failure, sets e->rex->errnum to
 * QSE_REX_ENOMEM, leaves no array initialized and returns -1.
 */
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
{
	int i;

	if (qse_lda_init (&e->cand.set[0], mmgr, 100) <= -1)
	{
		e->rex->errnum = QSE_REX_ENOMEM;
		return -1;
	}

	if (qse_lda_init (&e->cand.set[1], mmgr, 100) <= -1)
	{
		e->rex->errnum = QSE_REX_ENOMEM;
		/* undo the first initialization so nothing is left behind */
		qse_lda_fini (&e->cand.set[0]);
		return -1;
	}

	/* both sets share exactly the same element handling */
	for (i = 0; i < 2; i++)
	{
		qse_lda_setscale (&e->cand.set[i], QSE_SIZEOF(cand_t));
		qse_lda_setcopier (&e->cand.set[i], QSE_LDA_COPIER_INLINE);
		qse_lda_setfreeer (&e->cand.set[i], refdowngroupstack_incand);
		qse_lda_setcomper (&e->cand.set[i], comp_cand);
	}

	return 0;
}
2009-12-11 07:03:54 +00:00
/*
 * Finalizes the candidate arrays set up by init_exec_dds(),
 * in the reverse order of their initialization.
 */
static void fini_exec_dds (exec_t* e)
{
	int i;

	for (i = 1; i >= 0; i--)
	{
		qse_lda_fini (&e->cand.set[i]);
	}
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
int qse_rex_exec (
2014-07-08 14:30:42 +00:00
qse_rex_t * rex , const qse_cstr_t * str ,
const qse_cstr_t * substr , qse_cstr_t * matstr )
2009-12-11 07:03:54 +00:00
{
exec_t e ;
int n = 0 ;
if ( rex - > code = = QSE_NULL )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
rex - > errnum = QSE_REX_ENOCOMP ;
return - 1 ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
QSE_MEMSET ( & e , 0 , QSE_SIZEOF ( e ) ) ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
e . rex = rex ;
e . str . ptr = str - > ptr ;
e . str . end = str - > ptr + str - > len ;
e . sub . ptr = substr - > ptr ;
e . sub . end = substr - > ptr + substr - > len ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( init_exec_dds ( & e , rex - > mmgr ) < = - 1 ) return - 1 ;
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
while ( e . sub . ptr < = e . sub . end )
{
n = exec ( & e ) ;
if ( n < = - 1 )
{
n = - 1 ;
break ;
}
2007-05-02 01:07:00 +00:00
2009-12-11 07:03:54 +00:00
if ( n > = 1 )
{
QSE_ASSERT ( e . nmatches > 0 ) ;
QSE_ASSERT ( e . matchend ! = QSE_NULL ) ;
if ( matstr )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
matstr - > ptr = e . sub . ptr ;
matstr - > len = e . matchend - e . sub . ptr ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
break ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
e . sub . ptr + + ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
fini_exec_dds ( & e ) ;
return n ;
}
/*
 * Convenience wrapper that compiles a pattern and returns the
 * compiled code detached from any qse_rex_t object.
 *
 * mmgr   - memory manager passed to qse_rex_init().
 * depth  - not used by this function.
 * option - options applied with qse_rex_setoption() before compiling.
 * ptn    - pattern text of len characters.
 * errnum - receives the error number if compilation fails. It may be
 *          QSE_NULL if the caller is not interested in the cause.
 *
 * Returns the compiled code yielded by the temporary qse_rex_t object,
 * or QSE_NULL if compilation failed.
 */
void* qse_buildrex (
	qse_mmgr_t* mmgr, qse_size_t depth, int option,
	const qse_char_t* ptn, qse_size_t len, qse_rex_errnum_t* errnum)
{
	qse_rex_t rex;
	qse_rex_node_t* code;

	qse_rex_init (&rex, mmgr, QSE_NULL);
	qse_rex_setoption (&rex, option);

	if (qse_rex_comp (&rex, ptn, len) == QSE_NULL)
	{
		/* do not write through a null error pointer */
		if (errnum) *errnum = rex.errnum;
		qse_rex_fini (&rex);
		return QSE_NULL;
	}

	/* take the code out of the temporary object before finalizing it
	 * so that the code can be handed over to the caller */
	code = qse_rex_yield (&rex);
	qse_rex_fini (&rex);
	return code;
}
int qse_matchrex (
2011-05-19 08:36:40 +00:00
qse_mmgr_t * mmgr , qse_size_t depth ,
void * code , int option ,
2014-07-08 14:30:42 +00:00
const qse_cstr_t * str , const qse_cstr_t * substr ,
qse_cstr_t * match , qse_rex_errnum_t * errnum )
2009-12-11 07:03:54 +00:00
{
qse_rex_t rex ;
int n ;
qse_rex_init ( & rex , mmgr , code ) ;
qse_rex_setoption ( & rex , option ) ;
2011-05-19 08:36:40 +00:00
if ( ( n = qse_rex_exec ( & rex , str , substr , match ) ) < = - 1 )
2007-05-02 01:07:00 +00:00
{
2009-12-11 07:03:54 +00:00
* errnum = rex . errnum ;
qse_rex_yield ( & rex ) ;
qse_rex_fini ( & rex ) ;
return - 1 ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
qse_rex_yield ( & rex ) ;
qse_rex_fini ( & rex ) ;
return n ;
2007-05-02 01:07:00 +00:00
}
2009-12-11 07:03:54 +00:00
/*
 * Disposes of compiled code obtained from qse_buildrex().
 *
 * The code is attached to a temporary qse_rex_t object which is then
 * finalized right away.
 * NOTE(review): this relies on qse_rex_fini() releasing the attached
 * code - confirm against its definition.
 */
void qse_freerex (qse_mmgr_t* mmgr, void* code)
{
	qse_rex_t owner;

	qse_rex_init (&owner, mmgr, code);
	qse_rex_fini (&owner);
}