improved tokenization by rex

This commit is contained in:
2011-05-19 08:36:40 +00:00
parent 845b4ae4f0
commit c7a49615a2
22 changed files with 547 additions and 548 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: opt.c 441 2011-04-22 14:28:43Z hyunghwan.chung $
* $Id: opt.c 462 2011-05-18 14:36:40Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -123,9 +123,10 @@ qse_cint_t qse_getopt (int argc, qse_char_t* const* argv, qse_opt_t* opt)
while (*end != QSE_T('\0') && *end != QSE_T('=')) end++;
for (o = opt->lng; o->str != QSE_NULL; o++)
for (o = opt->lng; o->str; o++)
{
const qse_char_t* str = o->str;
if (*str == QSE_T(':')) str++;
if (qse_strxcmp (opt->cur, end-opt->cur, str) != 0) continue;

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c 441 2011-04-22 14:28:43Z hyunghwan.chung $
* $Id: rex.c 462 2011-05-18 14:36:40Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -2070,25 +2070,18 @@ void* qse_buildrex (
int qse_matchrex (
qse_mmgr_t* mmgr, qse_size_t depth,
void* code, int option,
const qse_char_t* str, qse_size_t len,
const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, qse_rex_errnum_t* errnum)
qse_mmgr_t* mmgr, qse_size_t depth,
void* code, int option,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_rex_errnum_t* errnum)
{
qse_rex_t rex;
qse_cstr_t s, ss;
int n;
qse_rex_init (&rex, mmgr, code);
qse_rex_setoption (&rex, option);
s.ptr = str;
s.len = len;
ss.ptr = substr;
ss.len = sublen;
if ((n = qse_rex_exec (&rex, &s, &ss, match)) <= -1)
if ((n = qse_rex_exec (&rex, str, substr, match)) <= -1)
{
*errnum = rex.errnum;
qse_rex_yield (&rex);

View File

@ -1,5 +1,5 @@
/*
* $Id: str_dynm.c 443 2011-04-25 14:56:05Z hyunghwan.chung $
* $Id: str_dynm.c 462 2011-05-18 14:36:40Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -64,16 +64,16 @@ qse_mbs_t* qse_mbs_init (qse_mbs_t* str, qse_mmgr_t* mmgr, qse_size_t capa)
str->mmgr = mmgr;
str->sizer = QSE_NULL;
if (capa == 0) str->ptr = QSE_NULL;
if (capa == 0) str->val.ptr = QSE_NULL;
else
{
str->ptr = (qse_mchar_t*) QSE_MMGR_ALLOC (
str->val.ptr = (qse_mchar_t*) QSE_MMGR_ALLOC (
mmgr, QSE_SIZEOF(qse_mchar_t) * (capa + 1));
if (str->ptr == QSE_NULL) return QSE_NULL;
str->ptr[0] = QSE_MT('\0');
if (str->val.ptr == QSE_NULL) return QSE_NULL;
str->val.ptr[0] = QSE_MT('\0');
}
str->len = 0;
str->val.len = 0;
str->capa = capa;
return str;
@ -81,7 +81,7 @@ qse_mbs_t* qse_mbs_init (qse_mbs_t* str, qse_mmgr_t* mmgr, qse_size_t capa)
/*
 * qse_mbs_fini - release the character buffer owned by a dynamic
 * multi-byte string.
 *
 * The buffer is handed to QSE_MMGR_FREE together with str->mmgr, and only
 * when the buffer pointer is not QSE_NULL. The qse_mbs_t object itself is
 * not freed here, and the pointer field is not reset afterwards.
 *
 * This text is a diff rendering: the first `if` line is the pre-change
 * form (str->ptr) and the second is its post-change replacement
 * (str->val.ptr). Only one of the two exists in either revision.
 */
void qse_mbs_fini (qse_mbs_t* str)
{
if (str->ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->ptr);
if (str->val.ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->val.ptr);
}
int qse_mbs_yield (qse_mbs_t* str, qse_mxstr_t* buf, qse_size_t new_capa)
@ -99,12 +99,12 @@ int qse_mbs_yield (qse_mbs_t* str, qse_mxstr_t* buf, qse_size_t new_capa)
if (buf != QSE_NULL)
{
buf->ptr = str->ptr;
buf->len = str->len;
buf->ptr = str->val.ptr;
buf->len = str->val.len;
}
str->ptr = tmp;
str->len = 0;
str->val.ptr = tmp;
str->val.len = 0;
str->capa = new_capa;
return 0;
@ -131,10 +131,10 @@ qse_size_t qse_mbs_setcapa (qse_mbs_t* str, qse_size_t capa)
if (capa == str->capa) return capa;
if (str->mmgr->realloc != QSE_NULL && str->ptr != QSE_NULL)
if (str->mmgr->realloc != QSE_NULL && str->val.ptr != QSE_NULL)
{
tmp = (qse_mchar_t*) QSE_MMGR_REALLOC (
str->mmgr, str->ptr,
str->mmgr, str->val.ptr,
QSE_SIZEOF(qse_mchar_t)*(capa+1));
if (tmp == QSE_NULL) return (qse_size_t)-1;
}
@ -144,23 +144,23 @@ qse_size_t qse_mbs_setcapa (qse_mbs_t* str, qse_size_t capa)
str->mmgr, QSE_SIZEOF(qse_mchar_t)*(capa+1));
if (tmp == QSE_NULL) return (qse_size_t)-1;
if (str->ptr != QSE_NULL)
if (str->val.ptr != QSE_NULL)
{
qse_size_t ncopy = (str->len <= capa)? str->len: capa;
QSE_MEMCPY (tmp, str->ptr,
qse_size_t ncopy = (str->val.len <= capa)? str->val.len: capa;
QSE_MEMCPY (tmp, str->val.ptr,
QSE_SIZEOF(qse_mchar_t)*(ncopy+1));
QSE_MMGR_FREE (str->mmgr, str->ptr);
QSE_MMGR_FREE (str->mmgr, str->val.ptr);
}
}
if (capa < str->len)
if (capa < str->val.len)
{
str->len = capa;
str->val.len = capa;
tmp[capa] = QSE_MT('\0');
}
str->capa = capa;
str->ptr = tmp;
str->val.ptr = tmp;
return str->capa;
}
@ -172,11 +172,11 @@ qse_size_t qse_mbs_getlen (qse_mbs_t* str)
qse_size_t qse_mbs_setlen (qse_mbs_t* str, qse_size_t len)
{
if (len == str->len) return len;
if (len < str->len)
if (len == str->val.len) return len;
if (len < str->val.len)
{
str->len = len;
str->ptr[len] = QSE_MT('\0');
str->val.len = len;
str->val.ptr[len] = QSE_MT('\0');
return len;
}
@ -186,17 +186,17 @@ qse_size_t qse_mbs_setlen (qse_mbs_t* str, qse_size_t len)
return (qse_size_t)-1;
}
while (str->len < len) str->ptr[str->len++] = QSE_MT(' ');
return str->len;
while (str->val.len < len) str->val.ptr[str->val.len++] = QSE_MT(' ');
return str->val.len;
}
/*
 * qse_mbs_clear - reset a dynamic multi-byte string to zero length.
 *
 * The length field is set to 0 unconditionally. If a buffer is present,
 * a terminating QSE_MT('\0') is written at index 0. The capacity and the
 * buffer pointer are left untouched.
 *
 * This text is a diff rendering: each str->ptr / str->len line is the
 * pre-change form and is immediately followed (or paired further down)
 * by its str->val.ptr / str->val.len replacement. Do not read the
 * interleaved lines as a single C body.
 */
void qse_mbs_clear (qse_mbs_t* str)
{
str->len = 0;
if (str->ptr != QSE_NULL)
str->val.len = 0;
if (str->val.ptr != QSE_NULL)
{
/* a non-null buffer is expected to have room for at least one cell */
QSE_ASSERT (str->capa >= 1);
str->ptr[0] = QSE_MT('\0');
str->val.ptr[0] = QSE_MT('\0');
}
}
@ -204,18 +204,18 @@ void qse_mbs_swap (qse_mbs_t* str, qse_mbs_t* str1)
/*
 * Body of qse_mbs_swap (str, str1): exchanges the buffer pointer, length,
 * capacity and memory manager of the two strings through a temporary
 * qse_mbs_t. Only those four fields of `tmp` are ever written or read.
 *
 * NOTE(review): the `sizer` member is not exchanged here - confirm that
 * leaving each object's sizer in place is intentional.
 *
 * This text is a diff rendering: every pair of ->ptr / ->len lines is the
 * pre-change form, directly followed by its ->val.ptr / ->val.len
 * replacement.
 */
{
qse_mbs_t tmp;
/* save str */
tmp.ptr = str->ptr;
tmp.len = str->len;
tmp.val.ptr = str->val.ptr;
tmp.val.len = str->val.len;
tmp.capa = str->capa;
tmp.mmgr = str->mmgr;
/* str <- str1 */
str->ptr = str1->ptr;
str->len = str1->len;
str->val.ptr = str1->val.ptr;
str->val.len = str1->val.len;
str->capa = str1->capa;
str->mmgr = str1->mmgr;
/* str1 <- saved str */
str1->ptr = tmp.ptr;
str1->len = tmp.len;
str1->val.ptr = tmp.val.ptr;
str1->val.len = tmp.val.len;
str1->capa = tmp.capa;
str1->mmgr = tmp.mmgr;
}
@ -228,7 +228,7 @@ qse_size_t qse_mbs_cpy (qse_mbs_t* str, const qse_mchar_t* s)
qse_size_t qse_mbs_ncpy (qse_mbs_t* str, const qse_mchar_t* s, qse_size_t len)
{
if (len > str->capa || str->ptr == QSE_NULL)
if (len > str->capa || str->val.ptr == QSE_NULL)
{
qse_mchar_t* buf;
@ -236,14 +236,14 @@ qse_size_t qse_mbs_ncpy (qse_mbs_t* str, const qse_mchar_t* s, qse_size_t len)
str->mmgr, QSE_SIZEOF(qse_mchar_t) * (len + 1));
if (buf == QSE_NULL) return (qse_size_t)-1;
if (str->ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->ptr);
if (str->val.ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->val.ptr);
str->capa = len;
str->ptr = buf;
str->val.ptr = buf;
}
str->len = qse_mbsncpy (str->ptr, s, len);
str->ptr[str->len] = QSE_MT('\0');
return str->len;
str->val.len = qse_mbsncpy (str->val.ptr, s, len);
str->val.ptr[str->val.len] = QSE_MT('\0');
return str->val.len;
}
qse_size_t qse_mbs_cat (qse_mbs_t* str, const qse_mchar_t* s)
@ -254,13 +254,13 @@ qse_size_t qse_mbs_cat (qse_mbs_t* str, const qse_mchar_t* s)
qse_size_t qse_mbs_ncat (qse_mbs_t* str, const qse_mchar_t* s, qse_size_t len)
{
if (len > str->capa - str->len)
if (len > str->capa - str->val.len)
{
qse_size_t ncapa, mincapa;
/* let the minimum capacity be as large as
* to fit in the new substring */
mincapa = str->len + len;
mincapa = str->val.len + len;
if (str->sizer == QSE_NULL)
{
@ -276,7 +276,7 @@ qse_size_t qse_mbs_ncat (qse_mbs_t* str, const qse_mchar_t* s, qse_size_t len)
* pass the minimum capacity required as a hint */
ncapa = str->sizer (str, mincapa);
/* if no change in capacity, return current length */
if (ncapa == str->capa) return str->len;
if (ncapa == str->capa) return str->val.len;
}
/* change the capacity */
@ -289,21 +289,21 @@ qse_size_t qse_mbs_ncat (qse_mbs_t* str, const qse_mchar_t* s, qse_size_t len)
while (1);
}
if (len > str->capa - str->len)
if (len > str->capa - str->val.len)
{
/* copy as many characters as the number of cells available.
* if the capacity has been decreased, len is adjusted here */
len = str->capa - str->len;
len = str->capa - str->val.len;
}
if (len > 0)
{
QSE_MEMCPY (&str->ptr[str->len], s, len*QSE_SIZEOF(*s));
str->len += len;
str->ptr[str->len] = QSE_MT('\0');
QSE_MEMCPY (&str->val.ptr[str->val.len], s, len*QSE_SIZEOF(*s));
str->val.len += len;
str->val.ptr[str->val.len] = QSE_MT('\0');
}
return str->len;
return str->val.len;
}
qse_size_t qse_mbs_ccat (qse_mbs_t* str, qse_mchar_t c)
@ -322,49 +322,49 @@ qse_size_t qse_mbs_nccat (qse_mbs_t* str, qse_mchar_t c, qse_size_t len)
len--;
}
return str->len;
return str->val.len;
}
/*
 * qse_mbs_del - delete `size` characters starting at `index`.
 *
 * Nothing is changed when the buffer is QSE_NULL, when `index` is not
 * below the current length, or when `size` is 0. The function returns the
 * length of the string after the call in every case.
 *
 * This text is a diff rendering: each ->ptr / ->len line is the
 * pre-change form and is paired with a ->val.ptr / ->val.len replacement.
 * Inside the qse_mbsncpy call the three old argument/statement lines come
 * first and the three new ones follow.
 */
qse_size_t qse_mbs_del (qse_mbs_t* str, qse_size_t index, qse_size_t size)
{
if (str->ptr != QSE_NULL && index < str->len && size > 0)
if (str->val.ptr != QSE_NULL && index < str->val.len && size > 0)
{
/* NOTE(review): index + size is not checked for wrap-around; a very
 * large `size` could make nidx smaller than the length and take the
 * else branch - confirm callers bound `size`. */
qse_size_t nidx = index + size;
if (nidx >= str->len)
if (nidx >= str->val.len)
{
/* the range reaches the end: truncate at index */
str->ptr[index] = QSE_MT('\0');
str->len = index;
str->val.ptr[index] = QSE_MT('\0');
str->val.len = index;
}
else
{
/* move the tail down over the deleted range; source and destination
 * overlap - presumably qse_mbsncpy copies forward and terminates the
 * destination, TODO confirm */
qse_mbsncpy (
&str->ptr[index], &str->ptr[nidx],
str->len - nidx);
str->len -= size;
&str->val.ptr[index], &str->val.ptr[nidx],
str->val.len - nidx);
str->val.len -= size;
}
}
return str->len;
return str->val.len;
}
/*
 * qse_mbs_trm - pass the buffer and its length to qse_mbsxtrm and store
 * the length it returns.
 *
 * The helper is skipped when the buffer is QSE_NULL. The (possibly
 * updated) length is returned.
 * Presumably qse_mbsxtrm trims surrounding whitespace in place - its
 * definition is not visible here, TODO confirm.
 *
 * This text is a diff rendering: each ->ptr / ->len line is the
 * pre-change form, followed by its ->val.ptr / ->val.len replacement.
 */
qse_size_t qse_mbs_trm (qse_mbs_t* str)
{
if (str->ptr != QSE_NULL)
if (str->val.ptr != QSE_NULL)
{
str->len = qse_mbsxtrm (str->ptr, str->len);
str->val.len = qse_mbsxtrm (str->val.ptr, str->val.len);
}
return str->len;
return str->val.len;
}
/*
 * qse_mbs_pac - pass the buffer and its length to qse_mbsxpac and store
 * the length it returns.
 *
 * The helper is skipped when the buffer is QSE_NULL. The (possibly
 * updated) length is returned.
 * Presumably qse_mbsxpac compacts runs of whitespace in place - its
 * definition is not visible here, TODO confirm.
 *
 * This text is a diff rendering: each ->ptr / ->len line is the
 * pre-change form, followed by its ->val.ptr / ->val.len replacement.
 */
qse_size_t qse_mbs_pac (qse_mbs_t* str)
{
if (str->ptr != QSE_NULL)
if (str->val.ptr != QSE_NULL)
{
str->len = qse_mbsxpac (str->ptr, str->len);
str->val.len = qse_mbsxpac (str->val.ptr, str->val.len);
}
return str->len;
return str->val.len;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: str_dynw.c 443 2011-04-25 14:56:05Z hyunghwan.chung $
* $Id: str_dynw.c 462 2011-05-18 14:36:40Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -64,16 +64,16 @@ qse_wcs_t* qse_wcs_init (qse_wcs_t* str, qse_mmgr_t* mmgr, qse_size_t capa)
str->mmgr = mmgr;
str->sizer = QSE_NULL;
if (capa == 0) str->ptr = QSE_NULL;
if (capa == 0) str->val.ptr = QSE_NULL;
else
{
str->ptr = (qse_wchar_t*) QSE_MMGR_ALLOC (
str->val.ptr = (qse_wchar_t*) QSE_MMGR_ALLOC (
mmgr, QSE_SIZEOF(qse_wchar_t) * (capa + 1));
if (str->ptr == QSE_NULL) return QSE_NULL;
str->ptr[0] = QSE_WT('\0');
if (str->val.ptr == QSE_NULL) return QSE_NULL;
str->val.ptr[0] = QSE_WT('\0');
}
str->len = 0;
str->val.len = 0;
str->capa = capa;
return str;
@ -81,7 +81,7 @@ qse_wcs_t* qse_wcs_init (qse_wcs_t* str, qse_mmgr_t* mmgr, qse_size_t capa)
/*
 * qse_wcs_fini - release the character buffer owned by a dynamic
 * wide-character string.
 *
 * The buffer is handed to QSE_MMGR_FREE together with str->mmgr, and only
 * when the buffer pointer is not QSE_NULL. The qse_wcs_t object itself is
 * not freed here, and the pointer field is not reset afterwards.
 *
 * This text is a diff rendering: the first `if` line is the pre-change
 * form (str->ptr) and the second is its post-change replacement
 * (str->val.ptr). Only one of the two exists in either revision.
 */
void qse_wcs_fini (qse_wcs_t* str)
{
if (str->ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->ptr);
if (str->val.ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->val.ptr);
}
int qse_wcs_yield (qse_wcs_t* str, qse_wxstr_t* buf, qse_size_t new_capa)
@ -99,12 +99,12 @@ int qse_wcs_yield (qse_wcs_t* str, qse_wxstr_t* buf, qse_size_t new_capa)
if (buf != QSE_NULL)
{
buf->ptr = str->ptr;
buf->len = str->len;
buf->ptr = str->val.ptr;
buf->len = str->val.len;
}
str->ptr = tmp;
str->len = 0;
str->val.ptr = tmp;
str->val.len = 0;
str->capa = new_capa;
return 0;
@ -131,10 +131,10 @@ qse_size_t qse_wcs_setcapa (qse_wcs_t* str, qse_size_t capa)
if (capa == str->capa) return capa;
if (str->mmgr->realloc != QSE_NULL && str->ptr != QSE_NULL)
if (str->mmgr->realloc != QSE_NULL && str->val.ptr != QSE_NULL)
{
tmp = (qse_wchar_t*) QSE_MMGR_REALLOC (
str->mmgr, str->ptr,
str->mmgr, str->val.ptr,
QSE_SIZEOF(qse_wchar_t)*(capa+1));
if (tmp == QSE_NULL) return (qse_size_t)-1;
}
@ -144,23 +144,23 @@ qse_size_t qse_wcs_setcapa (qse_wcs_t* str, qse_size_t capa)
str->mmgr, QSE_SIZEOF(qse_wchar_t)*(capa+1));
if (tmp == QSE_NULL) return (qse_size_t)-1;
if (str->ptr != QSE_NULL)
if (str->val.ptr != QSE_NULL)
{
qse_size_t ncopy = (str->len <= capa)? str->len: capa;
QSE_MEMCPY (tmp, str->ptr,
qse_size_t ncopy = (str->val.len <= capa)? str->val.len: capa;
QSE_MEMCPY (tmp, str->val.ptr,
QSE_SIZEOF(qse_wchar_t)*(ncopy+1));
QSE_MMGR_FREE (str->mmgr, str->ptr);
QSE_MMGR_FREE (str->mmgr, str->val.ptr);
}
}
if (capa < str->len)
if (capa < str->val.len)
{
str->len = capa;
str->val.len = capa;
tmp[capa] = QSE_WT('\0');
}
str->capa = capa;
str->ptr = tmp;
str->val.ptr = tmp;
return str->capa;
}
@ -172,11 +172,11 @@ qse_size_t qse_wcs_getlen (qse_wcs_t* str)
qse_size_t qse_wcs_setlen (qse_wcs_t* str, qse_size_t len)
{
if (len == str->len) return len;
if (len < str->len)
if (len == str->val.len) return len;
if (len < str->val.len)
{
str->len = len;
str->ptr[len] = QSE_WT('\0');
str->val.len = len;
str->val.ptr[len] = QSE_WT('\0');
return len;
}
@ -186,17 +186,17 @@ qse_size_t qse_wcs_setlen (qse_wcs_t* str, qse_size_t len)
return (qse_size_t)-1;
}
while (str->len < len) str->ptr[str->len++] = QSE_WT(' ');
return str->len;
while (str->val.len < len) str->val.ptr[str->val.len++] = QSE_WT(' ');
return str->val.len;
}
/*
 * qse_wcs_clear - reset a dynamic wide-character string to zero length.
 *
 * The length field is set to 0 unconditionally. If a buffer is present,
 * a terminating QSE_WT('\0') is written at index 0. The capacity and the
 * buffer pointer are left untouched.
 *
 * This text is a diff rendering: each str->ptr / str->len line is the
 * pre-change form and is paired with its str->val.ptr / str->val.len
 * replacement. Do not read the interleaved lines as a single C body.
 */
void qse_wcs_clear (qse_wcs_t* str)
{
str->len = 0;
if (str->ptr != QSE_NULL)
str->val.len = 0;
if (str->val.ptr != QSE_NULL)
{
/* a non-null buffer is expected to have room for at least one cell */
QSE_ASSERT (str->capa >= 1);
str->ptr[0] = QSE_WT('\0');
str->val.ptr[0] = QSE_WT('\0');
}
}
@ -204,18 +204,18 @@ void qse_wcs_swap (qse_wcs_t* str, qse_wcs_t* str1)
/*
 * Body of qse_wcs_swap (str, str1): exchanges the buffer pointer, length,
 * capacity and memory manager of the two strings through a temporary
 * qse_wcs_t. Only those four fields of `tmp` are ever written or read.
 *
 * NOTE(review): the `sizer` member is not exchanged here - confirm that
 * leaving each object's sizer in place is intentional.
 *
 * This text is a diff rendering: every pair of ->ptr / ->len lines is the
 * pre-change form, directly followed by its ->val.ptr / ->val.len
 * replacement.
 */
{
qse_wcs_t tmp;
/* save str */
tmp.ptr = str->ptr;
tmp.len = str->len;
tmp.val.ptr = str->val.ptr;
tmp.val.len = str->val.len;
tmp.capa = str->capa;
tmp.mmgr = str->mmgr;
/* str <- str1 */
str->ptr = str1->ptr;
str->len = str1->len;
str->val.ptr = str1->val.ptr;
str->val.len = str1->val.len;
str->capa = str1->capa;
str->mmgr = str1->mmgr;
/* str1 <- saved str */
str1->ptr = tmp.ptr;
str1->len = tmp.len;
str1->val.ptr = tmp.val.ptr;
str1->val.len = tmp.val.len;
str1->capa = tmp.capa;
str1->mmgr = tmp.mmgr;
}
@ -228,7 +228,7 @@ qse_size_t qse_wcs_cpy (qse_wcs_t* str, const qse_wchar_t* s)
qse_size_t qse_wcs_ncpy (qse_wcs_t* str, const qse_wchar_t* s, qse_size_t len)
{
if (len > str->capa || str->ptr == QSE_NULL)
if (len > str->capa || str->val.ptr == QSE_NULL)
{
qse_wchar_t* buf;
@ -236,14 +236,14 @@ qse_size_t qse_wcs_ncpy (qse_wcs_t* str, const qse_wchar_t* s, qse_size_t len)
str->mmgr, QSE_SIZEOF(qse_wchar_t) * (len + 1));
if (buf == QSE_NULL) return (qse_size_t)-1;
if (str->ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->ptr);
if (str->val.ptr != QSE_NULL) QSE_MMGR_FREE (str->mmgr, str->val.ptr);
str->capa = len;
str->ptr = buf;
str->val.ptr = buf;
}
str->len = qse_wcsncpy (str->ptr, s, len);
str->ptr[str->len] = QSE_WT('\0');
return str->len;
str->val.len = qse_wcsncpy (str->val.ptr, s, len);
str->val.ptr[str->val.len] = QSE_WT('\0');
return str->val.len;
}
qse_size_t qse_wcs_cat (qse_wcs_t* str, const qse_wchar_t* s)
@ -254,13 +254,13 @@ qse_size_t qse_wcs_cat (qse_wcs_t* str, const qse_wchar_t* s)
qse_size_t qse_wcs_ncat (qse_wcs_t* str, const qse_wchar_t* s, qse_size_t len)
{
if (len > str->capa - str->len)
if (len > str->capa - str->val.len)
{
qse_size_t ncapa, mincapa;
/* let the minimum capacity be as large as
* to fit in the new substring */
mincapa = str->len + len;
mincapa = str->val.len + len;
if (str->sizer == QSE_NULL)
{
@ -276,7 +276,7 @@ qse_size_t qse_wcs_ncat (qse_wcs_t* str, const qse_wchar_t* s, qse_size_t len)
* pass the minimum capacity required as a hint */
ncapa = str->sizer (str, mincapa);
/* if no change in capacity, return current length */
if (ncapa == str->capa) return str->len;
if (ncapa == str->capa) return str->val.len;
}
/* change the capacity */
@ -289,21 +289,21 @@ qse_size_t qse_wcs_ncat (qse_wcs_t* str, const qse_wchar_t* s, qse_size_t len)
while (1);
}
if (len > str->capa - str->len)
if (len > str->capa - str->val.len)
{
/* copy as many characters as the number of cells available.
* if the capacity has been decreased, len is adjusted here */
len = str->capa - str->len;
len = str->capa - str->val.len;
}
if (len > 0)
{
QSE_MEMCPY (&str->ptr[str->len], s, len*QSE_SIZEOF(*s));
str->len += len;
str->ptr[str->len] = QSE_WT('\0');
QSE_MEMCPY (&str->val.ptr[str->val.len], s, len*QSE_SIZEOF(*s));
str->val.len += len;
str->val.ptr[str->val.len] = QSE_WT('\0');
}
return str->len;
return str->val.len;
}
qse_size_t qse_wcs_ccat (qse_wcs_t* str, qse_wchar_t c)
@ -322,49 +322,49 @@ qse_size_t qse_wcs_nccat (qse_wcs_t* str, qse_wchar_t c, qse_size_t len)
len--;
}
return str->len;
return str->val.len;
}
/*
 * qse_wcs_del - delete `size` characters starting at `index`.
 *
 * Nothing is changed when the buffer is QSE_NULL, when `index` is not
 * below the current length, or when `size` is 0. The function returns the
 * length of the string after the call in every case.
 *
 * This text is a diff rendering: each ->ptr / ->len line is the
 * pre-change form and is paired with a ->val.ptr / ->val.len replacement.
 * Inside the qse_wcsncpy call the three old argument/statement lines come
 * first and the three new ones follow.
 */
qse_size_t qse_wcs_del (qse_wcs_t* str, qse_size_t index, qse_size_t size)
{
if (str->ptr != QSE_NULL && index < str->len && size > 0)
if (str->val.ptr != QSE_NULL && index < str->val.len && size > 0)
{
/* NOTE(review): index + size is not checked for wrap-around; a very
 * large `size` could make nidx smaller than the length and take the
 * else branch - confirm callers bound `size`. */
qse_size_t nidx = index + size;
if (nidx >= str->len)
if (nidx >= str->val.len)
{
/* the range reaches the end: truncate at index */
str->ptr[index] = QSE_WT('\0');
str->len = index;
str->val.ptr[index] = QSE_WT('\0');
str->val.len = index;
}
else
{
/* move the tail down over the deleted range; source and destination
 * overlap - presumably qse_wcsncpy copies forward and terminates the
 * destination, TODO confirm */
qse_wcsncpy (
&str->ptr[index], &str->ptr[nidx],
str->len - nidx);
str->len -= size;
&str->val.ptr[index], &str->val.ptr[nidx],
str->val.len - nidx);
str->val.len -= size;
}
}
return str->len;
return str->val.len;
}
/*
 * qse_wcs_trm - pass the buffer and its length to qse_wcsxtrm and store
 * the length it returns.
 *
 * The helper is skipped when the buffer is QSE_NULL. The (possibly
 * updated) length is returned.
 * Presumably qse_wcsxtrm trims surrounding whitespace in place - its
 * definition is not visible here, TODO confirm.
 *
 * This text is a diff rendering: each ->ptr / ->len line is the
 * pre-change form, followed by its ->val.ptr / ->val.len replacement.
 */
qse_size_t qse_wcs_trm (qse_wcs_t* str)
{
if (str->ptr != QSE_NULL)
if (str->val.ptr != QSE_NULL)
{
str->len = qse_wcsxtrm (str->ptr, str->len);
str->val.len = qse_wcsxtrm (str->val.ptr, str->val.len);
}
return str->len;
return str->val.len;
}
/*
 * qse_wcs_pac - pass the buffer and its length to qse_wcsxpac and store
 * the length it returns.
 *
 * The helper is skipped when the buffer is QSE_NULL. The (possibly
 * updated) length is returned.
 * Presumably qse_wcsxpac compacts runs of whitespace in place - its
 * definition is not visible here, TODO confirm.
 *
 * This text is a diff rendering: each ->ptr / ->len line is the
 * pre-change form, followed by its ->val.ptr / ->val.len replacement.
 */
qse_size_t qse_wcs_pac (qse_wcs_t* str)
{
if (str->ptr != QSE_NULL)
if (str->val.ptr != QSE_NULL)
{
str->len = qse_wcsxpac (str->ptr, str->len);
str->val.len = qse_wcsxpac (str->val.ptr, str->val.len);
}
return str->len;
return str->val.len;
}