qse/lib/awk/misc.c

1122 lines
26 KiB
C
Raw Normal View History

/*
2012-08-16 03:47:55 +00:00
* $Id$
*
Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.
2014-11-19 14:42:24 +00:00
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
2016-04-29 03:55:42 +00:00
#include "awk-prv.h"
/*#define USE_REX */
#if defined(USE_REX)
# include <qse/cmn/rex.h>
#else
# include <qse/cmn/tre.h>
#endif
#include <qse/cmn/mbwc.h>
void* qse_awk_allocmem (qse_awk_t* awk, qse_size_t size)
{
void* ptr = QSE_AWK_ALLOC(awk, size);
if (ptr == QSE_NULL) qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
}
void* qse_awk_callocmem (qse_awk_t* awk, qse_size_t size)
{
void* ptr = QSE_AWK_ALLOC(awk, size);
if (ptr) QSE_MEMSET (ptr, 0, size);
else qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
}
void* qse_awk_reallocmem (qse_awk_t* awk, void* ptr, qse_size_t size)
2009-07-02 07:14:39 +00:00
{
void* nptr = QSE_AWK_REALLOC(awk, ptr, size);
if (!nptr) qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
return nptr;
2009-07-02 07:14:39 +00:00
}
2008-12-21 21:35:07 +00:00
qse_char_t* qse_awk_strdup (qse_awk_t* awk, const qse_char_t* s)
{
qse_char_t* ptr = QSE_AWK_STRDUP(awk, s);
if (!ptr) qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
2008-07-20 23:53:29 +00:00
}
2008-12-21 21:35:07 +00:00
qse_char_t* qse_awk_strxdup (qse_awk_t* awk, const qse_char_t* s, qse_size_t l)
2008-07-20 23:53:29 +00:00
{
qse_char_t* ptr = QSE_AWK_STRXDUP(awk, s, l);
if (!ptr) qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
}
2014-07-08 14:30:42 +00:00
qse_char_t* qse_awk_cstrdup (qse_awk_t* awk, const qse_cstr_t* s)
{
qse_char_t* ptr = qse_cstrdup(s, qse_awk_getmmgr(awk));
if (!ptr) qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
}
qse_wchar_t* qse_awk_mbstowcsdup (qse_awk_t* awk, const qse_mchar_t* mbs, qse_size_t* _wcslen)
{
qse_size_t mbslen, wcslen;
qse_wchar_t* wcs;
/* if i use qse_mbstowcsdupwithcmgr(), i cannot pinpoint the exact failure cause.
* let's do it differently. */
if (qse_mbstowcswithcmgr(mbs, &mbslen, QSE_NULL, &wcslen, qse_awk_getcmgr(awk)) <= -1)
{
qse_awk_seterrnum (awk, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
wcslen = wcslen + 1; /* for terminating null */
wcs = qse_awk_allocmem(awk, QSE_SIZEOF(*wcs) * wcslen);
if (!wcs) return QSE_NULL;
qse_mbstowcswithcmgr (mbs, &mbslen, wcs, &wcslen, qse_awk_getcmgr(awk));
if (_wcslen) *_wcslen = wcslen;
return wcs;
}
qse_mchar_t* qse_awk_wcstombsdup (qse_awk_t* awk, const qse_wchar_t* wcs, qse_size_t* _mbslen)
{
qse_size_t mbslen, wcslen;
qse_mchar_t* mbs;
if (qse_wcstombswithcmgr(wcs, &wcslen, QSE_NULL, &mbslen, qse_awk_getcmgr(awk)) <= -1)
{
qse_awk_seterrnum (awk, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
mbslen = mbslen + 1; /* for terminating null */
mbs = qse_awk_allocmem(awk, QSE_SIZEOF(*mbs) * mbslen);
if (!mbs) return QSE_NULL;
qse_wcstombswithcmgr (wcs, &wcslen, mbs, &mbslen, qse_awk_getcmgr(awk));
if (_mbslen) *_mbslen = mbslen;
return mbs;
}
qse_wchar_t* qse_awk_mbsntowcsdup (qse_awk_t* awk, const qse_mchar_t* mbs, qse_size_t _mbslen, qse_size_t* _wcslen)
{
qse_size_t mbslen, wcslen;
qse_wchar_t* wcs;
/* if i use qse_mbstowcsdupwithcmgr(), i cannot pinpoint the exact failure cause.
* let's do it differently. */
mbslen = _mbslen;
if (qse_mbsntowcsnwithcmgr(mbs, &mbslen, QSE_NULL, &wcslen, qse_awk_getcmgr(awk)) <= -1)
{
qse_awk_seterrnum (awk, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
wcs = qse_awk_allocmem(awk, QSE_SIZEOF(*wcs) * (wcslen + 1));
if (!wcs) return QSE_NULL;
mbslen= _mbslen;
qse_mbsntowcsnwithcmgr (mbs, &mbslen, wcs, &wcslen, qse_awk_getcmgr(awk));
wcs[wcslen] = QSE_WT('\0');
if (_wcslen) *_wcslen = wcslen;
return wcs;
}
qse_mchar_t* qse_awk_wcsntombsdup (qse_awk_t* awk, const qse_wchar_t* wcs, qse_size_t _wcslen, qse_size_t* _mbslen)
{
qse_size_t mbslen, wcslen;
qse_mchar_t* mbs;
wcslen = _wcslen;
if (qse_wcsntombsnwithcmgr(wcs, &wcslen, QSE_NULL, &mbslen, qse_awk_getcmgr(awk)) <= -1)
{
qse_awk_seterrnum (awk, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
mbs = qse_awk_allocmem(awk, QSE_SIZEOF(*mbs) * (mbslen + 1));
if (!mbs) return QSE_NULL;
wcslen = _wcslen;
qse_wcsntombsnwithcmgr (wcs, &wcslen, mbs, &mbslen, qse_awk_getcmgr(awk));
mbs[mbslen] = QSE_MT('\0');
if (_mbslen) *_mbslen = mbslen;
return mbs;
}
/* ========================================================================= */
#undef awk_strxtoint
#undef awk_strtoflt
#undef awk_strxtoflt
#undef char_t
#undef cint_t
#undef AWK_ISSPACE
#undef AWK_ISDIGIT
#undef _T
#define awk_strxtoint qse_awk_mbsxtoint
#define awk_strtoflt qse_awk_mbstoflt
#define awk_strxtoflt qse_awk_mbsxtoflt
#define char_t qse_mchar_t
#define cint_t qse_mcint_t
#define AWK_ISSPACE QSE_AWK_ISMSPACE
#define AWK_ISDIGIT QSE_AWK_ISMDIGIT
#define _T QSE_MT
#include "misc-imp.h"
/* ------------------------------------------------------------------------- */
#undef awk_strxtoint
#undef awk_strtoflt
#undef awk_strxtoflt
#undef char_t
#undef cint_t
#undef AWK_ISSPACE
#undef AWK_ISDIGIT
#undef _T
/* ------------------------------------------------------------------------- */
#define awk_strxtoint qse_awk_wcsxtoint
#define awk_strtoflt qse_awk_wcstoflt
#define awk_strxtoflt qse_awk_wcsxtoflt
#define char_t qse_wchar_t
#define cint_t qse_wcint_t
#define AWK_ISSPACE QSE_AWK_ISWSPACE
#define AWK_ISDIGIT QSE_AWK_ISWDIGIT
#define _T QSE_WT
#include "misc-imp.h"
#undef awk_strxtoint
#undef awk_strtoflt
#undef awk_strxtoflt
#undef char_t
#undef cint_t
#undef AWK_ISSPACE
#undef AWK_ISDIGIT
#undef _T
/* ========================================================================= */
qse_size_t qse_awk_inttostr (
qse_awk_t* awk, qse_awk_int_t value,
int radix, const qse_char_t* prefix, qse_char_t* buf, qse_size_t size)
{
qse_awk_int_t t, rem;
2008-12-21 21:35:07 +00:00
qse_size_t len, ret, i;
qse_size_t prefix_len;
2008-12-21 21:35:07 +00:00
prefix_len = (prefix != QSE_NULL)? qse_strlen(prefix): 0;
t = value;
if (t == 0)
{
/* zero */
2008-12-21 21:35:07 +00:00
if (buf == QSE_NULL)
{
/* if buf is not given,
* return the number of bytes required */
return prefix_len + 1;
}
if (size < prefix_len+1)
{
/* buffer too small */
2008-12-21 21:35:07 +00:00
return (qse_size_t)-1;
}
for (i = 0; i < prefix_len; i++) buf[i] = prefix[i];
2008-12-21 21:35:07 +00:00
buf[prefix_len] = QSE_T('0');
if (size > prefix_len+1) buf[prefix_len+1] = QSE_T('\0');
return prefix_len+1;
}
/* non-zero values */
len = prefix_len;
if (t < 0) { t = -t; len++; }
while (t > 0) { len++; t /= radix; }
2008-12-21 21:35:07 +00:00
if (buf == QSE_NULL)
{
/* if buf is not given, return the number of bytes required */
return len;
}
2008-12-21 21:35:07 +00:00
if (size < len) return (qse_size_t)-1; /* buffer too small */
if (size > len) buf[len] = QSE_T('\0');
ret = len;
t = value;
if (t < 0) t = -t;
while (t > 0)
{
rem = t % radix;
if (rem >= 10)
2008-12-21 21:35:07 +00:00
buf[--len] = (qse_char_t)rem + QSE_T('a') - 10;
else
2008-12-21 21:35:07 +00:00
buf[--len] = (qse_char_t)rem + QSE_T('0');
t /= radix;
}
if (value < 0)
{
for (i = 1; i <= prefix_len; i++)
{
buf[i] = prefix[i-1];
len--;
}
2008-12-21 21:35:07 +00:00
buf[--len] = QSE_T('-');
}
else
{
for (i = 0; i < prefix_len; i++) buf[i] = prefix[i];
}
return ret;
}
qse_char_t* qse_awk_rtx_strtok (
qse_awk_rtx_t* rtx, const qse_char_t* s,
2014-07-08 14:30:42 +00:00
const qse_char_t* delim, qse_cstr_t* tok)
{
return qse_awk_rtx_strxntok (
2011-05-19 08:36:40 +00:00
rtx, s, qse_strlen(s), delim, qse_strlen(delim), tok);
}
qse_char_t* qse_awk_rtx_strxtok (
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len,
2014-07-08 14:30:42 +00:00
const qse_char_t* delim, qse_cstr_t* tok)
{
return qse_awk_rtx_strxntok (
2011-05-19 08:36:40 +00:00
rtx, s, len, delim, qse_strlen(delim), tok);
}
qse_char_t* qse_awk_rtx_strntok (
qse_awk_rtx_t* rtx, const qse_char_t* s,
2008-12-21 21:35:07 +00:00
const qse_char_t* delim, qse_size_t delim_len,
2014-07-08 14:30:42 +00:00
qse_cstr_t* tok)
{
return qse_awk_rtx_strxntok (
2011-05-19 08:36:40 +00:00
rtx, s, qse_strlen(s), delim, delim_len, tok);
}
qse_char_t* qse_awk_rtx_strxntok (
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len,
2014-07-08 14:30:42 +00:00
const qse_char_t* delim, qse_size_t delim_len, qse_cstr_t* tok)
{
2008-12-21 21:35:07 +00:00
const qse_char_t* p = s, *d;
const qse_char_t* end = s + len;
const qse_char_t* sp = QSE_NULL, * ep = QSE_NULL;
const qse_char_t* delim_end = delim + delim_len;
qse_char_t c;
int delim_mode;
#define __DELIM_NULL 0
#define __DELIM_EMPTY 1
#define __DELIM_SPACES 2
#define __DELIM_NOSPACES 3
#define __DELIM_COMPOSITE 4
2008-12-21 21:35:07 +00:00
if (delim == QSE_NULL) delim_mode = __DELIM_NULL;
else
{
delim_mode = __DELIM_EMPTY;
for (d = delim; d < delim_end; d++)
{
if (QSE_AWK_ISSPACE(rtx->awk,*d))
{
if (delim_mode == __DELIM_EMPTY)
delim_mode = __DELIM_SPACES;
else if (delim_mode == __DELIM_NOSPACES)
{
delim_mode = __DELIM_COMPOSITE;
break;
}
}
else
{
if (delim_mode == __DELIM_EMPTY)
delim_mode = __DELIM_NOSPACES;
else if (delim_mode == __DELIM_SPACES)
{
delim_mode = __DELIM_COMPOSITE;
break;
}
}
}
/* TODO: verify the following statement... */
if (delim_mode == __DELIM_SPACES &&
delim_len == 1 &&
2008-12-21 21:35:07 +00:00
delim[0] != QSE_T(' ')) delim_mode = __DELIM_NOSPACES;
}
if (delim_mode == __DELIM_NULL)
{
2008-12-21 21:35:07 +00:00
/* when QSE_NULL is given as "delim", it trims off the
* leading and trailing spaces characters off the source
* string "s" eventually. */
while (p < end && QSE_AWK_ISSPACE(rtx->awk,*p)) p++;
while (p < end)
{
c = *p;
if (!QSE_AWK_ISSPACE(rtx->awk,c))
{
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL) sp = p;
ep = p;
}
p++;
}
}
else if (delim_mode == __DELIM_EMPTY)
{
/* each character in the source string "s" becomes a token. */
if (p < end)
{
c = *p;
sp = p;
ep = p++;
}
}
else if (delim_mode == __DELIM_SPACES)
{
/* each token is delimited by space characters. all leading
* and trailing spaces are removed. */
while (p < end && QSE_AWK_ISSPACE(rtx->awk,*p)) p++;
while (p < end)
{
c = *p;
if (QSE_AWK_ISSPACE(rtx->awk,c)) break;
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL) sp = p;
ep = p++;
}
while (p < end && QSE_AWK_ISSPACE(rtx->awk,*p)) p++;
}
else if (delim_mode == __DELIM_NOSPACES)
{
/* each token is delimited by one of charaters
* in the delimeter set "delim". */
if (rtx->gbl.ignorecase)
{
while (p < end)
{
c = QSE_AWK_TOUPPER(rtx->awk, *p);
for (d = delim; d < delim_end; d++)
{
if (c == QSE_AWK_TOUPPER(rtx->awk,*d)) goto exit_loop;
}
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL) sp = p;
ep = p++;
}
}
else
{
while (p < end)
{
c = *p;
for (d = delim; d < delim_end; d++)
{
if (c == *d) goto exit_loop;
}
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL) sp = p;
ep = p++;
}
}
}
else /* if (delim_mode == __DELIM_COMPOSITE) */
{
/* each token is delimited by one of non-space charaters
* in the delimeter set "delim". however, all space characters
* surrounding the token are removed */
while (p < end && QSE_AWK_ISSPACE(rtx->awk,*p)) p++;
if (rtx->gbl.ignorecase)
{
while (p < end)
{
c = QSE_AWK_TOUPPER(rtx->awk, *p);
if (QSE_AWK_ISSPACE(rtx->awk,c))
{
p++;
continue;
}
for (d = delim; d < delim_end; d++)
{
2009-09-22 07:28:18 +00:00
if (c == QSE_AWK_TOUPPER(rtx->awk,*d))
goto exit_loop;
}
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL) sp = p;
ep = p++;
}
}
else
{
while (p < end)
{
c = *p;
if (QSE_AWK_ISSPACE(rtx->awk,c))
{
p++;
continue;
}
for (d = delim; d < delim_end; d++)
{
if (c == *d) goto exit_loop;
}
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL) sp = p;
ep = p++;
}
}
}
exit_loop:
2008-12-21 21:35:07 +00:00
if (sp == QSE_NULL)
{
2011-05-19 08:36:40 +00:00
tok->ptr = QSE_NULL;
tok->len = (qse_size_t)0;
}
else
{
tok->ptr = (qse_char_t*)sp;
2011-05-19 08:36:40 +00:00
tok->len = ep - sp + 1;
}
2009-09-22 07:28:18 +00:00
/* if QSE_NULL is returned, this function should not be called again */
2008-12-21 21:35:07 +00:00
if (p >= end) return QSE_NULL;
if (delim_mode == __DELIM_EMPTY ||
2008-12-21 21:35:07 +00:00
delim_mode == __DELIM_SPACES) return (qse_char_t*)p;
return (qse_char_t*)++p;
}
qse_char_t* qse_awk_rtx_strxntokbyrex (
qse_awk_rtx_t* rtx,
const qse_char_t* str, qse_size_t len,
const qse_char_t* substr, qse_size_t sublen,
2014-07-08 14:30:42 +00:00
void* rex, qse_cstr_t* tok,
qse_awk_errnum_t* errnum)
{
int n;
2011-05-19 08:36:40 +00:00
qse_size_t i;
2014-07-08 14:30:42 +00:00
qse_cstr_t match, s, cursub, realsub;
s.ptr = (qse_char_t*)str;
2011-05-19 08:36:40 +00:00
s.len = len;
cursub.ptr = (qse_char_t*)substr;
2011-05-19 08:36:40 +00:00
cursub.len = sublen;
realsub.ptr = (qse_char_t*)substr;
2011-05-19 08:36:40 +00:00
realsub.len = sublen;
while (cursub.len > 0)
{
n = qse_awk_matchrex (
rtx->awk, rex, rtx->gbl.ignorecase,
&s, &cursub, &match, QSE_NULL, errnum);
2008-12-21 21:35:07 +00:00
if (n == -1) return QSE_NULL;
if (n == 0)
{
/* no match has been found.
* return the entire string as a token */
2011-05-19 08:36:40 +00:00
tok->ptr = realsub.ptr;
tok->len = realsub.len;
2008-12-21 21:35:07 +00:00
*errnum = QSE_AWK_ENOERR;
return QSE_NULL;
}
2008-12-21 21:35:07 +00:00
QSE_ASSERT (n == 1);
if (match.len == 0)
{
2011-05-19 08:36:40 +00:00
/* the match length is zero. */
cursub.ptr++;
cursub.len--;
}
else if (rtx->gbl.striprecspc > 0 || (rtx->gbl.striprecspc < 0 && (rtx->awk->opt.trait & QSE_AWK_STRIPRECSPC)))
{
/* match at the beginning of the input string */
if (match.ptr == substr)
{
for (i = 0; i < match.len; i++)
{
if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
goto exit_loop;
}
2011-05-19 08:36:40 +00:00
/* the match that is all spaces at the
* beginning of the input string is skipped */
2011-05-19 08:36:40 +00:00
cursub.ptr += match.len;
cursub.len -= match.len;
/* adjust the substring by skipping the leading
* spaces and retry matching */
realsub.ptr = (qse_char_t*)substr + match.len;
2011-05-19 08:36:40 +00:00
realsub.len -= match.len;
}
else break;
}
else break;
}
exit_loop:
2011-05-19 08:36:40 +00:00
if (cursub.len <= 0)
{
2011-05-19 08:36:40 +00:00
tok->ptr = realsub.ptr;
tok->len = realsub.len;
2008-12-21 21:35:07 +00:00
*errnum = QSE_AWK_ENOERR;
return QSE_NULL;
}
2011-05-19 08:36:40 +00:00
tok->ptr = realsub.ptr;
tok->len = match.ptr - realsub.ptr;
for (i = 0; i < match.len; i++)
{
if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
{
2011-05-19 08:36:40 +00:00
/* the match contains a non-space character. */
2008-12-21 21:35:07 +00:00
*errnum = QSE_AWK_ENOERR;
return (qse_char_t*)match.ptr+match.len;
}
}
2011-05-19 08:36:40 +00:00
/* the match is all spaces */
2008-12-21 21:35:07 +00:00
*errnum = QSE_AWK_ENOERR;
if (rtx->gbl.striprecspc > 0 || (rtx->gbl.striprecspc < 0 && (rtx->awk->opt.trait & QSE_AWK_STRIPRECSPC)))
{
2011-05-19 08:36:40 +00:00
/* if the match reached the last character in the input string,
* it returns QSE_NULL to terminate tokenization. */
return (match.ptr+match.len >= substr+sublen)?
QSE_NULL: ((qse_char_t*)match.ptr+match.len);
}
else
{
2011-05-19 08:36:40 +00:00
/* if the match went beyond the the last character in the input
* string, it returns QSE_NULL to terminate tokenization. */
return (match.ptr+match.len > substr+sublen)?
QSE_NULL: ((qse_char_t*)match.ptr+match.len);
}
}
2008-03-04 03:31:41 +00:00
2009-09-22 07:28:18 +00:00
qse_char_t* qse_awk_rtx_strxnfld (
qse_awk_rtx_t* rtx, qse_char_t* str, qse_size_t len,
qse_char_t fs, qse_char_t ec, qse_char_t lq, qse_char_t rq,
2014-07-08 14:30:42 +00:00
qse_cstr_t* tok)
2009-09-22 07:28:18 +00:00
{
qse_char_t* p = str;
qse_char_t* end = str + len;
int escaped = 0, quoted = 0;
qse_char_t* ts; /* token start */
qse_char_t* tp; /* points to one char past the last token char */
qse_char_t* xp; /* points to one char past the last effective char */
/* skip leading spaces */
while (p < end && QSE_ISSPACE(*p)) p++;
/* initialize token pointers */
ts = tp = xp = p;
while (p < end)
{
char c = *p;
if (escaped)
{
*tp++ = c; xp = tp; p++;
escaped = 0;
}
else
{
if (c == ec)
{
escaped = 1;
p++;
}
else if (quoted)
{
if (c == rq)
{
quoted = 0;
p++;
}
else
{
*tp++ = c; xp = tp; p++;
}
}
else
{
if (c == fs)
{
2011-05-19 08:36:40 +00:00
tok->ptr = ts;
tok->len = xp - ts;
p++;
if (QSE_ISSPACE(fs))
{
while (p < end && *p == fs) p++;
if (p >= end) return QSE_NULL;
}
return p;
}
if (c == lq)
{
quoted = 1;
p++;
}
else
{
*tp++ = c; p++;
if (!QSE_ISSPACE(c)) xp = tp;
}
}
}
}
if (escaped)
{
/* if it is still escaped, the last character must be
* the escaper itself. treat it as a normal character */
*xp++ = ec;
}
2011-05-19 08:36:40 +00:00
tok->ptr = ts;
tok->len = xp - ts;
2009-09-22 07:28:18 +00:00
return QSE_NULL;
}
static QSE_INLINE int rexerr_to_errnum (int err)
{
switch (err)
{
case QSE_REX_ENOERR: return QSE_AWK_ENOERR;
case QSE_REX_ENOMEM: return QSE_AWK_ENOMEM;
case QSE_REX_ENOCOMP: return QSE_AWK_EREXBL;
case QSE_REX_ERECUR: return QSE_AWK_EREXRECUR;
case QSE_REX_ERPAREN: return QSE_AWK_EREXRPAREN;
case QSE_REX_ERBRACK: return QSE_AWK_EREXRBRACK;
case QSE_REX_ERBRACE: return QSE_AWK_EREXRBRACE;
case QSE_REX_ECOLON: return QSE_AWK_EREXCOLON;
case QSE_REX_ECRANGE: return QSE_AWK_EREXCRANGE;
case QSE_REX_ECCLASS: return QSE_AWK_EREXCCLASS;
case QSE_REX_EBOUND: return QSE_AWK_EREXBOUND;
case QSE_REX_ESPCAWP: return QSE_AWK_EREXSPCAWP;
case QSE_REX_EPREEND: return QSE_AWK_EREXPREEND;
default: return QSE_AWK_EINTERN;
}
}
2008-12-21 21:35:07 +00:00
int qse_awk_buildrex (
qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len,
qse_awk_errnum_t* errnum, void** code, void** icode)
2008-03-04 03:31:41 +00:00
{
#if defined(USE_REX)
qse_rex_errnum_t err;
2008-03-04 03:31:41 +00:00
void* p;
if (code || icode)
{
p = qse_buildrex (
qse_awk_getmmgr(awk), awk->opt.depth.s.rex_build,
((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND),
ptn, len, &err
);
if (p == QSE_NULL)
{
*errnum = rexerr_to_errnum(err);
return -1;
}
if (code) *code = p;
if (icode) *icode = p;
}
return 0;
#else
qse_tre_t* tre = QSE_NULL;
qse_tre_t* itre = QSE_NULL;
int opt = QSE_TRE_EXTENDED;
if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND;
if (code)
{
tre = qse_tre_open(qse_awk_getmmgr(awk), 0);
if (tre == QSE_NULL)
{
*errnum = QSE_AWK_ENOMEM;
return -1;
}
if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1)
{
#if 0 /* TODO */
if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM;
else
SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc);
#endif
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
qse_tre_close (tre);
return -1;
}
}
if (icode)
{
itre = qse_tre_open(qse_awk_getmmgr(awk), 0);
if (itre == QSE_NULL)
{
if (tre) qse_tre_close (tre);
*errnum = QSE_AWK_ENOMEM;
return -1;
}
/* ignorecase is a compile option for TRE */
if (qse_tre_compx (itre, ptn, len, QSE_NULL, opt | QSE_TRE_IGNORECASE) <= -1)
{
#if 0 /* TODO */
if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM;
else
SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc);
#endif
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
qse_tre_close (itre);
if (tre) qse_tre_close (tre);
return -1;
}
}
if (code) *code = tre;
if (icode) *icode = itre;
2014-07-02 13:59:32 +00:00
return 0;
#endif
}
#if !defined(USE_REX)
static int matchtre (
qse_awk_t* awk, qse_tre_t* tre, int opt,
2014-07-08 14:30:42 +00:00
const qse_cstr_t* str, qse_cstr_t* mat,
qse_cstr_t submat[9], qse_awk_errnum_t* errnum)
{
int n;
2014-11-18 16:10:12 +00:00
/*qse_tre_match_t match[10] = { { 0, 0 }, };*/
qse_tre_match_t match[10];
2014-11-18 16:10:12 +00:00
QSE_MEMSET (match, 0, QSE_SIZEOF(match));
n = qse_tre_execx(tre, str->ptr, str->len, match, QSE_COUNTOF(match), opt);
if (n <= -1)
{
if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMATCH) return 0;
#if 0 /* TODO: */
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXMA;
SETERR0 (sed, errnum, loc);
#endif
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXMA;
return -1;
}
QSE_ASSERT (match[0].rm_so != -1);
if (mat)
{
mat->ptr = &str->ptr[match[0].rm_so];
mat->len = match[0].rm_eo - match[0].rm_so;
}
if (submat)
{
int i;
/* you must intialize submat before you pass into this
* function because it can abort filling */
for (i = 1; i < QSE_COUNTOF(match); i++)
{
if (match[i].rm_so != -1)
{
submat[i-1].ptr = &str->ptr[match[i].rm_so];
submat[i-1].len = match[i].rm_eo - match[i].rm_so;
}
}
}
return 1;
2008-03-04 03:31:41 +00:00
}
#endif
2008-03-04 03:31:41 +00:00
2008-12-21 21:35:07 +00:00
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int icase,
2014-07-08 14:30:42 +00:00
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_cstr_t submat[9], qse_awk_errnum_t* errnum)
2008-03-04 03:31:41 +00:00
{
#if defined(USE_REX)
int x;
qse_rex_errnum_t err;
2008-03-04 03:31:41 +00:00
/* submatch is not supported */
2008-12-21 21:35:07 +00:00
x = qse_matchrex (
qse_awk_getmmgr(awk), awk->opt.depth.s.rex_match, code,
(icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err);
if (x <= -1) *errnum = rexerr_to_errnum(err);
return x;
#else
int x;
int opt = QSE_TRE_BACKTRACKING; /* TODO: option... QSE_TRE_BACKTRACKING ??? */
x = matchtre (
awk, code,
((str->ptr == substr->ptr)? opt: (opt | QSE_TRE_NOTBOL)),
substr, match, submat, errnum
);
2008-03-04 03:31:41 +00:00
return x;
#endif
}
void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode)
{
if (code)
{
#if defined(USE_REX)
qse_freerex ((awk)->mmgr, code);
#else
qse_tre_close (code);
#endif
}
if (icode && icode != code)
{
#if defined(USE_REX)
qse_freerex ((awk)->mmgr, icode);
#else
qse_tre_close (icode);
#endif
}
}
int qse_awk_rtx_matchrex (
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match, qse_cstr_t submat[9])
{
void* code;
int icase, x;
qse_awk_errnum_t awkerr;
#if defined(USE_REX)
qse_rex_errnum_t rexerr;
#endif
icase = rtx->gbl.ignorecase;
if (QSE_AWK_RTX_GETVALTYPE (rtx, val) == QSE_AWK_VAL_REX)
{
code = ((qse_awk_val_rex_t*)val)->code[icase];
}
else
{
/* convert to a string and build a regular expression */
2014-07-08 14:30:42 +00:00
qse_cstr_t tmp;
tmp.ptr = qse_awk_rtx_getvalstr (rtx, val, &tmp.len);
if (tmp.ptr == QSE_NULL) return -1;
x = icase? qse_awk_buildrex (rtx->awk, tmp.ptr, tmp.len, &awkerr, QSE_NULL, &code):
qse_awk_buildrex (rtx->awk, tmp.ptr, tmp.len, &awkerr, &code, QSE_NULL);
qse_awk_rtx_freevalstr (rtx, val, tmp.ptr);
if (x <= -1)
{
qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
return -1;
}
}
#if defined(USE_REX)
/* submatch not supported */
x = qse_matchrex (
qse_awk_rtx_getmmgr(rtx), rtx->awk->opt.depth.s.rex_match,
code, (icase? QSE_REX_IGNORECASE: 0),
str, substr, match, &rexerr);
if (x <= -1) qse_awk_rtx_seterrnum (rtx, rexerr_to_errnum(rexerr), QSE_NULL);
#else
x = matchtre (
rtx->awk, code,
((str->ptr == substr->ptr)? QSE_TRE_BACKTRACKING: (QSE_TRE_BACKTRACKING | QSE_TRE_NOTBOL)),
substr, match, submat, &awkerr
);
if (x <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
#endif
if (QSE_AWK_RTX_GETVALTYPE(rtx, val) == QSE_AWK_VAL_REX)
{
/* nothing to free */
}
else
{
if (icase)
qse_awk_freerex (rtx->awk, QSE_NULL, code);
else
qse_awk_freerex (rtx->awk, code, QSE_NULL);
}
return x;
2008-03-04 03:31:41 +00:00
}
void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size)
{
void* ptr = QSE_AWK_ALLOC (rtx->awk, size);
if (ptr == QSE_NULL) qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
}
void* qse_awk_rtx_reallocmem (qse_awk_rtx_t* rtx, void* ptr, qse_size_t size)
{
void* nptr = QSE_AWK_REALLOC (rtx->awk, ptr, size);
if (nptr == QSE_NULL) qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
return nptr;
}
void* qse_awk_rtx_callocmem (qse_awk_rtx_t* rtx, qse_size_t size)
{
void* ptr = QSE_AWK_ALLOC (rtx->awk, size);
if (ptr) QSE_MEMSET (ptr, 0, size);
else qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
return ptr;
}
qse_wchar_t* qse_awk_rtx_mbstowcsdup (qse_awk_rtx_t* rtx, const qse_mchar_t* mbs, qse_size_t* _wcslen)
{
qse_size_t mbslen, wcslen;
qse_wchar_t* wcs;
/* if i use qse_mbstowcsdupwithcmgr(), i cannot pinpoint the exact failure cause.
* let's do it differently. */
if (qse_mbstowcswithcmgr(mbs, &mbslen, QSE_NULL, &wcslen, qse_awk_rtx_getcmgr(rtx)) <= -1)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
wcslen = wcslen + 1; /* for terminating null */
wcs = qse_awk_rtx_allocmem(rtx, QSE_SIZEOF(*wcs) * wcslen);
if (!wcs) return QSE_NULL;
qse_mbstowcswithcmgr (mbs, &mbslen, wcs, &wcslen, qse_awk_rtx_getcmgr(rtx));
if (_wcslen) *_wcslen = wcslen;
return wcs;
}
qse_mchar_t* qse_awk_rtx_wcstombsdup (qse_awk_rtx_t* rtx, const qse_wchar_t* wcs, qse_size_t* _mbslen)
{
qse_size_t mbslen, wcslen;
qse_mchar_t* mbs;
if (qse_wcstombswithcmgr(wcs, &wcslen, QSE_NULL, &mbslen, qse_awk_rtx_getcmgr(rtx)) <= -1)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
mbslen = mbslen + 1; /* for terminating null */
mbs = qse_awk_rtx_allocmem(rtx, QSE_SIZEOF(*mbs) * mbslen);
if (!mbs) return QSE_NULL;
qse_wcstombswithcmgr (wcs, &wcslen, mbs, &mbslen, qse_awk_rtx_getcmgr(rtx));
if (_mbslen) *_mbslen = mbslen;
return mbs;
}
qse_wchar_t* qse_awk_rtx_mbsntowcsdup (qse_awk_rtx_t* rtx, const qse_mchar_t* mbs, qse_size_t _mbslen, qse_size_t* _wcslen)
{
qse_size_t mbslen, wcslen;
qse_wchar_t* wcs;
/* if i use qse_mbstowcsdupwithcmgr(), i cannot pinpoint the exact failure cause.
* let's do it differently. */
mbslen = _mbslen;
if (qse_mbsntowcsnwithcmgr(mbs, &mbslen, QSE_NULL, &wcslen, qse_awk_rtx_getcmgr(rtx)) <= -1)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
wcs = qse_awk_rtx_allocmem(rtx, QSE_SIZEOF(*wcs) * (wcslen + 1));
if (!wcs) return QSE_NULL;
mbslen= _mbslen;
qse_mbsntowcsnwithcmgr (mbs, &mbslen, wcs, &wcslen, qse_awk_rtx_getcmgr(rtx));
wcs[wcslen] = QSE_WT('\0');
if (_wcslen) *_wcslen = wcslen;
return wcs;
}
qse_mchar_t* qse_awk_rtx_wcsntombsdup (qse_awk_rtx_t* rtx, const qse_wchar_t* wcs, qse_size_t _wcslen, qse_size_t* _mbslen)
{
qse_size_t mbslen, wcslen;
qse_mchar_t* mbs;
wcslen = _wcslen;
if (qse_wcsntombsnwithcmgr(wcs, &wcslen, QSE_NULL, &mbslen, qse_awk_rtx_getcmgr(rtx)) <= -1)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_EINVAL, QSE_NULL);
return QSE_NULL;
}
mbs = qse_awk_rtx_allocmem(rtx, QSE_SIZEOF(*mbs) * (mbslen + 1));
if (!mbs) return QSE_NULL;
wcslen = _wcslen;
qse_wcsntombsnwithcmgr (wcs, &wcslen, mbs, &mbslen, qse_awk_rtx_getcmgr(rtx));
mbs[mbslen] = QSE_MT('\0');
if (_mbslen) *_mbslen = mbslen;
return mbs;
}