2019-12-13 04:29:58 +00:00
|
|
|
/*
|
|
|
|
* $Id$
|
|
|
|
*
|
|
|
|
Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions
|
|
|
|
are met:
|
|
|
|
1. Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer in the
|
|
|
|
documentation and/or other materials provided with the distribution.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
|
|
|
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "hawk-prv.h"
|
|
|
|
|
|
|
|
/*#define USE_REX */
|
|
|
|
|
|
|
|
#if defined(USE_REX)
|
|
|
|
# include <hawk-rex.h>
|
|
|
|
#else
|
|
|
|
# include <hawk-tre.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
hawk_ooch_t* hawk_rtx_strtok (
|
|
|
|
hawk_rtx_t* rtx, const hawk_ooch_t* s,
|
|
|
|
const hawk_ooch_t* delim, hawk_oocs_t* tok)
|
|
|
|
{
|
|
|
|
return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, hawk_count_oocstr(delim), tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
hawk_ooch_t* hawk_rtx_strxtok (
|
|
|
|
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
|
|
|
|
const hawk_ooch_t* delim, hawk_oocs_t* tok)
|
|
|
|
{
|
|
|
|
return hawk_rtx_strxntok(rtx, s, len, delim, hawk_count_oocstr(delim), tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
hawk_ooch_t* hawk_rtx_strntok (
|
|
|
|
hawk_rtx_t* rtx, const hawk_ooch_t* s,
|
|
|
|
const hawk_ooch_t* delim, hawk_oow_t delim_len,
|
|
|
|
hawk_oocs_t* tok)
|
|
|
|
{
|
|
|
|
return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, delim_len, tok);
|
|
|
|
}
|
|
|
|
|
|
|
|
hawk_ooch_t* hawk_rtx_strxntok (
|
|
|
|
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
|
|
|
|
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok)
|
|
|
|
{
|
|
|
|
const hawk_ooch_t* p = s, *d;
|
|
|
|
const hawk_ooch_t* end = s + len;
|
|
|
|
const hawk_ooch_t* sp = HAWK_NULL, * ep = HAWK_NULL;
|
|
|
|
const hawk_ooch_t* delim_end = delim + delim_len;
|
|
|
|
hawk_ooch_t c;
|
|
|
|
int delim_mode;
|
|
|
|
|
|
|
|
#define __DELIM_NULL 0
|
|
|
|
#define __DELIM_EMPTY 1
|
|
|
|
#define __DELIM_SPACES 2
|
|
|
|
#define __DELIM_NOSPACES 3
|
|
|
|
#define __DELIM_COMPOSITE 4
|
|
|
|
if (delim == HAWK_NULL) delim_mode = __DELIM_NULL;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
delim_mode = __DELIM_EMPTY;
|
|
|
|
|
|
|
|
for (d = delim; d < delim_end; d++)
|
|
|
|
{
|
|
|
|
if (hawk_is_ooch_space(*d))
|
|
|
|
{
|
|
|
|
if (delim_mode == __DELIM_EMPTY)
|
|
|
|
delim_mode = __DELIM_SPACES;
|
|
|
|
else if (delim_mode == __DELIM_NOSPACES)
|
|
|
|
{
|
|
|
|
delim_mode = __DELIM_COMPOSITE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (delim_mode == __DELIM_EMPTY)
|
|
|
|
delim_mode = __DELIM_NOSPACES;
|
|
|
|
else if (delim_mode == __DELIM_SPACES)
|
|
|
|
{
|
|
|
|
delim_mode = __DELIM_COMPOSITE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* TODO: verify the following statement... */
|
|
|
|
if (delim_mode == __DELIM_SPACES &&
|
|
|
|
delim_len == 1 &&
|
|
|
|
delim[0] != HAWK_T(' ')) delim_mode = __DELIM_NOSPACES;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (delim_mode == __DELIM_NULL)
|
|
|
|
{
|
|
|
|
/* when HAWK_NULL is given as "delim", it trims off the
|
|
|
|
* leading and trailing spaces characters off the source
|
|
|
|
* string "s" eventually. */
|
|
|
|
|
|
|
|
while (p < end && hawk_is_ooch_space(*p)) p++;
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
c = *p;
|
|
|
|
|
|
|
|
if (!hawk_is_ooch_space(c))
|
|
|
|
{
|
|
|
|
if (sp == HAWK_NULL) sp = p;
|
|
|
|
ep = p;
|
|
|
|
}
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (delim_mode == __DELIM_EMPTY)
|
|
|
|
{
|
|
|
|
/* each character in the source string "s" becomes a token. */
|
|
|
|
if (p < end)
|
|
|
|
{
|
|
|
|
c = *p;
|
|
|
|
sp = p;
|
|
|
|
ep = p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (delim_mode == __DELIM_SPACES)
|
|
|
|
{
|
|
|
|
/* each token is delimited by space characters. all leading
|
|
|
|
* and trailing spaces are removed. */
|
|
|
|
|
|
|
|
while (p < end && hawk_is_ooch_space(*p)) p++;
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
c = *p;
|
|
|
|
if (hawk_is_ooch_space(c)) break;
|
|
|
|
if (sp == HAWK_NULL) sp = p;
|
|
|
|
ep = p++;
|
|
|
|
}
|
|
|
|
while (p < end && hawk_is_ooch_space(*p)) p++;
|
|
|
|
}
|
|
|
|
else if (delim_mode == __DELIM_NOSPACES)
|
|
|
|
{
|
|
|
|
/* each token is delimited by one of charaters
|
|
|
|
* in the delimeter set "delim". */
|
|
|
|
if (rtx->gbl.ignorecase)
|
|
|
|
{
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
c = hawk_to_ooch_upper(*p);
|
|
|
|
for (d = delim; d < delim_end; d++)
|
|
|
|
{
|
|
|
|
if (c == hawk_to_ooch_upper(*d)) goto exit_loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sp == HAWK_NULL) sp = p;
|
|
|
|
ep = p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
c = *p;
|
|
|
|
for (d = delim; d < delim_end; d++)
|
|
|
|
{
|
|
|
|
if (c == *d) goto exit_loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sp == HAWK_NULL) sp = p;
|
|
|
|
ep = p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else /* if (delim_mode == __DELIM_COMPOSITE) */
|
|
|
|
{
|
|
|
|
/* each token is delimited by one of non-space charaters
|
|
|
|
* in the delimeter set "delim". however, all space characters
|
|
|
|
* surrounding the token are removed */
|
|
|
|
while (p < end && hawk_is_ooch_space(*p)) p++;
|
|
|
|
if (rtx->gbl.ignorecase)
|
|
|
|
{
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
c = hawk_to_ooch_upper(*p);
|
|
|
|
if (hawk_is_ooch_space(c))
|
|
|
|
{
|
|
|
|
p++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
for (d = delim; d < delim_end; d++)
|
|
|
|
{
|
|
|
|
if (c == hawk_to_ooch_upper(*d))
|
|
|
|
goto exit_loop;
|
|
|
|
}
|
|
|
|
if (sp == HAWK_NULL) sp = p;
|
|
|
|
ep = p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
c = *p;
|
|
|
|
if (hawk_is_ooch_space(c))
|
|
|
|
{
|
|
|
|
p++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
for (d = delim; d < delim_end; d++)
|
|
|
|
{
|
|
|
|
if (c == *d) goto exit_loop;
|
|
|
|
}
|
|
|
|
if (sp == HAWK_NULL) sp = p;
|
|
|
|
ep = p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
exit_loop:
|
|
|
|
if (sp == HAWK_NULL)
|
|
|
|
{
|
|
|
|
tok->ptr = HAWK_NULL;
|
|
|
|
tok->len = (hawk_oow_t)0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
tok->ptr = (hawk_ooch_t*)sp;
|
|
|
|
tok->len = ep - sp + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if HAWK_NULL is returned, this function should not be called again */
|
|
|
|
if (p >= end) return HAWK_NULL;
|
|
|
|
if (delim_mode == __DELIM_EMPTY ||
|
|
|
|
delim_mode == __DELIM_SPACES) return (hawk_ooch_t*)p;
|
|
|
|
return (hawk_ooch_t*)++p;
|
|
|
|
}
|
|
|
|
|
|
|
|
hawk_ooch_t* hawk_rtx_strxntokbyrex (
|
|
|
|
hawk_rtx_t* rtx,
|
|
|
|
const hawk_ooch_t* str, hawk_oow_t len,
|
|
|
|
const hawk_ooch_t* substr, hawk_oow_t sublen,
|
|
|
|
void* rex, hawk_oocs_t* tok,
|
|
|
|
hawk_errnum_t* errnum)
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
hawk_oow_t i;
|
|
|
|
hawk_oocs_t match, s, cursub, realsub;
|
|
|
|
|
|
|
|
s.ptr = (hawk_ooch_t*)str;
|
|
|
|
s.len = len;
|
|
|
|
|
|
|
|
cursub.ptr = (hawk_ooch_t*)substr;
|
|
|
|
cursub.len = sublen;
|
|
|
|
|
|
|
|
realsub.ptr = (hawk_ooch_t*)substr;
|
|
|
|
realsub.len = sublen;
|
|
|
|
|
|
|
|
while (cursub.len > 0)
|
|
|
|
{
|
|
|
|
n = hawk_matchrex (
|
|
|
|
rtx->awk, rex, rtx->gbl.ignorecase,
|
|
|
|
&s, &cursub, &match, HAWK_NULL, errnum);
|
|
|
|
if (n == -1) return HAWK_NULL;
|
|
|
|
if (n == 0)
|
|
|
|
{
|
|
|
|
/* no match has been found.
|
|
|
|
* return the entire string as a token */
|
|
|
|
tok->ptr = realsub.ptr;
|
|
|
|
tok->len = realsub.len;
|
|
|
|
*errnum = HAWK_ENOERR;
|
|
|
|
return HAWK_NULL;
|
|
|
|
}
|
|
|
|
|
2019-12-13 08:26:54 +00:00
|
|
|
HAWK_ASSERT (hawk_rtx_gethawk(rtx), n == 1);
|
2019-12-13 04:29:58 +00:00
|
|
|
|
|
|
|
if (match.len == 0)
|
|
|
|
{
|
|
|
|
/* the match length is zero. */
|
|
|
|
cursub.ptr++;
|
|
|
|
cursub.len--;
|
|
|
|
}
|
|
|
|
else if (rtx->gbl.striprecspc > 0 || (rtx->gbl.striprecspc < 0 && (rtx->awk->opt.trait & HAWK_STRIPRECSPC)))
|
|
|
|
{
|
|
|
|
/* match at the beginning of the input string */
|
|
|
|
if (match.ptr == substr)
|
|
|
|
{
|
|
|
|
for (i = 0; i < match.len; i++)
|
|
|
|
{
|
|
|
|
if (!hawk_is_ooch_space(match.ptr[i])) goto exit_loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* the match that is all spaces at the
|
|
|
|
* beginning of the input string is skipped */
|
|
|
|
cursub.ptr += match.len;
|
|
|
|
cursub.len -= match.len;
|
|
|
|
|
|
|
|
/* adjust the substring by skipping the leading
|
|
|
|
* spaces and retry matching */
|
|
|
|
realsub.ptr = (hawk_ooch_t*)substr + match.len;
|
|
|
|
realsub.len -= match.len;
|
|
|
|
}
|
|
|
|
else break;
|
|
|
|
}
|
|
|
|
else break;
|
|
|
|
}
|
|
|
|
|
|
|
|
exit_loop:
|
|
|
|
if (cursub.len <= 0)
|
|
|
|
{
|
|
|
|
tok->ptr = realsub.ptr;
|
|
|
|
tok->len = realsub.len;
|
|
|
|
*errnum = HAWK_ENOERR;
|
|
|
|
return HAWK_NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
tok->ptr = realsub.ptr;
|
|
|
|
tok->len = match.ptr - realsub.ptr;
|
|
|
|
|
|
|
|
for (i = 0; i < match.len; i++)
|
|
|
|
{
|
|
|
|
if (!hawk_is_ooch_space(match.ptr[i]))
|
|
|
|
{
|
|
|
|
/* the match contains a non-space character. */
|
|
|
|
*errnum = HAWK_ENOERR;
|
|
|
|
return (hawk_ooch_t*)match.ptr+match.len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* the match is all spaces */
|
|
|
|
*errnum = HAWK_ENOERR;
|
|
|
|
if (rtx->gbl.striprecspc > 0 || (rtx->gbl.striprecspc < 0 && (rtx->awk->opt.trait & HAWK_STRIPRECSPC)))
|
|
|
|
{
|
|
|
|
/* if the match reached the last character in the input string,
|
|
|
|
* it returns HAWK_NULL to terminate tokenization. */
|
|
|
|
return (match.ptr+match.len >= substr+sublen)?
|
|
|
|
HAWK_NULL: ((hawk_ooch_t*)match.ptr+match.len);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* if the match went beyond the the last character in the input
|
|
|
|
* string, it returns HAWK_NULL to terminate tokenization. */
|
|
|
|
return (match.ptr+match.len > substr+sublen)?
|
|
|
|
HAWK_NULL: ((hawk_ooch_t*)match.ptr+match.len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
hawk_ooch_t* hawk_rtx_strxnfld (
|
|
|
|
hawk_rtx_t* rtx, hawk_ooch_t* str, hawk_oow_t len,
|
|
|
|
hawk_ooch_t fs, hawk_ooch_t ec, hawk_ooch_t lq, hawk_ooch_t rq,
|
|
|
|
hawk_oocs_t* tok)
|
|
|
|
{
|
|
|
|
hawk_ooch_t* p = str;
|
|
|
|
hawk_ooch_t* end = str + len;
|
|
|
|
int escaped = 0, quoted = 0;
|
|
|
|
hawk_ooch_t* ts; /* token start */
|
|
|
|
hawk_ooch_t* tp; /* points to one char past the last token char */
|
|
|
|
hawk_ooch_t* xp; /* points to one char past the last effective char */
|
|
|
|
|
|
|
|
/* skip leading spaces */
|
|
|
|
while (p < end && hawk_is_ooch_space(*p)) p++;
|
|
|
|
|
|
|
|
/* initialize token pointers */
|
|
|
|
ts = tp = xp = p;
|
|
|
|
|
|
|
|
while (p < end)
|
|
|
|
{
|
|
|
|
char c = *p;
|
|
|
|
|
|
|
|
if (escaped)
|
|
|
|
{
|
|
|
|
*tp++ = c; xp = tp; p++;
|
|
|
|
escaped = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (c == ec)
|
|
|
|
{
|
|
|
|
escaped = 1;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
else if (quoted)
|
|
|
|
{
|
|
|
|
if (c == rq)
|
|
|
|
{
|
|
|
|
quoted = 0;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*tp++ = c; xp = tp; p++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (c == fs)
|
|
|
|
{
|
|
|
|
tok->ptr = ts;
|
|
|
|
tok->len = xp - ts;
|
|
|
|
p++;
|
|
|
|
|
|
|
|
if (hawk_is_ooch_space(fs))
|
|
|
|
{
|
|
|
|
while (p < end && *p == fs) p++;
|
|
|
|
if (p >= end) return HAWK_NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c == lq)
|
|
|
|
{
|
|
|
|
quoted = 1;
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*tp++ = c; p++;
|
|
|
|
if (!hawk_is_ooch_space(c)) xp = tp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (escaped)
|
|
|
|
{
|
|
|
|
/* if it is still escaped, the last character must be
|
|
|
|
* the escaper itself. treat it as a normal character */
|
|
|
|
*xp++ = ec;
|
|
|
|
}
|
|
|
|
|
|
|
|
tok->ptr = ts;
|
|
|
|
tok->len = xp - ts;
|
|
|
|
return HAWK_NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(USE_REX)
/*
 * Converts an error number of the REX engine to the matching HAWK error
 * number. Any number that has no counterpart is converted to
 * HAWK_EINTERN.
 */
static HAWK_INLINE int rexerr_to_errnum (int err)
{
	static const struct
	{
		int from; /* REX error number */
		int to;   /* HAWK error number */
	} map[] =
	{
		{ HAWK_REX_ENOERR,  HAWK_ENOERR     },
		{ HAWK_REX_ENOMEM,  HAWK_ENOMEM     },
		{ HAWK_REX_ENOCOMP, HAWK_EREXBL     },
		{ HAWK_REX_ERECUR,  HAWK_EREXRECUR  },
		{ HAWK_REX_ERPAREN, HAWK_EREXRPAREN },
		{ HAWK_REX_ERBRACK, HAWK_EREXRBRACK },
		{ HAWK_REX_ERBRACE, HAWK_EREXRBRACE },
		{ HAWK_REX_ECOLON,  HAWK_EREXCOLON  },
		{ HAWK_REX_ECRANGE, HAWK_EREXCRANGE },
		{ HAWK_REX_ECCLASS, HAWK_EREXCCLASS },
		{ HAWK_REX_EBOUND,  HAWK_EREXBOUND  },
		{ HAWK_REX_ESPCAWP, HAWK_EREXSPCAWP },
		{ HAWK_REX_EPREEND, HAWK_EREXPREEND }
	};
	hawk_oow_t i;

	for (i = 0; i < HAWK_COUNTOF(map); i++)
	{
		if (map[i].from == err) return map[i].to;
	}

	return HAWK_EINTERN;
}
#endif
|
|
|
|
|
|
|
|
int hawk_buildrex (
|
|
|
|
hawk_t* awk, const hawk_ooch_t* ptn, hawk_oow_t len,
|
|
|
|
hawk_errnum_t* errnum, void** code, void** icode)
|
|
|
|
{
|
|
|
|
#if defined(USE_REX)
|
|
|
|
hawk_rex_errnum_t err;
|
|
|
|
void* p;
|
|
|
|
|
|
|
|
if (code || icode)
|
|
|
|
{
|
|
|
|
p = hawk_buildrex (
|
|
|
|
hawk_getmmgr(awk), awk->opt.depth.s.rex_build,
|
|
|
|
((awk->opt.trait & HAWK_REXBOUND)? 0: HAWK_REX_NOBOUND),
|
|
|
|
ptn, len, &err
|
|
|
|
);
|
|
|
|
if (p == HAWK_NULL)
|
|
|
|
{
|
|
|
|
*errnum = rexerr_to_errnum(err);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (code) *code = p;
|
|
|
|
if (icode) *icode = p;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
#else
|
|
|
|
hawk_tre_t* tre = HAWK_NULL;
|
|
|
|
hawk_tre_t* itre = HAWK_NULL;
|
|
|
|
int opt = HAWK_TRE_EXTENDED;
|
|
|
|
|
|
|
|
if (code)
|
|
|
|
{
|
|
|
|
tre = hawk_tre_open(awk, 0);
|
|
|
|
if (tre == HAWK_NULL)
|
|
|
|
{
|
|
|
|
*errnum = HAWK_ENOMEM;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(awk->opt.trait & HAWK_REXBOUND)) opt |= HAWK_TRE_NOBOUND;
|
|
|
|
|
|
|
|
if (hawk_tre_compx (tre, ptn, len, HAWK_NULL, opt) <= -1)
|
|
|
|
{
|
|
|
|
#if 0 /* TODO */
|
|
|
|
|
|
|
|
if (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMEM) *errnum = HAWK_ENOMEM;
|
|
|
|
else
|
|
|
|
SETERR1 (awk, HAWK_EREXBL, str->ptr, str->len, loc);
|
|
|
|
#endif
|
|
|
|
*errnum = (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMEM)?
|
|
|
|
HAWK_ENOMEM: HAWK_EREXBL;
|
|
|
|
hawk_tre_close (tre);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (icode)
|
|
|
|
{
|
|
|
|
itre = hawk_tre_open(awk, 0);
|
|
|
|
if (itre == HAWK_NULL)
|
|
|
|
{
|
|
|
|
if (tre) hawk_tre_close (tre);
|
|
|
|
*errnum = HAWK_ENOMEM;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ignorecase is a compile option for TRE */
|
|
|
|
if (hawk_tre_compx (itre, ptn, len, HAWK_NULL, opt | HAWK_TRE_IGNORECASE) <= -1)
|
|
|
|
{
|
|
|
|
#if 0 /* TODO */
|
|
|
|
|
|
|
|
if (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMEM) *errnum = HAWK_ENOMEM;
|
|
|
|
else
|
|
|
|
SETERR1 (awk, HAWK_EREXBL, str->ptr, str->len, loc);
|
|
|
|
#endif
|
|
|
|
*errnum = (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMEM)?
|
|
|
|
HAWK_ENOMEM: HAWK_EREXBL;
|
|
|
|
hawk_tre_close (itre);
|
|
|
|
if (tre) hawk_tre_close (tre);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (code) *code = tre;
|
|
|
|
if (icode) *icode = itre;
|
|
|
|
return 0;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
#if !defined(USE_REX)
|
|
|
|
static int matchtre (
|
|
|
|
hawk_t* awk, hawk_tre_t* tre, int opt,
|
|
|
|
const hawk_oocs_t* str, hawk_oocs_t* mat,
|
|
|
|
hawk_oocs_t submat[9], hawk_errnum_t* errnum)
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
/*hawk_tre_match_t match[10] = { { 0, 0 }, };*/
|
|
|
|
hawk_tre_match_t match[10];
|
|
|
|
|
|
|
|
HAWK_MEMSET (match, 0, HAWK_SIZEOF(match));
|
|
|
|
n = hawk_tre_execx(tre, str->ptr, str->len, match, HAWK_COUNTOF(match), opt);
|
|
|
|
if (n <= -1)
|
|
|
|
{
|
|
|
|
if (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMATCH) return 0;
|
|
|
|
|
|
|
|
#if 0 /* TODO: */
|
|
|
|
*errnum = (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMEM)?
|
|
|
|
HAWK_ENOMEM: HAWK_EREXMA;
|
|
|
|
SETERR0 (sed, errnum, loc);
|
|
|
|
#endif
|
|
|
|
*errnum = (HAWK_TRE_ERRNUM(tre) == HAWK_TRE_ENOMEM)?
|
|
|
|
HAWK_ENOMEM: HAWK_EREXMA;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
HAWK_ASSERT (awk, match[0].rm_so != -1);
|
|
|
|
if (mat)
|
|
|
|
{
|
|
|
|
mat->ptr = &str->ptr[match[0].rm_so];
|
|
|
|
mat->len = match[0].rm_eo - match[0].rm_so;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (submat)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* you must intialize submat before you pass into this
|
|
|
|
* function because it can abort filling */
|
|
|
|
for (i = 1; i < HAWK_COUNTOF(match); i++)
|
|
|
|
{
|
|
|
|
if (match[i].rm_so != -1)
|
|
|
|
{
|
|
|
|
submat[i-1].ptr = &str->ptr[match[i].rm_so];
|
|
|
|
submat[i-1].len = match[i].rm_eo - match[i].rm_so;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int hawk_matchrex (
|
|
|
|
hawk_t* awk, void* code, int icase,
|
|
|
|
const hawk_oocs_t* str, const hawk_oocs_t* substr,
|
|
|
|
hawk_oocs_t* match, hawk_oocs_t submat[9], hawk_errnum_t* errnum)
|
|
|
|
{
|
|
|
|
#if defined(USE_REX)
|
|
|
|
int x;
|
|
|
|
hawk_rex_errnum_t err;
|
|
|
|
|
|
|
|
/* submatch is not supported */
|
|
|
|
x = hawk_matchrex (
|
|
|
|
hawk_getmmgr(awk), awk->opt.depth.s.rex_match, code,
|
|
|
|
(icase? HAWK_REX_IGNORECASE: 0), str, substr, match, &err);
|
|
|
|
if (x <= -1) *errnum = rexerr_to_errnum(err);
|
|
|
|
return x;
|
|
|
|
#else
|
|
|
|
int x;
|
|
|
|
int opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING ??? */
|
|
|
|
|
|
|
|
x = matchtre (
|
|
|
|
awk, code,
|
|
|
|
((str->ptr == substr->ptr)? opt: (opt | HAWK_TRE_NOTBOL)),
|
|
|
|
substr, match, submat, errnum
|
|
|
|
);
|
|
|
|
return x;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Releases the regular expression objects "code" and "icode".
 *
 * Either of the two may be HAWK_NULL, in which case it is ignored.
 * If both refer to the same object, the object is released once only.
 */
void hawk_freerex (hawk_t* awk, void* code, void* icode)
{
	if (code != HAWK_NULL)
	{
	#if defined(USE_REX)
		hawk_freerex ((awk)->mmgr, code);
	#else
		hawk_tre_close (code);
	#endif
	}

	/* nothing more to do if "icode" is absent or has been released
	 * above already */
	if (icode == HAWK_NULL || icode == code) return;

#if defined(USE_REX)
	hawk_freerex ((awk)->mmgr, icode);
#else
	hawk_tre_close (icode);
#endif
}
|
|
|
|
|
|
|
|
int hawk_rtx_matchrex (
|
|
|
|
hawk_rtx_t* rtx, hawk_val_t* val,
|
|
|
|
const hawk_oocs_t* str, const hawk_oocs_t* substr, hawk_oocs_t* match, hawk_oocs_t submat[9])
|
|
|
|
{
|
|
|
|
void* code;
|
|
|
|
int icase, x;
|
|
|
|
hawk_errnum_t awkerr;
|
|
|
|
#if defined(USE_REX)
|
|
|
|
hawk_rex_errnum_t rexerr;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
icase = rtx->gbl.ignorecase;
|
|
|
|
|
|
|
|
if (HAWK_RTX_GETVALTYPE (rtx, val) == HAWK_VAL_REX)
|
|
|
|
{
|
|
|
|
code = ((hawk_val_rex_t*)val)->code[icase];
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* convert to a string and build a regular expression */
|
|
|
|
hawk_oocs_t tmp;
|
|
|
|
|
|
|
|
tmp.ptr = hawk_rtx_getvaloocstr (rtx, val, &tmp.len);
|
|
|
|
if (tmp.ptr == HAWK_NULL) return -1;
|
|
|
|
|
|
|
|
x = icase? hawk_buildrex(rtx->awk, tmp.ptr, tmp.len, &awkerr, HAWK_NULL, &code):
|
|
|
|
hawk_buildrex(rtx->awk, tmp.ptr, tmp.len, &awkerr, &code, HAWK_NULL);
|
|
|
|
hawk_rtx_freevaloocstr (rtx, val, tmp.ptr);
|
|
|
|
if (x <= -1)
|
|
|
|
{
|
|
|
|
hawk_rtx_seterrnum (rtx, awkerr, HAWK_NULL);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(USE_REX)
|
|
|
|
/* submatch not supported */
|
|
|
|
x = hawk_matchrex (
|
|
|
|
hawk_rtx_getmmgr(rtx), rtx->awk->opt.depth.s.rex_match,
|
|
|
|
code, (icase? HAWK_REX_IGNORECASE: 0),
|
|
|
|
str, substr, match, &rexerr);
|
|
|
|
if (x <= -1) hawk_rtx_seterrnum (rtx, rexerr_to_errnum(rexerr), HAWK_NULL);
|
|
|
|
#else
|
|
|
|
x = matchtre (
|
|
|
|
rtx->awk, code,
|
|
|
|
((str->ptr == substr->ptr)? HAWK_TRE_BACKTRACKING: (HAWK_TRE_BACKTRACKING | HAWK_TRE_NOTBOL)),
|
|
|
|
substr, match, submat, &awkerr
|
|
|
|
);
|
|
|
|
if (x <= -1) hawk_rtx_seterrnum (rtx, awkerr, HAWK_NULL);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (HAWK_RTX_GETVALTYPE(rtx, val) == HAWK_VAL_REX)
|
|
|
|
{
|
|
|
|
/* nothing to free */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (icase)
|
|
|
|
hawk_freerex (rtx->awk, HAWK_NULL, code);
|
|
|
|
else
|
|
|
|
hawk_freerex (rtx->awk, code, HAWK_NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return x;
|
|
|
|
}
|