2006-07-17 06:21:39 +00:00
|
|
|
/*
|
2006-07-21 05:05:03 +00:00
|
|
|
* $Id: rex.c,v 1.8 2006-07-21 05:05:03 bacon Exp $
|
2006-07-17 06:21:39 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <xp/awk/awk_i.h>
|
|
|
|
|
|
|
|
#ifndef XP_AWK_STAND_ALONE
|
|
|
|
#include <xp/bas/memory.h>
|
2006-07-20 03:41:00 +00:00
|
|
|
#include <xp/bas/string.h>
|
2006-07-17 06:21:39 +00:00
|
|
|
#include <xp/bas/assert.h>
|
|
|
|
#endif
|
|
|
|
|
2006-07-20 03:41:00 +00:00
|
|
|
/* classification of the character most recently fetched by __next_char() */
enum
{
	CT_EOF = 0,  /* the end of the pattern has been reached */
	CT_SPECIAL,  /* a metacharacter at the current scanning level */
	CT_NORMAL    /* a literal character, including an escaped one */
};
|
|
|
|
|
|
|
|
/* scanning level passed to __next_char(); it selects which characters
 * are classified as CT_SPECIAL */
enum
{
	LEVEL_TOP = 0,  /* outside of any bracket or brace */
	LEVEL_CHARSET,  /* inside [...] - only ']' is special */
	LEVEL_RANGE     /* inside {...} - ',' and '}' are special */
};
|
|
|
|
|
2006-07-17 14:27:09 +00:00
|
|
|
/* command stored in the cmd field of struct __code */
enum
{
	CMD_BOL = 0,   /* '^' */
	CMD_EOL,       /* '$' */
	CMD_ANY_CHAR,  /* '.' */
	CMD_ORD_CHAR,  /* a literal; the character follows the header */
	CMD_CHARSET,   /* '[...]'; the charset data follows the header */
	CMD_GROUP      /* '(...)'; a nested expression follows the header */
};
|
|
|
|
|
|
|
|
/* tag written in front of each item of a compiled character set */
enum
{
	CHARSET_ONE = 0,  /* followed by a single character */
	CHARSET_RANGE,    /* followed by the first and the last character */
	CHARSET_CLASS     /* character class; not emitted by the compiler yet */
};
|
|
|
|
|
|
|
|
/* identifiers of character classes. nothing visible in this file uses
 * them yet; presumably they are meant to accompany CHARSET_CLASS -
 * TODO confirm once character classes are implemented */
enum
{
	CHARSET_CLASS_PUNCT = 0,
	CHARSET_CLASS_SPACE,
	CHARSET_CLASS_DIGIT,
	CHARSET_CLASS_ALNUM
};
|
|
|
|
|
|
|
|
/* field accessors for the code element at index 'base'.
 * NOTE(review): these index rex->code as an array of elements with
 * 'dc' and 'cc' members, whereas the functions in this file access the
 * code through rex->code.buf. no use of these macros is visible here -
 * confirm whether they are stale. */
#define PC_CMD(rex,base)    ((rex)->code[(base)].dc.cmd)
#define PC_BFLAG(rex,base)  ((rex)->code[(base)].dc.bflag)
#define PC_LBOUND(rex,base) ((rex)->code[(base)].dc.lbound)
#define PC_UBOUND(rex,base) ((rex)->code[(base)].dc.ubound)
#define PC_VALUE(rex,base)  ((rex)->code[(base)].cc)

/* the smallest and the largest repetition count an atom can carry */
#define BOUND_MIN 0
#define BOUND_MAX (XP_TYPE_MAX(xp_size_t))
|
|
|
|
|
|
|
|
struct __code
|
|
|
|
{
|
2006-07-20 16:21:54 +00:00
|
|
|
//xp_byte_t cmd;
|
2006-07-21 05:05:03 +00:00
|
|
|
short cmd;
|
|
|
|
short negate; /* only for CMD_CHARSET */
|
2006-07-19 11:45:24 +00:00
|
|
|
xp_size_t lbound;
|
|
|
|
xp_size_t ubound;
|
|
|
|
};
|
|
|
|
|
2006-07-21 05:05:03 +00:00
|
|
|
/* number of pattern characters that have not been fetched yet */
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)

/* fetch the next pattern character; on failure, make the function
 * using this macro return -1 */
#define NEXT_CHAR(rex,level) \
	do \
	{ \
		if (__next_char(rex,level) == -1) return -1; \
	} \
	while (0)

/* append 'len' bytes at 'data' to the code buffer; on failure, make
 * the function using this macro return -1 */
#define ADD_CODE(rex,data,len) \
	do \
	{ \
		if (__add_code(rex,data,len) == -1) return -1; \
	} \
	while (0)
|
|
|
|
|
|
|
|
static int __compile_expression (xp_awk_rex_t* rex);
|
|
|
|
static int __compile_branch (xp_awk_rex_t* rex);
|
|
|
|
static int __compile_atom (xp_awk_rex_t* rex);
|
2006-07-21 05:05:03 +00:00
|
|
|
static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd);
|
2006-07-20 16:21:54 +00:00
|
|
|
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd);
|
|
|
|
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd);
|
2006-07-20 03:41:00 +00:00
|
|
|
static int __next_char (xp_awk_rex_t* rex, int level);
|
2006-07-19 11:45:24 +00:00
|
|
|
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len);
|
|
|
|
|
2006-07-20 16:21:54 +00:00
|
|
|
static const xp_byte_t* __print_expression (const xp_byte_t* p);
|
|
|
|
static const xp_byte_t* __print_branch (const xp_byte_t* p);
|
|
|
|
static const xp_byte_t* __print_atom (const xp_byte_t* p);
|
|
|
|
|
2006-07-17 06:21:39 +00:00
|
|
|
/*
 * Prepares a regular expression object for use.
 *
 * If 'rex' is XP_NULL, the object itself is allocated with xp_malloc()
 * and marked as dynamic so that xp_awk_rex_close() frees it. Otherwise,
 * the object supplied by the caller is initialized in place.
 * A code buffer of 512 bytes is allocated in both cases.
 *
 * Returns the object on success and XP_NULL if any allocation fails.
 */
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
{
	int allocated_here = (rex == XP_NULL);

	if (allocated_here)
	{
		rex = (xp_awk_rex_t*) xp_malloc (xp_sizeof(xp_awk_rex_t));
		if (rex == XP_NULL) return XP_NULL;
	}

	rex->__dynamic = allocated_here? xp_true: xp_false;

	rex->code.size = 0;
	rex->code.capa = 512;
	rex->code.buf = (xp_byte_t*) xp_malloc (rex->code.capa);
	if (rex->code.buf != XP_NULL) return rex;

	/* no code buffer. release the object if it is owned here */
	if (rex->__dynamic) xp_free (rex);
	return XP_NULL;
}
|
|
|
|
|
|
|
|
/*
 * Releases the code buffer of 'rex' and, if the object was allocated
 * by xp_awk_rex_open(), the object itself.
 */
void xp_awk_rex_close (xp_awk_rex_t* rex)
{
	xp_free (rex->code.buf);

	/* an object supplied by the caller is not freed here */
	if (!rex->__dynamic) return;
	xp_free (rex);
}
|
|
|
|
|
2006-07-19 11:45:24 +00:00
|
|
|
/*
 * Compiles the pattern 'ptn' of 'len' characters into rex->code.
 *
 * Any code compiled previously is discarded. If the compilation fails,
 * the code buffer is left empty so that partially emitted code can
 * never be walked by xp_awk_rex_print().
 *
 * Returns 0 on success and -1 on failure.
 */
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
{
	rex->ptn.ptr = ptn;
	rex->ptn.end = rex->ptn.ptr + len;
	rex->ptn.curp = rex->ptn.ptr;

	rex->ptn.curc.type = CT_EOF;
	rex->ptn.curc.value = XP_T('\0');

	rex->code.size = 0;

	/* nothing has been emitted yet if this fails */
	NEXT_CHAR (rex, LEVEL_TOP);

	if (__compile_expression (rex) == -1)
	{
		/* discard whatever has been emitted so far */
		rex->code.size = 0;
		xp_printf (XP_T("cannot compile the expression\n"));
		return -1;
	}

	if (rex->ptn.curc.type != CT_EOF)
	{
		/* TODO: error handling */
		/* garbage after the expression */
		rex->code.size = 0;
		xp_printf (XP_T("garbage after expression\n"));
		return -1;
	}

	xp_printf (XP_T("code.capa = %u\n"), (unsigned int)rex->code.capa);
	xp_printf (XP_T("code.size = %u\n"), (unsigned int)rex->code.size);
	return 0;
}
|
2006-07-18 15:28:26 +00:00
|
|
|
|
2006-07-19 11:45:24 +00:00
|
|
|
/*
 * Compiles an expression: one or more branches separated by '|'.
 *
 * The emitted code begins with two xp_size_t fields - the number of
 * branches and the length in bytes of the whole expression including
 * the two fields - followed by the code of each branch.
 *
 * The header fields are addressed by their offsets in the code buffer
 * and never through a pointer kept across ADD_CODE() or
 * __compile_branch(), because __add_code() may move the buffer with
 * xp_realloc().
 *
 * Returns 1 if at least one branch has been compiled, 0 if the first
 * branch is empty, and -1 on error.
 */
static int __compile_expression (xp_awk_rex_t* rex)
{
	xp_size_t zero = 0;
	xp_size_t old_size, nb_off, el_off;
	xp_size_t* nb, * el;
	int n;

	old_size = rex->code.size;

	/* secure space for header and set the header fields to zero */
	nb_off = rex->code.size;
	ADD_CODE (rex, &zero, xp_sizeof(zero));

	el_off = rex->code.size;
	ADD_CODE (rex, &zero, xp_sizeof(zero));

	/* handle the first branch */
	n = __compile_branch (rex);
	if (n == -1) return -1;
	if (n == 0)
	{
		/* TODO: what if the expression starts with a vertical bar??? */

		/* keep the length field consistent with the bytes emitted */
		el = (xp_size_t*)&rex->code.buf[el_off];
		*el = rex->code.size - old_size;
		return 0;
	}

	nb = (xp_size_t*)&rex->code.buf[nb_off];
	(*nb) += 1;

	/* handle subsequent branches if any */
	while (rex->ptn.curc.type == CT_SPECIAL &&
	       rex->ptn.curc.value == XP_T('|'))
	{
		NEXT_CHAR (rex, LEVEL_TOP);

		n = __compile_branch(rex);
		if (n == -1) return -1;
		if (n == 0)
		{
			/* if the pattern ends with a vertical bar(|),
			 * this block can be reached. however, the use
			 * of such an expression is highly discouraged */

			/* TODO: should it return an error???? */
			break;
		}

		/* the buffer may have moved. locate the counter again */
		nb = (xp_size_t*)&rex->code.buf[nb_off];
		(*nb) += 1;
	}

	el = (xp_size_t*)&rex->code.buf[el_off];
	*el = rex->code.size - old_size;
	return 1;
}
|
|
|
|
|
2006-07-19 11:45:24 +00:00
|
|
|
/*
 * Compiles a branch: a sequence of atoms, each optionally followed by
 * a bound.
 *
 * The emitted code begins with two xp_size_t fields - the number of
 * atoms and the length in bytes of the whole branch including the two
 * fields - followed by the code of each atom.
 *
 * The header fields and the atom header handed to __compile_bound()
 * are located by their offsets in the code buffer right before use,
 * because __add_code() may move the buffer with xp_realloc().
 *
 * An empty branch emits nothing: its header is removed before
 * returning 0. The caller does not count such a branch, so leaving the
 * header behind would desynchronize anything walking the code.
 *
 * Returns 1 if at least one atom has been compiled, 0 if the branch is
 * empty, and -1 on error. The code size is restored to the value on
 * entry if an atom or a bound fails to compile.
 */
static int __compile_branch (xp_awk_rex_t* rex)
{
	int n;
	xp_size_t zero = 0;
	xp_size_t old_size, na_off, bl_off, cmd_off;
	xp_size_t* na, * bl;

	old_size = rex->code.size;

	na_off = rex->code.size;
	ADD_CODE (rex, &zero, xp_sizeof(zero));

	bl_off = rex->code.size;
	ADD_CODE (rex, &zero, xp_sizeof(zero));

	while (1)
	{
		/* the header of the next atom is emitted at this offset */
		cmd_off = rex->code.size;

		n = __compile_atom (rex);
		if (n == -1)
		{
			rex->code.size = old_size;
			return -1;
		}

		if (n == 0) break; /* no atom */

		/* __compile_bound() appends no code. the pointer derived
		 * here stays valid for the duration of the call */
		n = __compile_bound (rex,
			(struct __code*)&rex->code.buf[cmd_off]);
		if (n == -1)
		{
			rex->code.size = old_size;
			return -1;
		}

		/* n == 0  no bound character. just continue */
		/* n == 1  bound has been applied by compile_bound */

		na = (xp_size_t*)&rex->code.buf[na_off];
		(*na) += 1;
	}

	na = (xp_size_t*)&rex->code.buf[na_off];
	if ((*na) == 0)
	{
		/* drop the header of the empty branch */
		rex->code.size = old_size;
		return 0;
	}

	bl = (xp_size_t*)&rex->code.buf[bl_off];
	*bl = rex->code.size - old_size;
	return 1;
}
|
|
|
|
|
|
|
|
/*
 * Compiles a single atom at the current pattern position: a group, an
 * anchor ('^' or '$'), '.', a character set or a literal character.
 *
 * Every atom is emitted as a struct __code header whose bounds are
 * both 1, followed by atom-specific data: the character for
 * CMD_ORD_CHAR, the charset data for CMD_CHARSET and a nested
 * expression for CMD_GROUP.
 *
 * Returns 1 if an atom has been emitted, 0 if the current character
 * cannot begin an atom (the end of the pattern or a special character
 * not handled here), and -1 on error.
 */
static int __compile_atom (xp_awk_rex_t* rex)
{
	int n;
	struct __code tmp;

	if (rex->ptn.curc.type == CT_EOF) return 0;

	/* every atom occurs exactly once until __compile_bound()
	 * says otherwise */
	tmp.negate = 0;
	tmp.lbound = 1;
	tmp.ubound = 1;

	if (rex->ptn.curc.type != CT_SPECIAL)
	{
		xp_assert (rex->ptn.curc.type == CT_NORMAL);

		tmp.cmd = CMD_ORD_CHAR;
		ADD_CODE (rex, &tmp, xp_sizeof(tmp));
		ADD_CODE (rex, &rex->ptn.curc.value, xp_sizeof(rex->ptn.curc.value));

		NEXT_CHAR (rex, LEVEL_TOP);
		return 1;
	}

	switch (rex->ptn.curc.value)
	{
		case XP_T('('):
		{
			tmp.cmd = CMD_GROUP;
			ADD_CODE (rex, &tmp, xp_sizeof(tmp));

			NEXT_CHAR (rex, LEVEL_TOP);

			n = __compile_expression (rex);
			if (n == -1) return -1;

			if (rex->ptn.curc.type != CT_SPECIAL ||
			    rex->ptn.curc.value != XP_T(')'))
			{
				/* TODO: rex->errnum = XP_AWK_REX_ERPAREN; */
				return -1;
			}
			break;
		}

		case XP_T('^'):
		{
			tmp.cmd = CMD_BOL;
			ADD_CODE (rex, &tmp, xp_sizeof(tmp));
			break;
		}

		case XP_T('$'):
		{
			tmp.cmd = CMD_EOL;
			ADD_CODE (rex, &tmp, xp_sizeof(tmp));
			break;
		}

		case XP_T('.'):
		{
			tmp.cmd = CMD_ANY_CHAR;
			ADD_CODE (rex, &tmp, xp_sizeof(tmp));
			break;
		}

		case XP_T('['):
		{
			/* remember where the header goes. a pointer taken
			 * before ADD_CODE() would dangle if the buffer is
			 * moved by xp_realloc() */
			xp_size_t cmd_off = rex->code.size;

			tmp.cmd = CMD_CHARSET;
			ADD_CODE (rex, &tmp, xp_sizeof(tmp));

			NEXT_CHAR (rex, LEVEL_CHARSET);

			n = __compile_charset (rex,
				(struct __code*)&rex->code.buf[cmd_off]);
			if (n == -1) return -1;

			xp_assert (n != 0);

			if (rex->ptn.curc.type != CT_SPECIAL ||
			    rex->ptn.curc.value != XP_T(']'))
			{
				/* TODO: rex->errnum = XP_AWK_REX_ERBRACKET; */
				return -1;
			}
			break;
		}

		default:
		{
			/* a special character that doesn't begin an atom */
			return 0;
		}
	}

	/* move past the character that ended the atom */
	NEXT_CHAR (rex, LEVEL_TOP);
	return 1;
}
|
|
|
|
|
2006-07-21 05:05:03 +00:00
|
|
|
/*
 * Compiles the inside of a character set. The current character is the
 * one right after '['. On return, the current character is the first
 * one that is not CT_NORMAL at LEVEL_CHARSET; the caller checks if it
 * is ']'.
 *
 * The emitted code begins with two xp_size_t fields - the number of
 * items and the length in bytes of the charset data including the two
 * fields. Each item is a tag stored as xp_char_t followed by its
 * characters: CHARSET_ONE c or CHARSET_RANGE c1 c2.
 *
 * 'cmd' is the CMD_CHARSET header already emitted by the caller. It is
 * accessed only before any code is appended here, and the two counters
 * are located by their offsets right before use, because __add_code()
 * may move the buffer with xp_realloc().
 *
 * A '-' that is not followed by a normal character, as in [a-], is
 * taken as a literal '-' instead of being dropped.
 *
 * TODO: character classes such as [:digit:] are not supported yet.
 *
 * Returns 1 on success and -1 on error.
 */
static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd)
{
	xp_size_t zero = 0;
	xp_size_t old_size, csc_off, csl_off;
	xp_size_t* csc, * csl;

	/* handle the negation first while 'cmd' is known to be valid */
	if (rex->ptn.curc.type == CT_NORMAL &&
	    rex->ptn.curc.value == XP_T('^'))
	{
		cmd->negate = 1;
		NEXT_CHAR (rex, LEVEL_CHARSET);
	}

	old_size = rex->code.size;

	csc_off = rex->code.size;
	ADD_CODE (rex, &zero, xp_sizeof(zero));
	csl_off = rex->code.size;
	ADD_CODE (rex, &zero, xp_sizeof(zero));

	while (rex->ptn.curc.type == CT_NORMAL)
	{
		xp_char_t c0, c1, c2;
		int literal_dash = 0;

		c1 = rex->ptn.curc.value;
		NEXT_CHAR(rex, LEVEL_CHARSET);

		c2 = c1;
		if (rex->ptn.curc.type == CT_NORMAL &&
		    rex->ptn.curc.value == XP_T('-'))
		{
			NEXT_CHAR (rex, LEVEL_CHARSET);

			if (rex->ptn.curc.type == CT_NORMAL)
			{
				c2 = rex->ptn.curc.value;
				NEXT_CHAR(rex, LEVEL_CHARSET);
			}
			else
			{
				/* nothing to form a range with */
				literal_dash = 1;
			}
		}

		if (c1 == c2)
		{
			c0 = CHARSET_ONE;
			ADD_CODE (rex, &c0, xp_sizeof(c0));
			ADD_CODE (rex, &c1, xp_sizeof(c1));
		}
		else
		{
			c0 = CHARSET_RANGE;
			ADD_CODE (rex, &c0, xp_sizeof(c0));
			ADD_CODE (rex, &c1, xp_sizeof(c1));
			ADD_CODE (rex, &c2, xp_sizeof(c2));
		}

		csc = (xp_size_t*)&rex->code.buf[csc_off];
		(*csc) += 1;

		if (literal_dash)
		{
			c0 = CHARSET_ONE;
			c1 = XP_T('-');
			ADD_CODE (rex, &c0, xp_sizeof(c0));
			ADD_CODE (rex, &c1, xp_sizeof(c1));

			csc = (xp_size_t*)&rex->code.buf[csc_off];
			(*csc) += 1;
		}
	}

	csl = (xp_size_t*)&rex->code.buf[csl_off];
	*csl = rex->code.size - old_size;
	return 1;
}
|
2006-07-18 15:28:26 +00:00
|
|
|
|
2006-07-20 16:21:54 +00:00
|
|
|
/*
 * Applies the bound following an atom, if any, to the atom header
 * 'cmd': '+', '*', '?' or a range in braces.
 *
 * Returns 1 if a bound has been consumed and applied, 0 if the current
 * character doesn't begin a bound, and -1 on error.
 */
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd)
{
	xp_char_t c;

	if (rex->ptn.curc.type != CT_SPECIAL) return 0;
	c = rex->ptn.curc.value;

	if (c == XP_T('+'))
	{
		cmd->lbound = 1;
		cmd->ubound = BOUND_MAX;
	}
	else if (c == XP_T('*'))
	{
		cmd->lbound = 0;
		cmd->ubound = BOUND_MAX;
	}
	else if (c == XP_T('?'))
	{
		cmd->lbound = 0;
		cmd->ubound = 1;
	}
	else if (c == XP_T('{'))
	{
		NEXT_CHAR (rex, LEVEL_RANGE);

		if (__compile_range(rex, cmd) == -1) return -1;

		if (rex->ptn.curc.type != CT_SPECIAL ||
		    rex->ptn.curc.value != XP_T('}'))
		{
			/* TODO: rex->errnum = XP_AWK_REX_ERBRACE */
			return -1;
		}
	}
	else
	{
		/* some other special character. not a bound */
		return 0;
	}

	/* move past the last character of the bound */
	NEXT_CHAR (rex, LEVEL_TOP);
	return 1;
}
|
|
|
|
|
2006-07-20 16:21:54 +00:00
|
|
|
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd)
|
2006-07-19 11:45:24 +00:00
|
|
|
{
|
2006-07-20 16:21:54 +00:00
|
|
|
xp_size_t bound;
|
|
|
|
|
|
|
|
// TODO: should allow white spaces in the range???
|
|
|
|
// what if it is not in the raight format? convert it to ordinary characters??
|
|
|
|
bound = 0;
|
2006-07-21 05:05:03 +00:00
|
|
|
while (rex->ptn.curc.type == CT_NORMAL &&
|
2006-07-20 16:21:54 +00:00
|
|
|
xp_isdigit(rex->ptn.curc.value))
|
|
|
|
{
|
|
|
|
bound = bound * 10 + rex->ptn.curc.value - XP_T('0');
|
2006-07-21 05:05:03 +00:00
|
|
|
NEXT_CHAR (rex, LEVEL_RANGE);
|
2006-07-20 16:21:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cmd->lbound = bound;
|
|
|
|
|
2006-07-21 05:05:03 +00:00
|
|
|
if (rex->ptn.curc.type == CT_SPECIAL &&
|
2006-07-20 16:21:54 +00:00
|
|
|
rex->ptn.curc.value == XP_T(','))
|
|
|
|
{
|
2006-07-21 05:05:03 +00:00
|
|
|
NEXT_CHAR (rex, LEVEL_RANGE);
|
2006-07-20 16:21:54 +00:00
|
|
|
|
|
|
|
bound = 0;
|
2006-07-21 05:05:03 +00:00
|
|
|
while (rex->ptn.curc.type == CT_NORMAL &&
|
2006-07-20 16:21:54 +00:00
|
|
|
xp_isdigit(rex->ptn.curc.value))
|
|
|
|
{
|
|
|
|
bound = bound * 10 + rex->ptn.curc.value - XP_T('0');
|
2006-07-21 05:05:03 +00:00
|
|
|
NEXT_CHAR (rex, LEVEL_RANGE);
|
2006-07-20 16:21:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cmd->ubound = bound;
|
|
|
|
}
|
|
|
|
else cmd->ubound = BOUND_MAX;
|
|
|
|
|
|
|
|
return 0;
|
2006-07-19 11:45:24 +00:00
|
|
|
}
|
2006-07-18 15:28:26 +00:00
|
|
|
|
2006-07-20 03:41:00 +00:00
|
|
|
/*
 * Fetches the next character of the pattern into rex->ptn.curc.
 *
 * At the end of the pattern, the type becomes CT_EOF. A character
 * preceded by a backslash is always CT_NORMAL. Any other character is
 * CT_SPECIAL if it is a metacharacter at 'level' and CT_NORMAL
 * otherwise:
 *   LEVEL_TOP      [ | ^ $ { + ? * . ( )
 *   LEVEL_CHARSET  ]
 *   LEVEL_RANGE    , }
 *
 * Returns 0 on success and -1 if the pattern ends right after a
 * backslash.
 */
static int __next_char (xp_awk_rex_t* rex, int level)
{
	if (rex->ptn.curp >= rex->ptn.end)
	{
		rex->ptn.curc.type = CT_EOF;
		rex->ptn.curc.value = XP_T('\0');
		return 0;
	}

	rex->ptn.curc.type = CT_NORMAL;
	rex->ptn.curc.value = *rex->ptn.curp++;

	xp_printf (XP_T("[%c]\n"), rex->ptn.curc.value);

	if (rex->ptn.curc.value == XP_T('\\'))
	{
		if (rex->ptn.curp >= rex->ptn.end)
		{
			/* unexpected end of expression */
			/* TODO: rex->errnum = XP_AWK_REX_EEND; */
			return -1;
		}

		/* the escaped character is taken as it is.
		 * TODO: translate \n, \r and \t here? */
		rex->ptn.curc.value = *rex->ptn.curp++;
		return 0;
	}

	switch (level)
	{
		case LEVEL_TOP:
		{
			switch (rex->ptn.curc.value)
			{
				case XP_T('['):
				case XP_T('|'):
				case XP_T('^'):
				case XP_T('$'):
				case XP_T('{'):
				case XP_T('+'):
				case XP_T('?'):
				case XP_T('*'):
				case XP_T('.'):
				case XP_T('('):
				case XP_T(')'):
					rex->ptn.curc.type = CT_SPECIAL;
					break;
			}
			break;
		}

		case LEVEL_CHARSET:
		{
			/* '^' and '-' remain CT_NORMAL. __compile_charset()
			 * interprets them by itself */
			if (rex->ptn.curc.value == XP_T(']'))
			{
				rex->ptn.curc.type = CT_SPECIAL;
			}
			break;
		}

		case LEVEL_RANGE:
		{
			if (rex->ptn.curc.value == XP_T(',') ||
			    rex->ptn.curc.value == XP_T('}'))
			{
				rex->ptn.curc.type = CT_SPECIAL;
			}
			break;
		}
	}

	return 0;
}
|
|
|
|
|
2006-07-19 11:45:24 +00:00
|
|
|
/*
 * Appends 'len' bytes at 'data' to the code buffer of 'rex'.
 *
 * If the free space is insufficient, the capacity is doubled until the
 * data fits and the buffer is resized with xp_realloc(). The buffer
 * may move as a result, so any pointer into rex->code.buf obtained
 * before the call must be considered invalid afterwards.
 *
 * Returns 0 on success and -1 if the required capacity cannot be
 * represented in xp_size_t or the reallocation fails. The buffer and
 * its contents are left untouched on failure.
 */
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len)
{
	if (len > rex->code.capa - rex->code.size)
	{
		xp_size_t capa = rex->code.capa;
		xp_byte_t* tmp;

		/* doubling must not wrap around. a capacity wrapped to 0
		 * would never satisfy the loop condition below */
		if (capa > XP_TYPE_MAX(xp_size_t) / 2) return -1;
		capa = capa * 2;

		if (capa == 0) capa = 1;
		while (len > capa - rex->code.size)
		{
			if (capa > XP_TYPE_MAX(xp_size_t) / 2) return -1;
			capa = capa * 2;
		}

		tmp = (xp_byte_t*) xp_realloc (rex->code.buf, capa);
		if (tmp == XP_NULL)
		{
			/* TODO: rex->errnum = XP_AWK_REX_ENOMEM; */
			return -1;
		}

		rex->code.buf = tmp;
		rex->code.capa = capa;
	}

	xp_memcpy (&rex->code.buf[rex->code.size], data, len);
	rex->code.size += len;

	return 0;
}
|
2006-07-20 16:21:54 +00:00
|
|
|
|
|
|
|
/*
 * Prints the compiled code of 'rex' as a pattern followed by a
 * newline. It asserts that walking the code consumes exactly
 * rex->code.size bytes.
 */
void xp_awk_rex_print (xp_awk_rex_t* rex)
{
	/* the walker returns the position right after the expression */
	const xp_byte_t* p = __print_expression (rex->code.buf);

	xp_printf (XP_T("\n"));
	xp_assert (p == rex->code.buf + rex->code.size);
}
|
|
|
|
|
|
|
|
static const xp_byte_t* __print_expression (const xp_byte_t* p)
|
|
|
|
{
|
|
|
|
xp_size_t nb, el, i;
|
|
|
|
|
|
|
|
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
|
|
|
|
el = *(xp_size_t*)p; p += xp_sizeof(el);
|
|
|
|
//xp_printf (XP_T("NA = %u, EL = %u\n"),
|
|
|
|
// (unsigned int)nb, (unsigned int)el);
|
|
|
|
|
|
|
|
for (i = 0; i < nb; i++)
|
|
|
|
{
|
|
|
|
if (i != 0) xp_printf (XP_T("|"));
|
|
|
|
p = __print_branch (p);
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const xp_byte_t* __print_branch (const xp_byte_t* p)
|
|
|
|
{
|
|
|
|
xp_size_t na, bl, i;
|
|
|
|
|
|
|
|
na = *(xp_size_t*)p; p += xp_sizeof(na);
|
|
|
|
bl = *(xp_size_t*)p; p += xp_sizeof(bl);
|
|
|
|
//xp_printf (XP_T("NA = %u, BL = %u\n"),
|
|
|
|
// (unsigned int) na, (unsigned int)bl);
|
|
|
|
|
|
|
|
for (i = 0; i < na; i++)
|
|
|
|
{
|
|
|
|
p = __print_atom (p);
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const xp_byte_t* __print_atom (const xp_byte_t* p)
|
|
|
|
{
|
|
|
|
struct __code* cp = (struct __code*)p;
|
|
|
|
|
|
|
|
if (cp->cmd == CMD_BOL)
|
|
|
|
{
|
|
|
|
xp_printf (XP_T("^"));
|
|
|
|
p += xp_sizeof(*cp);
|
|
|
|
}
|
|
|
|
else if (cp->cmd == CMD_EOL)
|
|
|
|
{
|
|
|
|
xp_printf (XP_T("$"));
|
|
|
|
p += xp_sizeof(*cp);
|
|
|
|
}
|
|
|
|
else if (cp->cmd == CMD_ANY_CHAR)
|
|
|
|
{
|
|
|
|
xp_printf (XP_T("."));
|
|
|
|
p += xp_sizeof(*cp);
|
|
|
|
}
|
|
|
|
else if (cp->cmd == CMD_ORD_CHAR)
|
|
|
|
{
|
|
|
|
p += xp_sizeof(*cp);
|
|
|
|
xp_printf (XP_T("%c"), *(xp_char_t*)p);
|
|
|
|
p += xp_sizeof(xp_char_t);
|
|
|
|
}
|
|
|
|
else if (cp->cmd == CMD_GROUP)
|
|
|
|
{
|
|
|
|
p += xp_sizeof(*cp);
|
|
|
|
xp_printf (XP_T("("));
|
|
|
|
p = __print_expression (p);
|
|
|
|
xp_printf (XP_T(")"));
|
|
|
|
}
|
2006-07-21 05:05:03 +00:00
|
|
|
else if (cp->cmd == CMD_CHARSET)
|
|
|
|
{
|
|
|
|
xp_size_t csc, csl, i;
|
|
|
|
|
|
|
|
p += xp_sizeof(*cp);
|
|
|
|
xp_printf (XP_T("["));
|
|
|
|
if (cp->negate) xp_printf (XP_T("^"));
|
|
|
|
|
|
|
|
csc = *(xp_size_t*)p; p += xp_sizeof(csc);
|
|
|
|
csl = *(xp_size_t*)p; p += xp_sizeof(csl);
|
|
|
|
|
|
|
|
for (i = 0; i < csc; i++)
|
|
|
|
{
|
|
|
|
xp_char_t c0, c1, c2;
|
|
|
|
|
|
|
|
c0 = *(xp_char_t*)p;
|
|
|
|
p += xp_sizeof(c0);
|
|
|
|
if (c0 == CHARSET_ONE)
|
|
|
|
{
|
|
|
|
c1 = *(xp_char_t*)p;
|
|
|
|
xp_printf (XP_T("%c"), c1);
|
|
|
|
}
|
|
|
|
else if (c0 == CHARSET_RANGE)
|
|
|
|
{
|
|
|
|
c1 = *(xp_char_t*)p;
|
|
|
|
p += xp_sizeof(c1);
|
|
|
|
c2 = *(xp_char_t*)p;
|
|
|
|
xp_printf (XP_T("%c-%c"), c1, c2);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
xp_printf (XP_T("FUCK: WRONG CHARSET CODE\n"));
|
|
|
|
}
|
|
|
|
|
|
|
|
p += xp_sizeof(c1);
|
|
|
|
}
|
|
|
|
|
|
|
|
xp_printf (XP_T("]"));
|
|
|
|
}
|
2006-07-20 16:21:54 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
xp_printf (XP_T("FUCK FUCK FUCK\n"));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cp->lbound == 0 && cp->ubound == BOUND_MAX)
|
|
|
|
xp_printf (XP_T("*"));
|
|
|
|
else if (cp->lbound == 1 && cp->ubound == BOUND_MAX)
|
|
|
|
xp_printf (XP_T("+"));
|
|
|
|
else if (cp->lbound == 0 && cp->ubound == 1)
|
|
|
|
xp_printf (XP_T("?"));
|
|
|
|
else if (cp->lbound != 1 || cp->ubound != 1)
|
|
|
|
{
|
|
|
|
xp_printf (XP_T("{%lu,%lu}"),
|
|
|
|
(unsigned long)cp->lbound, (unsigned long)cp->ubound);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|