qse/ase/awk/rex.c

764 lines
15 KiB
C
Raw Normal View History

2006-07-17 06:21:39 +00:00
/*
 * $Id: rex.c,v 1.8 2006-07-21 05:05:03 bacon Exp $
 */
#include <xp/awk/awk_i.h>
#ifndef XP_AWK_STAND_ALONE
#include <xp/bas/memory.h>
2006-07-20 03:41:00 +00:00
#include <xp/bas/string.h>
2006-07-17 06:21:39 +00:00
#include <xp/bas/assert.h>
#endif
2006-07-20 03:41:00 +00:00
enum
{
2006-07-21 05:05:03 +00:00
CT_EOF,
CT_SPECIAL,
CT_NORMAL
2006-07-20 03:41:00 +00:00
};
enum
{
2006-07-21 05:05:03 +00:00
LEVEL_TOP,
LEVEL_CHARSET,
LEVEL_RANGE,
2006-07-20 03:41:00 +00:00
};
2006-07-17 14:27:09 +00:00
enum
{
2006-07-20 16:21:54 +00:00
CMD_BOL,
CMD_EOL,
2006-07-17 14:27:09 +00:00
CMD_ANY_CHAR,
2006-07-20 16:21:54 +00:00
CMD_ORD_CHAR,
2006-07-21 05:05:03 +00:00
CMD_CHARSET,
2006-07-20 16:21:54 +00:00
CMD_GROUP
2006-07-17 14:27:09 +00:00
};
enum
{
2006-07-21 05:05:03 +00:00
CHARSET_ONE,
CHARSET_RANGE,
CHARSET_CLASS
};
enum
{
CHARSET_CLASS_PUNCT,
CHARSET_CLASS_SPACE,
CHARSET_CLASS_DIGIT,
CHARSET_CLASS_ALNUM
2006-07-17 14:27:09 +00:00
};
#define PC_CMD(rex,base) (rex)->code[(base)].dc.cmd
#define PC_BFLAG(rex,base) (rex)->code[(base)].dc.bflag
#define PC_LBOUND(rex,base) (rex)->code[(base)].dc.lbound
#define PC_UBOUND(rex,base) (rex)->code[(base)].dc.ubound
#define PC_VALUE(rex,base) (rex)->code[(base)].cc
2006-07-19 11:45:24 +00:00
#define BOUND_MIN 0
#define BOUND_MAX (XP_TYPE_MAX(xp_size_t))
struct __code
{
2006-07-20 16:21:54 +00:00
//xp_byte_t cmd;
2006-07-21 05:05:03 +00:00
short cmd;
short negate; /* only for CMD_CHARSET */
2006-07-19 11:45:24 +00:00
xp_size_t lbound;
xp_size_t ubound;
};
2006-07-21 05:05:03 +00:00
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)
2006-07-20 03:41:00 +00:00
#define NEXT_CHAR(rex,level) \
do { if (__next_char(rex,level) == -1) return -1; } while (0)
2006-07-19 11:45:24 +00:00
#define ADD_CODE(rex,data,len) \
do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
static int __compile_expression (xp_awk_rex_t* rex);
static int __compile_branch (xp_awk_rex_t* rex);
static int __compile_atom (xp_awk_rex_t* rex);
2006-07-21 05:05:03 +00:00
static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd);
2006-07-20 16:21:54 +00:00
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd);
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd);
2006-07-20 03:41:00 +00:00
static int __next_char (xp_awk_rex_t* rex, int level);
2006-07-19 11:45:24 +00:00
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len);
2006-07-20 16:21:54 +00:00
static const xp_byte_t* __print_expression (const xp_byte_t* p);
static const xp_byte_t* __print_branch (const xp_byte_t* p);
static const xp_byte_t* __print_atom (const xp_byte_t* p);
2006-07-17 06:21:39 +00:00
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
{
if (rex == XP_NULL)
{
rex = (xp_awk_rex_t*) xp_malloc (xp_sizeof(xp_awk_rex_t));
if (rex == XP_NULL) return XP_NULL;
rex->__dynamic = xp_true;
}
else rex->__dynamic = xp_false;
2006-07-19 11:45:24 +00:00
rex->code.capa = 512;
rex->code.size = 0;
rex->code.buf = (xp_byte_t*) xp_malloc (rex->code.capa);
if (rex->code.buf == XP_NULL)
{
if (rex->__dynamic) xp_free (rex);
return XP_NULL;
}
2006-07-17 06:21:39 +00:00
return rex;
}
void xp_awk_rex_close (xp_awk_rex_t* rex)
{
2006-07-19 11:45:24 +00:00
xp_free (rex->code.buf);
2006-07-17 06:21:39 +00:00
if (rex->__dynamic) xp_free (rex);
}
2006-07-19 11:45:24 +00:00
int xp_awk_rex_compile (xp_awk_rex_t* rex, const xp_char_t* ptn, xp_size_t len)
2006-07-17 06:21:39 +00:00
{
2006-07-19 11:45:24 +00:00
rex->ptn.ptr = ptn;
rex->ptn.end = rex->ptn.ptr + len;
rex->ptn.curp = rex->ptn.ptr;
2006-07-17 06:21:39 +00:00
2006-07-21 05:05:03 +00:00
rex->ptn.curc.type = CT_EOF;
2006-07-20 03:41:00 +00:00
rex->ptn.curc.value = XP_T('\0');
2006-07-18 15:28:26 +00:00
2006-07-20 03:41:00 +00:00
rex->code.size = 0;
2006-07-17 06:21:39 +00:00
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_TOP);
2006-07-19 11:45:24 +00:00
if (__compile_expression (rex) == -1)
{
/* TODO: clear expression */
xp_printf (XP_T("fuck ........ \n"));
return -1;
}
2006-07-17 06:21:39 +00:00
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type != CT_EOF)
2006-07-19 11:45:24 +00:00
{
/* TODO: error handling */
/* garbage after the expression */
xp_printf (XP_T("garbage after expression\n"));
return -1;
2006-07-17 06:21:39 +00:00
}
2006-07-21 05:05:03 +00:00
xp_printf (XP_T("code.capa = %u\n"), (unsigned int)rex->code.capa);
xp_printf (XP_T("code.size = %u\n"), (unsigned int)rex->code.size);
2006-07-19 11:45:24 +00:00
return 0;
2006-07-17 06:21:39 +00:00
}
2006-07-18 15:28:26 +00:00
2006-07-19 11:45:24 +00:00
static int __compile_expression (xp_awk_rex_t* rex)
2006-07-18 15:28:26 +00:00
{
2006-07-19 15:58:01 +00:00
xp_size_t zero = 0;
2006-07-20 16:21:54 +00:00
xp_size_t* nb, * el;
xp_size_t old_size;
2006-07-20 03:41:00 +00:00
int n;
2006-07-19 15:58:01 +00:00
2006-07-20 16:21:54 +00:00
old_size = rex->code.size;
2006-07-20 03:41:00 +00:00
/* secure space for header and set the header fields to zero */
2006-07-19 15:58:01 +00:00
nb = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
el = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
2006-07-20 03:41:00 +00:00
/* handle the first branch */
n = __compile_branch (rex);
if (n == -1) return -1;
if (n == 0)
{
/* TODO: what if the expression starts with a vertical bar??? */
return 0;
}
2006-07-18 15:28:26 +00:00
2006-07-19 15:58:01 +00:00
(*nb) += 1;
2006-07-20 03:41:00 +00:00
/* handle subsequent branches if any */
2006-07-21 05:05:03 +00:00
while (rex->ptn.curc.type == CT_SPECIAL &&
2006-07-20 03:41:00 +00:00
rex->ptn.curc.value == XP_T('|'))
2006-07-18 15:28:26 +00:00
{
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_TOP);
2006-07-18 15:28:26 +00:00
2006-07-20 03:41:00 +00:00
n = __compile_branch(rex);
if (n == -1) return -1;
if (n == 0)
{
/* if the pattern ends with a vertical bar(|),
* this block can be reached. however, the use
* of such an expression is highly discouraged */
/* TODO: should it return an error???? */
break;
}
2006-07-18 15:28:26 +00:00
2006-07-19 15:58:01 +00:00
(*nb) += 1;
2006-07-18 15:28:26 +00:00
}
2006-07-19 11:45:24 +00:00
2006-07-20 16:21:54 +00:00
*el = rex->code.size - old_size;
2006-07-20 03:41:00 +00:00
return 1;
2006-07-18 15:28:26 +00:00
}
2006-07-19 11:45:24 +00:00
static int __compile_branch (xp_awk_rex_t* rex)
2006-07-18 15:28:26 +00:00
{
2006-07-19 11:45:24 +00:00
int n;
2006-07-20 16:21:54 +00:00
xp_size_t* na, * bl;
2006-07-19 15:58:01 +00:00
xp_size_t old_size;
2006-07-20 03:41:00 +00:00
xp_size_t zero = 0;
2006-07-20 16:21:54 +00:00
struct __code* cmd;
2006-07-19 15:58:01 +00:00
old_size = rex->code.size;
2006-07-20 16:21:54 +00:00
na = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
2006-07-19 15:58:01 +00:00
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
2006-07-20 03:41:00 +00:00
ADD_CODE (rex, &zero, xp_sizeof(zero));
2006-07-18 15:28:26 +00:00
2006-07-20 03:41:00 +00:00
while (1)
2006-07-18 15:28:26 +00:00
{
2006-07-20 16:21:54 +00:00
cmd = (struct __code*)&rex->code.buf[rex->code.size];
2006-07-19 11:45:24 +00:00
n = __compile_atom (rex);
2006-07-19 15:58:01 +00:00
if (n == -1)
{
rex->code.size = old_size;
return -1;
}
2006-07-18 15:28:26 +00:00
2006-07-20 03:41:00 +00:00
if (n == 0) break; /* no atom */
2006-07-19 11:45:24 +00:00
2006-07-20 16:21:54 +00:00
n = __compile_bound (rex, cmd);
2006-07-20 03:41:00 +00:00
if (n == -1)
2006-07-18 15:28:26 +00:00
{
2006-07-20 03:41:00 +00:00
rex->code.size = old_size;
return -1;
2006-07-18 15:28:26 +00:00
}
2006-07-20 03:41:00 +00:00
/* n == 0 no bound character. just continue */
/* n == 1 bound has been applied by compile_bound */
2006-07-20 16:21:54 +00:00
(*na) += 1;
2006-07-19 11:45:24 +00:00
}
2006-07-20 16:21:54 +00:00
*bl = rex->code.size - old_size;
return ((*na) == 0)? 0: 1;
2006-07-19 11:45:24 +00:00
}
static int __compile_atom (xp_awk_rex_t* rex)
{
2006-07-20 16:21:54 +00:00
int n;
2006-07-21 05:05:03 +00:00
struct __code tmp;
2006-07-19 11:45:24 +00:00
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type == CT_EOF) return 0;
2006-07-20 16:21:54 +00:00
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type == CT_SPECIAL)
2006-07-19 11:45:24 +00:00
{
2006-07-20 03:41:00 +00:00
if (rex->ptn.curc.value == XP_T('('))
{
2006-07-20 16:21:54 +00:00
tmp.cmd = CMD_GROUP;
2006-07-21 05:05:03 +00:00
tmp.negate = 0;
2006-07-20 16:21:54 +00:00
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_TOP);
2006-07-20 16:21:54 +00:00
2006-07-20 03:41:00 +00:00
n = __compile_expression (rex);
if (n == -1) return -1;
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type != CT_SPECIAL ||
2006-07-20 03:41:00 +00:00
rex->ptn.curc.value != XP_T(')'))
{
// rex->errnum = XP_AWK_REX_ERPAREN;
return -1;
}
}
else if (rex->ptn.curc.value == XP_T('^'))
2006-07-19 11:45:24 +00:00
{
2006-07-20 16:21:54 +00:00
tmp.cmd = CMD_BOL;
2006-07-21 05:05:03 +00:00
tmp.negate = 0;
2006-07-19 11:45:24 +00:00
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
}
2006-07-20 03:41:00 +00:00
else if (rex->ptn.curc.value == XP_T('$'))
2006-07-19 11:45:24 +00:00
{
2006-07-20 16:21:54 +00:00
tmp.cmd = CMD_EOL;
2006-07-21 05:05:03 +00:00
tmp.negate = 0;
2006-07-19 11:45:24 +00:00
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
}
2006-07-20 03:41:00 +00:00
else if (rex->ptn.curc.value == XP_T('.'))
2006-07-18 15:28:26 +00:00
{
2006-07-19 11:45:24 +00:00
tmp.cmd = CMD_ANY_CHAR;
2006-07-21 05:05:03 +00:00
tmp.negate = 0;
2006-07-19 11:45:24 +00:00
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
2006-07-18 15:28:26 +00:00
}
2006-07-20 03:41:00 +00:00
else if (rex->ptn.curc.value == XP_T('['))
2006-07-18 15:28:26 +00:00
{
2006-07-21 05:05:03 +00:00
struct __code* cmd;
cmd = (struct __code*)&rex->code.buf[rex->code.size];
tmp.cmd = CMD_CHARSET;
tmp.negate = 0;
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
NEXT_CHAR (rex, LEVEL_CHARSET);
n = __compile_charset (rex, cmd);
if (n == -1) return -1;
xp_assert (n != 0);
if (rex->ptn.curc.type != CT_SPECIAL ||
rex->ptn.curc.value != XP_T(']'))
{
// TODO
/*rex->errnum = XP_AWK_REX_ERBRACKET;*/
return -1;
}
2006-07-19 11:45:24 +00:00
}
2006-07-20 16:21:54 +00:00
else return 0;
2006-07-19 11:45:24 +00:00
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_TOP);
2006-07-20 03:41:00 +00:00
return 1;
}
else
{
2006-07-21 05:05:03 +00:00
xp_assert (rex->ptn.curc.type == CT_NORMAL);
2006-07-20 16:21:54 +00:00
2006-07-20 03:41:00 +00:00
tmp.cmd = CMD_ORD_CHAR;
2006-07-21 05:05:03 +00:00
tmp.negate = 0;
2006-07-20 03:41:00 +00:00
tmp.lbound = 1;
tmp.ubound = 1;
ADD_CODE (rex, &tmp, xp_sizeof(tmp));
2006-07-21 05:05:03 +00:00
2006-07-20 16:21:54 +00:00
ADD_CODE (rex, &rex->ptn.curc.value, xp_sizeof(rex->ptn.curc.value));
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_TOP);
2006-07-19 11:45:24 +00:00
2006-07-20 03:41:00 +00:00
return 1;
2006-07-18 15:28:26 +00:00
}
}
2006-07-21 05:05:03 +00:00
static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd)
2006-07-18 15:28:26 +00:00
{
2006-07-21 05:05:03 +00:00
xp_size_t zero = 0;
xp_size_t* csc, * csl;
xp_size_t old_size;
old_size = rex->code.size;
csc = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
csl = (xp_size_t*)&rex->code.buf[rex->code.size];
ADD_CODE (rex, &zero, xp_sizeof(zero));
if (rex->ptn.curc.type == CT_NORMAL &&
rex->ptn.curc.value == XP_T('^'))
{
cmd->negate = 1;
NEXT_CHAR (rex, LEVEL_CHARSET);
}
while (rex->ptn.curc.type == CT_NORMAL)
{
xp_char_t c0, c1, c2;
c1 = rex->ptn.curc.value;
NEXT_CHAR(rex, LEVEL_CHARSET);
#if 0
if (c1 == XP_T('[') &&
rex->ptn.curc.type == CT_NORMAL &&
rex->ptn.curc.value == XP_T(':'))
{
/* beginning of character class */
/* change c1 */
}
#endif
c2 = c1;
if (rex->ptn.curc.type == CT_NORMAL &&
rex->ptn.curc.value == XP_T('-'))
{
NEXT_CHAR (rex, LEVEL_CHARSET);
if (rex->ptn.curc.type == CT_NORMAL)
{
c2 = rex->ptn.curc.value;
NEXT_CHAR(rex, LEVEL_CHARSET);
#if 0
if (c2 == XP_T('[') &&
rex->ptn.curc.type == CT_NORMAL &&
rex->ptn.curc.value == XP_T(':'))
{
/* beginning of character class */
/* change c2 */
}
#endif
}
}
if (c1 == c2)
{
c0 = CHARSET_ONE;
ADD_CODE (rex, &c0, xp_sizeof(c0));
ADD_CODE (rex, &c1, xp_sizeof(c1));
}
else
{
c0 = CHARSET_RANGE;
ADD_CODE (rex, &c0, xp_sizeof(c0));
ADD_CODE (rex, &c1, xp_sizeof(c1));
ADD_CODE (rex, &c2, xp_sizeof(c2));
}
(*csc) += 1;
}
*csl = rex->code.size - old_size;
return 1;
2006-07-19 11:45:24 +00:00
}
2006-07-18 15:28:26 +00:00
2006-07-20 16:21:54 +00:00
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd)
2006-07-20 03:41:00 +00:00
{
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type != CT_SPECIAL) return 0;
2006-07-20 03:41:00 +00:00
switch (rex->ptn.curc.value)
{
case XP_T('+'):
{
2006-07-20 16:21:54 +00:00
cmd->lbound = 1;
cmd->ubound = BOUND_MAX;
2006-07-21 05:05:03 +00:00
NEXT_CHAR(rex, LEVEL_TOP);
2006-07-20 03:41:00 +00:00
return 1;
}
case XP_T('*'):
{
2006-07-20 16:21:54 +00:00
cmd->lbound = 0;
cmd->ubound = BOUND_MAX;
2006-07-21 05:05:03 +00:00
NEXT_CHAR(rex, LEVEL_TOP);
2006-07-20 03:41:00 +00:00
return 1;
}
case XP_T('?'):
{
2006-07-20 16:21:54 +00:00
cmd->lbound = 0;
cmd->ubound = 1;
2006-07-21 05:05:03 +00:00
NEXT_CHAR(rex, LEVEL_TOP);
2006-07-20 03:41:00 +00:00
return 1;
}
case XP_T('{'):
{
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
if (__compile_range(rex, cmd) == -1) return -1;
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type != CT_SPECIAL ||
2006-07-20 16:21:54 +00:00
rex->ptn.curc.value != XP_T('}'))
{
// rex->errnum = XP_AWK_REX_ERBRACE
return -1;
}
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_TOP);
2006-07-20 03:41:00 +00:00
return 1;
}
}
return 0;
}
2006-07-20 16:21:54 +00:00
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd)
2006-07-19 11:45:24 +00:00
{
2006-07-20 16:21:54 +00:00
xp_size_t bound;
// TODO: should allow white spaces in the range???
// what if it is not in the raight format? convert it to ordinary characters??
bound = 0;
2006-07-21 05:05:03 +00:00
while (rex->ptn.curc.type == CT_NORMAL &&
2006-07-20 16:21:54 +00:00
xp_isdigit(rex->ptn.curc.value))
{
bound = bound * 10 + rex->ptn.curc.value - XP_T('0');
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
}
cmd->lbound = bound;
2006-07-21 05:05:03 +00:00
if (rex->ptn.curc.type == CT_SPECIAL &&
2006-07-20 16:21:54 +00:00
rex->ptn.curc.value == XP_T(','))
{
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
bound = 0;
2006-07-21 05:05:03 +00:00
while (rex->ptn.curc.type == CT_NORMAL &&
2006-07-20 16:21:54 +00:00
xp_isdigit(rex->ptn.curc.value))
{
bound = bound * 10 + rex->ptn.curc.value - XP_T('0');
2006-07-21 05:05:03 +00:00
NEXT_CHAR (rex, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
}
cmd->ubound = bound;
}
else cmd->ubound = BOUND_MAX;
return 0;
2006-07-19 11:45:24 +00:00
}
2006-07-18 15:28:26 +00:00
2006-07-20 03:41:00 +00:00
static int __next_char (xp_awk_rex_t* rex, int level)
2006-07-19 11:45:24 +00:00
{
2006-07-20 03:41:00 +00:00
if (rex->ptn.curp >= rex->ptn.end)
2006-07-18 15:28:26 +00:00
{
2006-07-21 05:05:03 +00:00
rex->ptn.curc.type = CT_EOF;
2006-07-20 03:41:00 +00:00
rex->ptn.curc.value = XP_T('\0');
return 0;
2006-07-18 15:28:26 +00:00
}
2006-07-19 11:45:24 +00:00
2006-07-21 05:05:03 +00:00
rex->ptn.curc.type = CT_NORMAL;
2006-07-20 03:41:00 +00:00
rex->ptn.curc.value = *rex->ptn.curp++;
xp_printf (XP_T("[%c]\n"), rex->ptn.curc.value);
if (rex->ptn.curc.value == XP_T('\\'))
2006-07-19 11:45:24 +00:00
{
if (rex->ptn.curp >= rex->ptn.end)
{
/* unexpected end of expression */
//rex->errnum = XP_AWK_REX_EEND;
return -1;
}
2006-07-20 03:41:00 +00:00
rex->ptn.curc.value = *rex->ptn.curp++;
2006-07-19 11:45:24 +00:00
2006-07-20 03:41:00 +00:00
/* TODO: need this? */
/*
if (rex->ptn.curc.value == XP_T('n')) rex->ptn.curc = XP_T('\n');
else if (rex->ptn.curc.value == XP_T('r')) rex->ptn.curc = XP_T('\r');
else if (rex->ptn.curc.value == XP_T('t')) rex->ptn.curc = XP_T('\t');
*/
return 0;
}
else
{
2006-07-21 05:05:03 +00:00
if (level == LEVEL_TOP)
2006-07-20 03:41:00 +00:00
{
if (rex->ptn.curc.value == XP_T('[') ||
rex->ptn.curc.value == XP_T('|') ||
rex->ptn.curc.value == XP_T('^') ||
rex->ptn.curc.value == XP_T('$') ||
rex->ptn.curc.value == XP_T('{') ||
rex->ptn.curc.value == XP_T('+') ||
rex->ptn.curc.value == XP_T('?') ||
rex->ptn.curc.value == XP_T('*') ||
rex->ptn.curc.value == XP_T('.') ||
rex->ptn.curc.value == XP_T('(') ||
rex->ptn.curc.value == XP_T(')'))
{
2006-07-21 05:05:03 +00:00
rex->ptn.curc.type = CT_SPECIAL;
2006-07-20 03:41:00 +00:00
}
}
2006-07-21 05:05:03 +00:00
else if (level == LEVEL_CHARSET)
2006-07-20 03:41:00 +00:00
{
2006-07-21 05:05:03 +00:00
/*
2006-07-20 03:41:00 +00:00
if (rex->ptn.curc.value == XP_T('^') ||
rex->ptn.curc.value == XP_T('-') ||
rex->ptn.curc.value == XP_T(']'))
{
2006-07-21 05:05:03 +00:00
rex->ptn.curc.type = CT_SPECIAL;
}
*/
if (rex->ptn.curc.value == XP_T(']'))
{
rex->ptn.curc.type = CT_SPECIAL;
2006-07-20 03:41:00 +00:00
}
}
2006-07-21 05:05:03 +00:00
else if (level == LEVEL_RANGE)
2006-07-20 03:41:00 +00:00
{
if (rex->ptn.curc.value == XP_T(',') ||
rex->ptn.curc.value == XP_T('}'))
{
2006-07-21 05:05:03 +00:00
rex->ptn.curc.type = CT_SPECIAL;
2006-07-20 03:41:00 +00:00
}
}
2006-07-18 15:28:26 +00:00
}
2006-07-19 11:45:24 +00:00
return 0;
2006-07-18 15:28:26 +00:00
}
2006-07-19 11:45:24 +00:00
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len)
2006-07-18 15:28:26 +00:00
{
2006-07-19 11:45:24 +00:00
if (len > rex->code.capa - rex->code.size)
{
xp_size_t capa = rex->code.capa * 2;
xp_byte_t* tmp;
if (capa == 0) capa = 1;
while (len > capa - rex->code.size) { capa = capa * 2; }
tmp = (xp_byte_t*) xp_realloc (rex->code.buf, capa);
if (tmp == XP_NULL)
{
/* TODO: */
/*rex->errnum = XP_AWK_REX_ENOMEM;*/
return -1;
}
rex->code.buf = tmp;
rex->code.capa = capa;
}
xp_memcpy (&rex->code.buf[rex->code.size], data, len);
rex->code.size += len;
return 0;
2006-07-18 15:28:26 +00:00
}
2006-07-20 16:21:54 +00:00
void xp_awk_rex_print (xp_awk_rex_t* rex)
{
const xp_byte_t* p;
p = __print_expression (rex->code.buf);
xp_printf (XP_T("\n"));
xp_assert (p == rex->code.buf + rex->code.size);
}
static const xp_byte_t* __print_expression (const xp_byte_t* p)
{
xp_size_t nb, el, i;
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
el = *(xp_size_t*)p; p += xp_sizeof(el);
//xp_printf (XP_T("NA = %u, EL = %u\n"),
// (unsigned int)nb, (unsigned int)el);
for (i = 0; i < nb; i++)
{
if (i != 0) xp_printf (XP_T("|"));
p = __print_branch (p);
}
return p;
}
static const xp_byte_t* __print_branch (const xp_byte_t* p)
{
xp_size_t na, bl, i;
na = *(xp_size_t*)p; p += xp_sizeof(na);
bl = *(xp_size_t*)p; p += xp_sizeof(bl);
//xp_printf (XP_T("NA = %u, BL = %u\n"),
// (unsigned int) na, (unsigned int)bl);
for (i = 0; i < na; i++)
{
p = __print_atom (p);
}
return p;
}
static const xp_byte_t* __print_atom (const xp_byte_t* p)
{
struct __code* cp = (struct __code*)p;
if (cp->cmd == CMD_BOL)
{
xp_printf (XP_T("^"));
p += xp_sizeof(*cp);
}
else if (cp->cmd == CMD_EOL)
{
xp_printf (XP_T("$"));
p += xp_sizeof(*cp);
}
else if (cp->cmd == CMD_ANY_CHAR)
{
xp_printf (XP_T("."));
p += xp_sizeof(*cp);
}
else if (cp->cmd == CMD_ORD_CHAR)
{
p += xp_sizeof(*cp);
xp_printf (XP_T("%c"), *(xp_char_t*)p);
p += xp_sizeof(xp_char_t);
}
else if (cp->cmd == CMD_GROUP)
{
p += xp_sizeof(*cp);
xp_printf (XP_T("("));
p = __print_expression (p);
xp_printf (XP_T(")"));
}
2006-07-21 05:05:03 +00:00
else if (cp->cmd == CMD_CHARSET)
{
xp_size_t csc, csl, i;
p += xp_sizeof(*cp);
xp_printf (XP_T("["));
if (cp->negate) xp_printf (XP_T("^"));
csc = *(xp_size_t*)p; p += xp_sizeof(csc);
csl = *(xp_size_t*)p; p += xp_sizeof(csl);
for (i = 0; i < csc; i++)
{
xp_char_t c0, c1, c2;
c0 = *(xp_char_t*)p;
p += xp_sizeof(c0);
if (c0 == CHARSET_ONE)
{
c1 = *(xp_char_t*)p;
xp_printf (XP_T("%c"), c1);
}
else if (c0 == CHARSET_RANGE)
{
c1 = *(xp_char_t*)p;
p += xp_sizeof(c1);
c2 = *(xp_char_t*)p;
xp_printf (XP_T("%c-%c"), c1, c2);
}
else
{
xp_printf (XP_T("FUCK: WRONG CHARSET CODE\n"));
}
p += xp_sizeof(c1);
}
xp_printf (XP_T("]"));
}
2006-07-20 16:21:54 +00:00
else
{
xp_printf (XP_T("FUCK FUCK FUCK\n"));
}
if (cp->lbound == 0 && cp->ubound == BOUND_MAX)
xp_printf (XP_T("*"));
else if (cp->lbound == 1 && cp->ubound == BOUND_MAX)
xp_printf (XP_T("+"));
else if (cp->lbound == 0 && cp->ubound == 1)
xp_printf (XP_T("?"));
else if (cp->lbound != 1 || cp->ubound != 1)
{
xp_printf (XP_T("{%lu,%lu}"),
(unsigned long)cp->lbound, (unsigned long)cp->ubound);
}
return p;
}