*** empty log message ***
This commit is contained in:
parent
42ba51c5b3
commit
3030d45e33
368
ase/awk/rex.c
368
ase/awk/rex.c
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.c,v 1.8 2006-07-21 05:05:03 bacon Exp $
|
* $Id: rex.c,v 1.9 2006-07-22 16:40:39 bacon Exp $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <xp/awk/awk_i.h>
|
#include <xp/awk/awk_i.h>
|
||||||
@ -8,6 +8,7 @@
|
|||||||
#include <xp/bas/memory.h>
|
#include <xp/bas/memory.h>
|
||||||
#include <xp/bas/string.h>
|
#include <xp/bas/string.h>
|
||||||
#include <xp/bas/assert.h>
|
#include <xp/bas/assert.h>
|
||||||
|
#include <xp/bas/ctype.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
enum
|
enum
|
||||||
@ -49,16 +50,10 @@ enum
|
|||||||
CHARSET_CLASS_ALNUM
|
CHARSET_CLASS_ALNUM
|
||||||
};
|
};
|
||||||
|
|
||||||
#define PC_CMD(rex,base) (rex)->code[(base)].dc.cmd
|
|
||||||
#define PC_BFLAG(rex,base) (rex)->code[(base)].dc.bflag
|
|
||||||
#define PC_LBOUND(rex,base) (rex)->code[(base)].dc.lbound
|
|
||||||
#define PC_UBOUND(rex,base) (rex)->code[(base)].dc.ubound
|
|
||||||
#define PC_VALUE(rex,base) (rex)->code[(base)].cc
|
|
||||||
|
|
||||||
#define BOUND_MIN 0
|
#define BOUND_MIN 0
|
||||||
#define BOUND_MAX (XP_TYPE_MAX(xp_size_t))
|
#define BOUND_MAX (XP_TYPE_MAX(xp_size_t))
|
||||||
|
|
||||||
struct __code
|
struct __code_t
|
||||||
{
|
{
|
||||||
//xp_byte_t cmd;
|
//xp_byte_t cmd;
|
||||||
short cmd;
|
short cmd;
|
||||||
@ -75,12 +70,15 @@ struct __code
|
|||||||
#define ADD_CODE(rex,data,len) \
|
#define ADD_CODE(rex,data,len) \
|
||||||
do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
|
do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
|
||||||
|
|
||||||
|
#define CODEAT(rex,pos,type) (*((type*)&(rex)->code.buf[pos]))
|
||||||
|
|
||||||
static int __compile_expression (xp_awk_rex_t* rex);
|
static int __compile_expression (xp_awk_rex_t* rex);
|
||||||
static int __compile_branch (xp_awk_rex_t* rex);
|
static int __compile_branch (xp_awk_rex_t* rex);
|
||||||
static int __compile_atom (xp_awk_rex_t* rex);
|
static int __compile_atom (xp_awk_rex_t* rex);
|
||||||
static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd);
|
static int __compile_charset (xp_awk_rex_t* rex, struct __code_t* cmd);
|
||||||
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd);
|
static int __compile_bound (xp_awk_rex_t* rex, struct __code_t* cmd);
|
||||||
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd);
|
static int __compile_cclass (xp_awk_rex_t* rex, xp_char_t* cc);
|
||||||
|
static int __compile_range (xp_awk_rex_t* rex, struct __code_t* cmd);
|
||||||
static int __next_char (xp_awk_rex_t* rex, int level);
|
static int __next_char (xp_awk_rex_t* rex, int level);
|
||||||
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len);
|
static int __add_code (xp_awk_rex_t* rex, void* data, xp_size_t len);
|
||||||
|
|
||||||
@ -88,6 +86,55 @@ static const xp_byte_t* __print_expression (const xp_byte_t* p);
|
|||||||
static const xp_byte_t* __print_branch (const xp_byte_t* p);
|
static const xp_byte_t* __print_branch (const xp_byte_t* p);
|
||||||
static const xp_byte_t* __print_atom (const xp_byte_t* p);
|
static const xp_byte_t* __print_atom (const xp_byte_t* p);
|
||||||
|
|
||||||
|
static xp_bool_t __begin_with (
|
||||||
|
const xp_char_t* str, xp_size_t len, const xp_char_t* what);
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isalnum (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isalpha (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isblank (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_iscntrl (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isdigit (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isgraph (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_islower (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isprint (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_ispunct (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isspace (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isupper (xp_char_t c);
|
||||||
|
static xp_bool_t __cc_isxdigit (xp_char_t c);
|
||||||
|
|
||||||
|
static struct __char_class_t
|
||||||
|
{
|
||||||
|
const xp_char_t* name;
|
||||||
|
xp_bool_t (*func) (xp_char_t c);
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct __char_class_t __char_class [] =
|
||||||
|
{
|
||||||
|
{ XP_T("alnum"), __cc_isalnum },
|
||||||
|
{ XP_T("alpha"), __cc_isalpha },
|
||||||
|
{ XP_T("blank"), __cc_isblank },
|
||||||
|
{ XP_T("cntrl"), __cc_iscntrl },
|
||||||
|
{ XP_T("digit"), __cc_isdigit },
|
||||||
|
{ XP_T("graph"), __cc_isgraph },
|
||||||
|
{ XP_T("lower"), __cc_islower },
|
||||||
|
{ XP_T("print"), __cc_isprint },
|
||||||
|
{ XP_T("punct"), __cc_ispunct },
|
||||||
|
{ XP_T("space"), __cc_isspace },
|
||||||
|
{ XP_T("upper"), __cc_isupper },
|
||||||
|
{ XP_T("xdigit"), __cc_isxdigit },
|
||||||
|
|
||||||
|
/*
|
||||||
|
{ XP_T("arabic"), __cc_isarabic },
|
||||||
|
{ XP_T("chinese"), __cc_ischinese },
|
||||||
|
{ XP_T("english"), __cc_isenglish },
|
||||||
|
{ XP_T("japanese"), __cc_isjapanese },
|
||||||
|
{ XP_T("korean"), __cc_iskorean },
|
||||||
|
{ XP_T("thai"), __cc_isthai },
|
||||||
|
*/
|
||||||
|
|
||||||
|
{ XP_NULL, XP_NULL }
|
||||||
|
};
|
||||||
|
|
||||||
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
|
xp_awk_rex_t* xp_awk_rex_open (xp_awk_rex_t* rex)
|
||||||
{
|
{
|
||||||
if (rex == XP_NULL)
|
if (rex == XP_NULL)
|
||||||
@ -151,17 +198,17 @@ xp_printf (XP_T("code.size = %u\n"), (unsigned int)rex->code.size);
|
|||||||
static int __compile_expression (xp_awk_rex_t* rex)
|
static int __compile_expression (xp_awk_rex_t* rex)
|
||||||
{
|
{
|
||||||
xp_size_t zero = 0;
|
xp_size_t zero = 0;
|
||||||
xp_size_t* nb, * el;
|
|
||||||
xp_size_t old_size;
|
xp_size_t old_size;
|
||||||
|
xp_size_t pos_nb, pos_el;
|
||||||
int n;
|
int n;
|
||||||
|
|
||||||
old_size = rex->code.size;
|
old_size = rex->code.size;
|
||||||
|
|
||||||
/* secure space for header and set the header fields to zero */
|
/* secure space for header and set the header fields to zero */
|
||||||
nb = (xp_size_t*)&rex->code.buf[rex->code.size];
|
pos_nb = rex->code.size;
|
||||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
el = (xp_size_t*)&rex->code.buf[rex->code.size];
|
pos_el = rex->code.size;
|
||||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
/* handle the first branch */
|
/* handle the first branch */
|
||||||
@ -169,11 +216,11 @@ static int __compile_expression (xp_awk_rex_t* rex)
|
|||||||
if (n == -1) return -1;
|
if (n == -1) return -1;
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
/* TODO: what if the expression starts with a vertical bar??? */
|
/* if the expression is empty, the control reaches here */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*nb) += 1;
|
CODEAT(rex,pos_nb,xp_size_t) += 1;
|
||||||
|
|
||||||
/* handle subsequent branches if any */
|
/* handle subsequent branches if any */
|
||||||
while (rex->ptn.curc.type == CT_SPECIAL &&
|
while (rex->ptn.curc.type == CT_SPECIAL &&
|
||||||
@ -186,39 +233,37 @@ static int __compile_expression (xp_awk_rex_t* rex)
|
|||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
/* if the pattern ends with a vertical bar(|),
|
/* if the pattern ends with a vertical bar(|),
|
||||||
* this block can be reached. however, the use
|
* this block can be reached. however, such a
|
||||||
* of such an expression is highly discouraged */
|
* pattern is highly discouraged */
|
||||||
|
|
||||||
/* TODO: should it return an error???? */
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*nb) += 1;
|
CODEAT(rex,pos_nb,xp_size_t) += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
*el = rex->code.size - old_size;
|
CODEAT(rex,pos_el,xp_size_t) = rex->code.size - old_size;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __compile_branch (xp_awk_rex_t* rex)
|
static int __compile_branch (xp_awk_rex_t* rex)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
xp_size_t* na, * bl;
|
|
||||||
xp_size_t old_size;
|
|
||||||
xp_size_t zero = 0;
|
xp_size_t zero = 0;
|
||||||
struct __code* cmd;
|
xp_size_t old_size;
|
||||||
|
xp_size_t pos_na, pos_bl;
|
||||||
|
struct __code_t* cmd;
|
||||||
|
|
||||||
old_size = rex->code.size;
|
old_size = rex->code.size;
|
||||||
|
|
||||||
na = (xp_size_t*)&rex->code.buf[rex->code.size];
|
pos_na = rex->code.size;
|
||||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
bl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
pos_bl = rex->code.size;
|
||||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
cmd = (struct __code*)&rex->code.buf[rex->code.size];
|
cmd = (struct __code_t*)&rex->code.buf[rex->code.size];
|
||||||
|
|
||||||
n = __compile_atom (rex);
|
n = __compile_atom (rex);
|
||||||
if (n == -1)
|
if (n == -1)
|
||||||
@ -239,17 +284,17 @@ static int __compile_branch (xp_awk_rex_t* rex)
|
|||||||
/* n == 0 no bound character. just continue */
|
/* n == 0 no bound character. just continue */
|
||||||
/* n == 1 bound has been applied by compile_bound */
|
/* n == 1 bound has been applied by compile_bound */
|
||||||
|
|
||||||
(*na) += 1;
|
CODEAT(rex,pos_na,xp_size_t) += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
*bl = rex->code.size - old_size;
|
CODEAT(rex,pos_bl,xp_size_t) = rex->code.size - old_size;
|
||||||
return ((*na) == 0)? 0: 1;
|
return (rex->code.size == old_size)? 0: 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __compile_atom (xp_awk_rex_t* rex)
|
static int __compile_atom (xp_awk_rex_t* rex)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
struct __code tmp;
|
struct __code_t tmp;
|
||||||
|
|
||||||
if (rex->ptn.curc.type == CT_EOF) return 0;
|
if (rex->ptn.curc.type == CT_EOF) return 0;
|
||||||
|
|
||||||
@ -301,9 +346,9 @@ static int __compile_atom (xp_awk_rex_t* rex)
|
|||||||
}
|
}
|
||||||
else if (rex->ptn.curc.value == XP_T('['))
|
else if (rex->ptn.curc.value == XP_T('['))
|
||||||
{
|
{
|
||||||
struct __code* cmd;
|
struct __code_t* cmd;
|
||||||
|
|
||||||
cmd = (struct __code*)&rex->code.buf[rex->code.size];
|
cmd = (struct __code_t*)&rex->code.buf[rex->code.size];
|
||||||
|
|
||||||
tmp.cmd = CMD_CHARSET;
|
tmp.cmd = CMD_CHARSET;
|
||||||
tmp.negate = 0;
|
tmp.negate = 0;
|
||||||
@ -349,17 +394,17 @@ static int __compile_atom (xp_awk_rex_t* rex)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd)
|
static int __compile_charset (xp_awk_rex_t* rex, struct __code_t* cmd)
|
||||||
{
|
{
|
||||||
xp_size_t zero = 0;
|
xp_size_t zero = 0;
|
||||||
xp_size_t* csc, * csl;
|
|
||||||
xp_size_t old_size;
|
xp_size_t old_size;
|
||||||
|
xp_size_t pos_csc, pos_csl;
|
||||||
|
|
||||||
old_size = rex->code.size;
|
old_size = rex->code.size;
|
||||||
|
|
||||||
csc = (xp_size_t*)&rex->code.buf[rex->code.size];
|
pos_csc = rex->code.size;
|
||||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
csl = (xp_size_t*)&rex->code.buf[rex->code.size];
|
pos_csl = rex->code.size;
|
||||||
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
ADD_CODE (rex, &zero, xp_sizeof(zero));
|
||||||
|
|
||||||
if (rex->ptn.curc.type == CT_NORMAL &&
|
if (rex->ptn.curc.type == CT_NORMAL &&
|
||||||
@ -372,20 +417,22 @@ static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd)
|
|||||||
while (rex->ptn.curc.type == CT_NORMAL)
|
while (rex->ptn.curc.type == CT_NORMAL)
|
||||||
{
|
{
|
||||||
xp_char_t c0, c1, c2;
|
xp_char_t c0, c1, c2;
|
||||||
|
int cc = 0;
|
||||||
|
|
||||||
c1 = rex->ptn.curc.value;
|
c1 = rex->ptn.curc.value;
|
||||||
NEXT_CHAR(rex, LEVEL_CHARSET);
|
NEXT_CHAR(rex, LEVEL_CHARSET);
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (c1 == XP_T('[') &&
|
if (c1 == XP_T('[') &&
|
||||||
rex->ptn.curc.type == CT_NORMAL &&
|
rex->ptn.curc.type == CT_NORMAL &&
|
||||||
rex->ptn.curc.value == XP_T(':'))
|
rex->ptn.curc.value == XP_T(':'))
|
||||||
{
|
{
|
||||||
/* beginning of character class */
|
if (__compile_cclass (rex, &c1) == -1)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
/* change c1 */
|
cc = cc | 1;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
c2 = c1;
|
c2 = c1;
|
||||||
if (rex->ptn.curc.type == CT_NORMAL &&
|
if (rex->ptn.curc.type == CT_NORMAL &&
|
||||||
@ -396,43 +443,105 @@ static int __compile_charset (xp_awk_rex_t* rex, struct __code* cmd)
|
|||||||
if (rex->ptn.curc.type == CT_NORMAL)
|
if (rex->ptn.curc.type == CT_NORMAL)
|
||||||
{
|
{
|
||||||
c2 = rex->ptn.curc.value;
|
c2 = rex->ptn.curc.value;
|
||||||
NEXT_CHAR(rex, LEVEL_CHARSET);
|
NEXT_CHAR (rex, LEVEL_CHARSET);
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (c2 == XP_T('[') &&
|
if (c2 == XP_T('[') &&
|
||||||
rex->ptn.curc.type == CT_NORMAL &&
|
rex->ptn.curc.type == CT_NORMAL &&
|
||||||
rex->ptn.curc.value == XP_T(':'))
|
rex->ptn.curc.value == XP_T(':'))
|
||||||
{
|
{
|
||||||
/* beginning of character class */
|
if (__compile_cclass (rex, &c2) == -1)
|
||||||
/* change c2 */
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cc = cc | 2;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
else cc = cc | 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (c1 == c2)
|
if (cc == 0 || cc == 4)
|
||||||
{
|
{
|
||||||
c0 = CHARSET_ONE;
|
if (c1 == c2)
|
||||||
|
{
|
||||||
|
c0 = CHARSET_ONE;
|
||||||
|
ADD_CODE (rex, &c0, xp_sizeof(c0));
|
||||||
|
ADD_CODE (rex, &c1, xp_sizeof(c1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
c0 = CHARSET_RANGE;
|
||||||
|
ADD_CODE (rex, &c0, xp_sizeof(c0));
|
||||||
|
ADD_CODE (rex, &c1, xp_sizeof(c1));
|
||||||
|
ADD_CODE (rex, &c2, xp_sizeof(c2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (cc == 1)
|
||||||
|
{
|
||||||
|
c0 = CHARSET_CLASS;
|
||||||
ADD_CODE (rex, &c0, xp_sizeof(c0));
|
ADD_CODE (rex, &c0, xp_sizeof(c0));
|
||||||
ADD_CODE (rex, &c1, xp_sizeof(c1));
|
ADD_CODE (rex, &c1, xp_sizeof(c1));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
c0 = CHARSET_RANGE;
|
/* invalid range */
|
||||||
ADD_CODE (rex, &c0, xp_sizeof(c0));
|
xp_printf (XP_T("invalid character set range\n"));
|
||||||
ADD_CODE (rex, &c1, xp_sizeof(c1));
|
return -1;
|
||||||
ADD_CODE (rex, &c2, xp_sizeof(c2));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
(*csc) += 1;
|
CODEAT(rex,pos_csc,xp_size_t) += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
*csl = rex->code.size - old_size;
|
CODEAT(rex,pos_csl,xp_size_t) = rex->code.size - old_size;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd)
|
static int __compile_cclass (xp_awk_rex_t* rex, xp_char_t* cc)
|
||||||
|
{
|
||||||
|
const struct __char_class_t* ccp = __char_class;
|
||||||
|
xp_size_t len = rex->ptn.end - rex->ptn.curp;
|
||||||
|
|
||||||
|
while (ccp->name != XP_NULL)
|
||||||
|
{
|
||||||
|
if (__begin_with (rex->ptn.curp, len, ccp->name)) break;
|
||||||
|
ccp++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ccp->name == XP_NULL)
|
||||||
|
{
|
||||||
|
/* wrong class name */
|
||||||
|
xp_printf (XP_T("wrong class name\n"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
rex->ptn.curp += xp_strlen(ccp->name);
|
||||||
|
|
||||||
|
NEXT_CHAR (rex, LEVEL_CHARSET);
|
||||||
|
if (rex->ptn.curc.type != CT_NORMAL ||
|
||||||
|
rex->ptn.curc.value != XP_T(':'))
|
||||||
|
{
|
||||||
|
xp_printf (XP_T(": expected\n"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
NEXT_CHAR (rex, LEVEL_CHARSET);
|
||||||
|
|
||||||
|
/* ] happens to be the charset ender ] */
|
||||||
|
if (rex->ptn.curc.type != CT_SPECIAL ||
|
||||||
|
rex->ptn.curc.value != XP_T(']'))
|
||||||
|
{
|
||||||
|
xp_printf (XP_T("] expected\n"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
NEXT_CHAR (rex, LEVEL_CHARSET);
|
||||||
|
|
||||||
|
*cc = (xp_char_t)(ccp - __char_class);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int __compile_bound (xp_awk_rex_t* rex, struct __code_t* cmd)
|
||||||
{
|
{
|
||||||
if (rex->ptn.curc.type != CT_SPECIAL) return 0;
|
if (rex->ptn.curc.type != CT_SPECIAL) return 0;
|
||||||
|
|
||||||
@ -483,7 +592,7 @@ static int __compile_bound (xp_awk_rex_t* rex, struct __code* cmd)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __compile_range (xp_awk_rex_t* rex, struct __code* cmd)
|
static int __compile_range (xp_awk_rex_t* rex, struct __code_t* cmd)
|
||||||
{
|
{
|
||||||
xp_size_t bound;
|
xp_size_t bound;
|
||||||
|
|
||||||
@ -531,7 +640,6 @@ static int __next_char (xp_awk_rex_t* rex, int level)
|
|||||||
rex->ptn.curc.type = CT_NORMAL;
|
rex->ptn.curc.type = CT_NORMAL;
|
||||||
rex->ptn.curc.value = *rex->ptn.curp++;
|
rex->ptn.curc.value = *rex->ptn.curp++;
|
||||||
|
|
||||||
xp_printf (XP_T("[%c]\n"), rex->ptn.curc.value);
|
|
||||||
if (rex->ptn.curc.value == XP_T('\\'))
|
if (rex->ptn.curc.value == XP_T('\\'))
|
||||||
{
|
{
|
||||||
if (rex->ptn.curp >= rex->ptn.end)
|
if (rex->ptn.curp >= rex->ptn.end)
|
||||||
@ -542,14 +650,6 @@ xp_printf (XP_T("[%c]\n"), rex->ptn.curc.value);
|
|||||||
}
|
}
|
||||||
|
|
||||||
rex->ptn.curc.value = *rex->ptn.curp++;
|
rex->ptn.curc.value = *rex->ptn.curp++;
|
||||||
|
|
||||||
/* TODO: need this? */
|
|
||||||
/*
|
|
||||||
if (rex->ptn.curc.value == XP_T('n')) rex->ptn.curc = XP_T('\n');
|
|
||||||
else if (rex->ptn.curc.value == XP_T('r')) rex->ptn.curc = XP_T('\r');
|
|
||||||
else if (rex->ptn.curc.value == XP_T('t')) rex->ptn.curc = XP_T('\t');
|
|
||||||
*/
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -573,14 +673,6 @@ xp_printf (XP_T("[%c]\n"), rex->ptn.curc.value);
|
|||||||
}
|
}
|
||||||
else if (level == LEVEL_CHARSET)
|
else if (level == LEVEL_CHARSET)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
if (rex->ptn.curc.value == XP_T('^') ||
|
|
||||||
rex->ptn.curc.value == XP_T('-') ||
|
|
||||||
rex->ptn.curc.value == XP_T(']'))
|
|
||||||
{
|
|
||||||
rex->ptn.curc.type = CT_SPECIAL;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
if (rex->ptn.curc.value == XP_T(']'))
|
if (rex->ptn.curc.value == XP_T(']'))
|
||||||
{
|
{
|
||||||
rex->ptn.curc.type = CT_SPECIAL;
|
rex->ptn.curc.type = CT_SPECIAL;
|
||||||
@ -641,8 +733,7 @@ static const xp_byte_t* __print_expression (const xp_byte_t* p)
|
|||||||
|
|
||||||
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
|
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
|
||||||
el = *(xp_size_t*)p; p += xp_sizeof(el);
|
el = *(xp_size_t*)p; p += xp_sizeof(el);
|
||||||
//xp_printf (XP_T("NA = %u, EL = %u\n"),
|
//xp_printf (XP_T("NA = %u, EL = %u\n"), (unsigned int)nb, (unsigned int)el);
|
||||||
// (unsigned int)nb, (unsigned int)el);
|
|
||||||
|
|
||||||
for (i = 0; i < nb; i++)
|
for (i = 0; i < nb; i++)
|
||||||
{
|
{
|
||||||
@ -659,8 +750,7 @@ static const xp_byte_t* __print_branch (const xp_byte_t* p)
|
|||||||
|
|
||||||
na = *(xp_size_t*)p; p += xp_sizeof(na);
|
na = *(xp_size_t*)p; p += xp_sizeof(na);
|
||||||
bl = *(xp_size_t*)p; p += xp_sizeof(bl);
|
bl = *(xp_size_t*)p; p += xp_sizeof(bl);
|
||||||
//xp_printf (XP_T("NA = %u, BL = %u\n"),
|
//xp_printf (XP_T("NA = %u, BL = %u\n"), (unsigned int) na, (unsigned int)bl);
|
||||||
// (unsigned int) na, (unsigned int)bl);
|
|
||||||
|
|
||||||
for (i = 0; i < na; i++)
|
for (i = 0; i < na; i++)
|
||||||
{
|
{
|
||||||
@ -672,7 +762,7 @@ static const xp_byte_t* __print_branch (const xp_byte_t* p)
|
|||||||
|
|
||||||
static const xp_byte_t* __print_atom (const xp_byte_t* p)
|
static const xp_byte_t* __print_atom (const xp_byte_t* p)
|
||||||
{
|
{
|
||||||
struct __code* cp = (struct __code*)p;
|
struct __code_t* cp = (struct __code_t*)p;
|
||||||
|
|
||||||
if (cp->cmd == CMD_BOL)
|
if (cp->cmd == CMD_BOL)
|
||||||
{
|
{
|
||||||
@ -731,6 +821,11 @@ static const xp_byte_t* __print_atom (const xp_byte_t* p)
|
|||||||
c2 = *(xp_char_t*)p;
|
c2 = *(xp_char_t*)p;
|
||||||
xp_printf (XP_T("%c-%c"), c1, c2);
|
xp_printf (XP_T("%c-%c"), c1, c2);
|
||||||
}
|
}
|
||||||
|
else if (c0 == CHARSET_CLASS)
|
||||||
|
{
|
||||||
|
c1 = *(xp_char_t*)p;
|
||||||
|
xp_printf (XP_T("[:%s:]"), __char_class[c1]);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
xp_printf (XP_T("FUCK: WRONG CHARSET CODE\n"));
|
xp_printf (XP_T("FUCK: WRONG CHARSET CODE\n"));
|
||||||
@ -761,3 +856,118 @@ xp_printf (XP_T("FUCK FUCK FUCK\n"));
|
|||||||
|
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __begin_with (
|
||||||
|
const xp_char_t* str, xp_size_t len, const xp_char_t* what)
|
||||||
|
{
|
||||||
|
const xp_char_t* end = str + len;
|
||||||
|
|
||||||
|
while (str < end)
|
||||||
|
{
|
||||||
|
if (*what == XP_T('\0')) return xp_true;
|
||||||
|
if (*what != *str) return xp_false;
|
||||||
|
|
||||||
|
str++; what++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*what == XP_T('\0')) return xp_true;
|
||||||
|
return xp_false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int xp_awk_rex_match (xp_awk_rex_t* rex,
|
||||||
|
const xp_char_t* str, xp_size_t len,
|
||||||
|
const xp_char_t** match, xp_size_t* match_len)
|
||||||
|
{
|
||||||
|
xp_size_t offset = 0;
|
||||||
|
|
||||||
|
while (offset <= len)
|
||||||
|
{
|
||||||
|
__match_expression (rex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void __match_expression (xp_awk_rex_t* rex)
|
||||||
|
{
|
||||||
|
xp_size_t nb, el, i;
|
||||||
|
|
||||||
|
nb = *(xp_size_t*)p; p += xp_sizeof(nb);
|
||||||
|
el = *(xp_size_t*)p; p += xp_sizeof(el);
|
||||||
|
|
||||||
|
for (i = 0; i < nb; i++)
|
||||||
|
{
|
||||||
|
__match_branch (rex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
void __match_branch (xp_awk_rex_t* rex)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void __match_atom (xp_awk_rex_t* rex)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
static const xp_byte_t* __print_branch (const xp_byte_t* p)
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isalnum (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isalnum (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isalpha (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isalpha (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isblank (xp_char_t c)
|
||||||
|
{
|
||||||
|
return c == XP_T(' ') || c == XP_T('\t');
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_iscntrl (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_iscntrl (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isdigit (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isdigit (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isgraph (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isgraph (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_islower (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_islower (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isprint (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isprint (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_ispunct (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_ispunct (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isspace (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isspace (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isupper (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isupper (c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xp_bool_t __cc_isxdigit (xp_char_t c)
|
||||||
|
{
|
||||||
|
return xp_isxdigit (c);
|
||||||
|
}
|
||||||
|
14
ase/awk/sa.h
14
ase/awk/sa.h
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sa.h,v 1.26 2006-07-06 15:54:41 bacon Exp $
|
* $Id: sa.h,v 1.27 2006-07-22 16:40:39 bacon Exp $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _XP_AWK_SA_H_
|
#ifndef _XP_AWK_SA_H_
|
||||||
@ -44,6 +44,12 @@
|
|||||||
#define xp_isalpha isalpha
|
#define xp_isalpha isalpha
|
||||||
#define xp_isalnum isalnum
|
#define xp_isalnum isalnum
|
||||||
#define xp_isspace isspace
|
#define xp_isspace isspace
|
||||||
|
#define xp_iscntrl iscntrl
|
||||||
|
#define xp_isgraph isgraph
|
||||||
|
#define xp_islower islower
|
||||||
|
#define xp_isupper isupper
|
||||||
|
#define xp_isprint isprint
|
||||||
|
#define xp_ispunct ispunct
|
||||||
#define xp_toupper toupper
|
#define xp_toupper toupper
|
||||||
#define xp_tolower tolower
|
#define xp_tolower tolower
|
||||||
#else
|
#else
|
||||||
@ -52,6 +58,12 @@
|
|||||||
#define xp_isalpha iswalpha
|
#define xp_isalpha iswalpha
|
||||||
#define xp_isalnum iswalnum
|
#define xp_isalnum iswalnum
|
||||||
#define xp_isspace iswspace
|
#define xp_isspace iswspace
|
||||||
|
#define xp_iscntrl iswcntrl
|
||||||
|
#define xp_isgraph iswgraph
|
||||||
|
#define xp_islower iswlower
|
||||||
|
#define xp_isupper iswupper
|
||||||
|
#define xp_isprint iswprint
|
||||||
|
#define xp_ispunct iswpunct
|
||||||
#define xp_toupper towupper
|
#define xp_toupper towupper
|
||||||
#define xp_tolower towlower
|
#define xp_tolower towlower
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,7 +23,10 @@ int xp_main (int argc, const xp_char_t* argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ptn = XP_T("^he.llo(jo(in|kk)s|com)+h*e{1,40}abc|[^abc][de-f]|^he.llo(jo(in|kk)s|com)+h*e{1,40}abc|[^abc][de-f]");
|
//ptn = XP_T("|");
|
||||||
|
//ptn = XP_T("^he.llo(jo(in|kk)s|com)+h*e{1,40}abc|[^abc][de-f]|^he.llo(jo(in|kk)s|com)+h*e{1,40}abc|[^abc][de-f]");
|
||||||
|
//ptn = XP_T("^he.llo(jo(in|kk)s|com)+h*e{1,40}abc|[^abc][de-f]");
|
||||||
|
ptn = XP_T("^he.llo(jo(in|kk)s|com)|[^x[:space:][:alpha:]j][^abc][de-f]|^he.llo(jo(in|kk)s|com)|[^x[:space:][:alpha:]j][^abc][de-f]");
|
||||||
if (xp_awk_rex_compile (rex, ptn, xp_strlen(ptn)) == -1)
|
if (xp_awk_rex_compile (rex, ptn, xp_strlen(ptn)) == -1)
|
||||||
{
|
{
|
||||||
xp_printf (XP_T("cannot compile pattern...\n"));
|
xp_printf (XP_T("cannot compile pattern...\n"));
|
||||||
|
Loading…
Reference in New Issue
Block a user