qse/ase/awk/rex.c

1881 lines
41 KiB
C
Raw Normal View History

2006-07-17 06:21:39 +00:00
/*
2007-03-06 14:58:00 +00:00
* $Id: rex.c,v 1.79 2007-03-06 14:51:53 bacon Exp $
2007-02-03 10:47:41 +00:00
*
* {License}
2006-07-17 06:21:39 +00:00
*/
2006-10-24 04:10:12 +00:00
#include <ase/awk/awk_i.h>
2006-07-17 06:21:39 +00:00
2007-02-28 11:00:32 +00:00
#ifdef DEBUG_REX
#include <ase/utl/stdio.h>
#endif
2006-07-20 03:41:00 +00:00
/* classification of the builder's current pattern character
 * (builder_t.ptn.curc.type) */
enum
{
	CT_EOF     = 0, /* the pattern has been consumed completely */
	CT_SPECIAL = 1, /* a character with a syntactic role, e.g. ( | ) ] } */
	CT_NORMAL  = 2  /* a character to be taken literally */
};
/* reading context handed to __next_char() as its 'level' argument */
enum
{
	LEVEL_TOP     = 0, /* ordinary pattern text */
	LEVEL_CHARSET = 1, /* inside a bracket expression, after '[' */
	LEVEL_RANGE   = 2  /* inside an occurrence range, after '{' */
};
2006-07-17 14:27:09 +00:00
/* atom kinds stored in code_t.cmd */
enum
{
	CMD_BOL      = 0, /* emitted for '^' */
	CMD_EOL      = 1, /* emitted for '$' */
	CMD_ANY_CHAR = 2, /* emitted for '.' */
	CMD_ORD_CHAR = 3, /* a literal character; the character follows the header */
	CMD_CHARSET  = 4, /* a bracket expression; a cshdr_t follows the header */
	CMD_GROUP    = 5  /* a parenthesized sub-pattern; it follows the header */
};
/* item kinds inside a compiled bracket expression; the kind is stored as
 * an ase_char_t in front of the item's operand(s) */
enum
{
	CHARSET_ONE   = 0, /* one character: <kind><c> */
	CHARSET_RANGE = 1, /* a character range: <kind><first><last> */
	CHARSET_CLASS = 2  /* a named class: <kind><index into __char_class> */
};
/* character class identifiers.
 * NOTE(review): nothing in the visible part of this file refers to these;
 * classes are identified by their index in __char_class instead - confirm
 * whether this enumeration is still needed. */
enum
{
	CHARSET_CLASS_PUNCT = 0,
	CHARSET_CLASS_SPACE = 1,
	CHARSET_CLASS_DIGIT = 2,
	CHARSET_CLASS_ALNUM = 3
};
2006-08-16 15:21:17 +00:00
/* initial capacity of the code buffer in bytes */
#define DEF_CODE_CAPA 512

/* smallest and largest occurrence counts an atom can carry;
 * BOUND_MAX doubles as "no upper limit" for '+' and '*' */
#define BOUND_MIN 0
#define BOUND_MAX (ASE_TYPE_MAX(ase_size_t))
2006-07-19 11:45:24 +00:00
2007-02-28 11:04:16 +00:00
/* working state of the compiler and of the matcher */
typedef struct builder_t builder_t;
typedef struct matcher_t matcher_t;
typedef struct match_t   match_t;

/* records laid out in the compiled code buffer */
typedef struct code_t  code_t;
typedef struct rhdr_t  rhdr_t;
typedef struct bhdr_t  bhdr_t;
typedef struct cshdr_t cshdr_t;
2006-07-19 11:45:24 +00:00
2007-03-01 04:31:27 +00:00
struct builder_t
{
2006-10-24 04:10:12 +00:00
ase_awk_t* awk;
2006-09-01 03:44:51 +00:00
2006-07-26 02:25:47 +00:00
struct
{
2006-10-24 04:10:12 +00:00
const ase_char_t* ptr;
const ase_char_t* end;
const ase_char_t* curp;
2006-07-26 02:25:47 +00:00
struct
{
int type;
2006-10-24 04:10:12 +00:00
ase_char_t value;
2006-07-26 02:25:47 +00:00
} curc;
} ptn;
struct
{
2006-10-24 04:10:12 +00:00
ase_byte_t* buf;
ase_size_t size;
ase_size_t capa;
2006-07-26 02:25:47 +00:00
} code;
2006-08-16 08:55:43 +00:00
struct
{
int max;
int cur;
} depth;
2006-07-26 02:25:47 +00:00
int errnum;
2007-03-01 04:31:27 +00:00
};
2006-07-26 02:25:47 +00:00
2007-03-01 04:31:27 +00:00
struct matcher_t
{
2006-10-24 04:10:12 +00:00
ase_awk_t* awk;
2006-09-01 03:44:51 +00:00
2006-07-26 05:19:46 +00:00
struct
{
struct
{
2006-10-24 04:10:12 +00:00
const ase_char_t* ptr;
const ase_char_t* end;
2006-07-26 05:19:46 +00:00
} str;
} match;
2006-08-16 08:55:43 +00:00
struct
{
int max;
int cur;
} depth;
2006-09-10 15:50:34 +00:00
int ignorecase;
2006-07-26 05:19:46 +00:00
int errnum;
2007-03-01 04:31:27 +00:00
};
2006-07-26 05:19:46 +00:00
2007-03-01 04:31:27 +00:00
struct match_t
{
2006-10-24 04:10:12 +00:00
const ase_char_t* match_ptr;
2006-07-24 11:58:55 +00:00
2006-10-24 04:10:12 +00:00
ase_bool_t matched;
ase_size_t match_len;
2006-07-23 16:31:20 +00:00
2006-10-24 04:10:12 +00:00
const ase_byte_t* branch;
const ase_byte_t* branch_end;
2007-03-01 04:31:27 +00:00
};
2007-03-06 14:16:53 +00:00
#include <ase/cmn/pack.h>

/* header placed in front of every atom */
ASE_BEGIN_PACKED_STRUCT (code_t)
	/*ase_byte_t cmd;*/
	short cmd;         /* one of CMD_XXX */
	short negate;      /* only for CMD_CHARSET */
	ase_size_t lbound; /* minimum number of occurrences */
	ase_size_t ubound; /* maximum number of occurrences */
ASE_END_PACKED_STRUCT ()

/* compiled regular expression header */
ASE_BEGIN_PACKED_STRUCT (rhdr_t)
	ase_size_t nb; /* number of branches */
	ase_size_t el; /* expression length in bytes */
ASE_END_PACKED_STRUCT ()

/* branch header */
ASE_BEGIN_PACKED_STRUCT (bhdr_t)
	ase_size_t na; /* number of atoms */
	ase_size_t bl; /* branch length in bytes */
ASE_END_PACKED_STRUCT ()

/* character set header */
ASE_BEGIN_PACKED_STRUCT (cshdr_t)
	ase_size_t csc; /* count */
	ase_size_t csl; /* length */
ASE_END_PACKED_STRUCT ()

#include <ase/cmn/unpack.h>
2006-07-23 16:31:20 +00:00
2006-10-24 04:10:12 +00:00
typedef const ase_byte_t* (*atom_matcher_t) (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-07-24 11:58:55 +00:00
2006-07-21 05:05:03 +00:00
/* number of pattern characters between the read position and the end */
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)

/* fetch the next pattern character; on failure make the ENCLOSING
 * function return -1 */
#define NEXT_CHAR(rex,level) \
	do { if (__next_char(rex,level) == -1) return -1; } while (0)

/* append len bytes at data to the code buffer; on failure make the
 * ENCLOSING function return -1 */
#define ADD_CODE(rex,data,len) \
	do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
2007-02-28 11:04:16 +00:00
static int __build_pattern (builder_t* rex);
static int __build_pattern0 (builder_t* rex);
static int __build_branch (builder_t* rex);
static int __build_atom (builder_t* rex);
2007-03-01 04:31:27 +00:00
static int __build_charset (builder_t* rex, code_t* cmd);
static int __build_occurrences (builder_t* rex, code_t* cmd);
2007-02-28 11:04:16 +00:00
static int __build_cclass (builder_t* rex, ase_char_t* cc);
2007-03-01 04:31:27 +00:00
static int __build_range (builder_t* rex, code_t* cmd);
2007-02-28 11:04:16 +00:00
static int __next_char (builder_t* rex, int level);
static int __add_code (builder_t* rex, void* data, ase_size_t len);
2006-07-19 11:45:24 +00:00
2006-10-24 04:10:12 +00:00
static ase_bool_t __begin_with (
const ase_char_t* str, ase_size_t len, const ase_char_t* what);
static const ase_byte_t* __match_pattern (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_branch (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_branch_body (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_branch_body0 (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_atom (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_bol (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_eol (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_any_char (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_ord_char (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_charset (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_group (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat);
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_occurrences (
2007-02-28 11:04:16 +00:00
matcher_t* matcher, ase_size_t si, const ase_byte_t* p,
2007-02-28 14:46:08 +00:00
ase_size_t lbound, ase_size_t ubound, match_t* mat);
2006-10-24 04:10:12 +00:00
static ase_bool_t __test_charset (
2007-02-28 11:04:16 +00:00
matcher_t* matcher, const ase_byte_t* p, ase_size_t csc, ase_char_t c);
2006-10-24 04:10:12 +00:00
2007-03-03 13:22:01 +00:00
/* character class predicates; one per named entry of __char_class */
static ase_bool_t cc_isalnum  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isalpha  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isblank  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_iscntrl  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isdigit  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isgraph  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_islower  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isprint  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_ispunct  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isspace  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isupper  (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c);
2006-10-24 04:10:12 +00:00
2007-02-28 11:00:32 +00:00
/* routines that print compiled code.
 * NOTE(review): presumably debugging aids tied to DEBUG_REX; the
 * definitions are outside the visible part of the file - confirm */
static const ase_byte_t* __print_pattern (
	ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_branch (
	ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_atom (
	ase_awk_t* awk, const ase_byte_t* p);
2006-07-24 16:23:19 +00:00
2006-09-01 07:18:40 +00:00
struct __char_class_t
2006-07-22 16:40:39 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_char_t* name;
ase_size_t name_len;
ase_bool_t (*func) (ase_awk_t* awk, ase_char_t c);
2006-09-01 07:18:40 +00:00
};
2006-07-22 16:40:39 +00:00
2006-09-01 07:18:40 +00:00
static struct __char_class_t __char_class[] =
2006-07-22 16:40:39 +00:00
{
2007-03-03 13:22:01 +00:00
{ ASE_T("alnum"), 5, cc_isalnum },
{ ASE_T("alpha"), 5, cc_isalpha },
{ ASE_T("blank"), 5, cc_isblank },
{ ASE_T("cntrl"), 5, cc_iscntrl },
{ ASE_T("digit"), 5, cc_isdigit },
{ ASE_T("graph"), 5, cc_isgraph },
{ ASE_T("lower"), 5, cc_islower },
{ ASE_T("print"), 5, cc_isprint },
{ ASE_T("punct"), 5, cc_ispunct },
{ ASE_T("space"), 5, cc_isspace },
{ ASE_T("upper"), 5, cc_isupper },
{ ASE_T("xdigit"), 6, cc_isxdigit },
2006-07-22 16:40:39 +00:00
/*
2007-03-03 13:22:01 +00:00
{ ASE_T("arabic"), 6, cc_isarabic },
{ ASE_T("chinese"), 7, cc_ischinese },
{ ASE_T("english"), 7, cc_isenglish },
{ ASE_T("japanese"), 8, cc_isjapanese },
{ ASE_T("korean"), 6, cc_iskorean },
{ ASE_T("thai"), 4, cc_isthai },
2006-07-22 16:40:39 +00:00
*/
2006-10-24 04:10:12 +00:00
{ ASE_NULL, 0, ASE_NULL }
2006-07-22 16:40:39 +00:00
};
2006-10-24 04:10:12 +00:00
/*
 * Compiles a regular expression.
 *
 *   awk    - the interpreter; supplies the allocator and the maximum
 *            nesting depth (awk->rex.depth.max.build)
 *   ptn    - the pattern text
 *   len    - the number of characters in ptn
 *   errnum - optional (may be ASE_NULL); receives the error code when
 *            the function fails
 *
 * Returns the compiled code, which is released with ase_awk_freerex(),
 * or ASE_NULL on failure.
 */
void* ase_awk_buildrex (
	ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum)
{
	builder_t builder;

	builder.awk = awk;
	builder.code.capa = DEF_CODE_CAPA;
	builder.code.size = 0;
	builder.code.buf = (ase_byte_t*)
		ASE_AWK_MALLOC (builder.awk, builder.code.capa);
	if (builder.code.buf == ASE_NULL)
	{
		/* errnum is optional on this path as on every other one */
		if (errnum != ASE_NULL) *errnum = ASE_AWK_ENOMEM;
		return ASE_NULL;
	}

	builder.ptn.ptr = ptn;
	builder.ptn.end = builder.ptn.ptr + len;
	builder.ptn.curp = builder.ptn.ptr;

	builder.ptn.curc.type = CT_EOF;
	builder.ptn.curc.value = ASE_T('\0');

	builder.depth.max = awk->rex.depth.max.build;
	builder.depth.cur = 0;

	/* load the first character before the parser looks at curc */
	if (__next_char (&builder, LEVEL_TOP) == -1)
	{
		if (errnum != ASE_NULL) *errnum = builder.errnum;
		ASE_AWK_FREE (builder.awk, builder.code.buf);
		return ASE_NULL;
	}

	if (__build_pattern (&builder) == -1)
	{
		if (errnum != ASE_NULL) *errnum = builder.errnum;
		ASE_AWK_FREE (builder.awk, builder.code.buf);
		return ASE_NULL;
	}

	if (builder.ptn.curc.type != CT_EOF)
	{
		/* the parser stopped before the end of the pattern. a stray
		 * ')' gets its own error code; anything else is garbage */
		if (errnum != ASE_NULL)
		{
			if (builder.ptn.curc.type == CT_SPECIAL &&
			    builder.ptn.curc.value == ASE_T(')'))
			{
				*errnum = ASE_AWK_EREXUNBALPAR;
			}
			else
			{
				*errnum = ASE_AWK_EREXGARBAGE;
			}
		}

		ASE_AWK_FREE (builder.awk, builder.code.buf);
		return ASE_NULL;
	}

	return builder.code.buf;
}
2006-10-24 04:10:12 +00:00
/*
 * Searches str[0..len) for the leftmost position at which the compiled
 * expression 'code' matches.
 *
 *   option    - ASE_AWK_REX_IGNORECASE requests case-insensitive matching
 *   match_ptr - optional; receives the start of the match
 *   match_len - optional; receives the length of the match
 *   errnum    - optional; receives the error code when -1 is returned
 *
 * Returns 1 if a match was found, 0 if not, and -1 on error.
 */
int ase_awk_matchrex (
	ase_awk_t* awk, void* code, int option,
	const ase_char_t* str, ase_size_t len,
	const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
{
	matcher_t matcher;
	match_t mat;

	matcher.awk = awk;

	/* remember the extent of the subject string */
	matcher.match.str.ptr = str;
	matcher.match.str.end = str + len;

	matcher.depth.max = awk->rex.depth.max.match;
	matcher.depth.cur = 0;
	matcher.ignorecase = ((option & ASE_AWK_REX_IGNORECASE) != 0);

	mat.matched = ase_false;

	/* TODO: should it allow an offset here??? the scan always starts at
	 * the beginning of str. the position one past the last character
	 * is tried as well. */
	for (mat.match_ptr = str;
	     mat.match_ptr <= matcher.match.str.end;
	     mat.match_ptr++)
	{
		if (__match_pattern (&matcher, code, &mat) == ASE_NULL)
		{
			if (errnum != ASE_NULL) *errnum = matcher.errnum;
			return -1;
		}

		if (mat.matched)
		{
			if (match_ptr != ASE_NULL) *match_ptr = mat.match_ptr;
			if (match_len != ASE_NULL) *match_len = mat.match_len;
			return 1;
		}
	}

	return 0;
}
2006-10-24 04:10:12 +00:00
/*
 * Releases compiled code through the allocator of awk.
 * code must not be ASE_NULL.
 */
void ase_awk_freerex (ase_awk_t* awk, void* code)
{
	ASE_ASSERT (code != ASE_NULL);
	ASE_AWK_FREE (awk, code);
}
2006-10-24 04:10:12 +00:00
/*
 * Tells whether compiled code represents an empty regular expression.
 *
 * Such code consists of nothing but two headers:
 *
 *   | expression header | branch header |
 *   |    nb    |   el   |   na  |   bl  |
 *
 * that is, exactly one branch and an expression length of four
 * ase_size_t values. awk is not used. code must not be ASE_NULL.
 */
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
{
	rhdr_t* hdr = (rhdr_t*)code;

	ASE_ASSERT (hdr != ASE_NULL);

	if (hdr->nb != 1) return ase_false;
	if (hdr->el != ASE_SIZEOF(ase_size_t)*4) return ase_false;
	return ase_true;
}
2007-02-28 11:04:16 +00:00
/*
 * Depth-checked entry point for compiling a pattern or a parenthesized
 * sub-pattern. Fails with ASE_AWK_EREXRECUR when a positive depth limit
 * has been reached; otherwise returns what __build_pattern0() returns.
 */
static int __build_pattern (builder_t* builder)
{
	int rc;

	/* a limit of zero or less means that the depth is not limited */
	if (builder->depth.max <= 0 ||
	    builder->depth.cur < builder->depth.max)
	{
		builder->depth.cur++;
		rc = __build_pattern0 (builder);
		builder->depth.cur--;
		return rc;
	}

	builder->errnum = ASE_AWK_EREXRECUR;
	return -1;
}
2007-02-28 11:04:16 +00:00
/*
 * Compiles a pattern: an expression header (rhdr_t) followed by one or
 * more branches separated by '|'.
 *
 * Returns 1 on success, 0 if the first branch reported that it produced
 * nothing, and -1 on error.
 */
static int __build_pattern0 (builder_t* builder)
{
	ase_size_t zero = 0;
	ase_size_t start; /* offset of the expression header */
	int n;

	start = builder->code.size;

	/* reserve the header with both fields (nb, el) set to zero. the
	 * header is addressed through its offset each time it is needed,
	 * never through a pointer kept across an append. */
	ADD_CODE (builder, &zero, ASE_SIZEOF(zero));
	ADD_CODE (builder, &zero, ASE_SIZEOF(zero));

	/* the first branch */
	n = __build_branch (builder);
	if (n == -1) return -1;
	if (n == 0) return 0; /* reported as an empty pattern */

	((rhdr_t*)&builder->code.buf[start])->nb++;

	/* any further branch is introduced by a vertical bar */
	for (;;)
	{
		if (builder->ptn.curc.type != CT_SPECIAL ||
		    builder->ptn.curc.value != ASE_T('|')) break;

		NEXT_CHAR (builder, LEVEL_TOP);

		n = __build_branch (builder);
		if (n == -1) return -1;

		/* a pattern ending with a vertical bar can get here;
		 * such a pattern is highly discouraged */
		if (n == 0) break;

		((rhdr_t*)&builder->code.buf[start])->nb++;
	}

	((rhdr_t*)&builder->code.buf[start])->el = builder->code.size - start;
	return 1;
}
2007-02-28 11:04:16 +00:00
/*
 * Compiles one branch: a branch header (bhdr_t) followed by atoms, each
 * optionally followed in the pattern by an occurrence specifier.
 *
 * Returns 1 if code has been appended, 0 if not, and -1 on error. On an
 * error raised by an atom or its occurrence specifier, the code size is
 * rolled back to its value on entry.
 */
static int __build_branch (builder_t* builder)
{
	int n;
	ase_size_t zero = 0;
	ase_size_t old_size;
	ase_size_t pos_na;
	ase_size_t pos_cmd;
	code_t* cmd;
	bhdr_t* bhdr;

	old_size = builder->code.size;

	/* reserve the header with both fields (na, bl) set to zero */
	pos_na = builder->code.size;
	ADD_CODE (builder, &zero, ASE_SIZEOF(zero));
	ADD_CODE (builder, &zero, ASE_SIZEOF(zero));

	while (1)
	{
		/* remember WHERE the atom is going to start, not its
		 * address. __build_atom() appends to the code buffer, and
		 * an address taken beforehand is not safe to use afterwards
		 * (presumably __add_code() can move the buffer when it
		 * grows, which is why the headers in this file are always
		 * re-derived from an offset). */
		pos_cmd = builder->code.size;

		n = __build_atom (builder);
		if (n == -1)
		{
			builder->code.size = old_size;
			return -1;
		}

		if (n == 0) break; /* no atom */

		/* __build_occurrences() does not append anything, so the
		 * address taken here stays valid for the call */
		cmd = (code_t*)&builder->code.buf[pos_cmd];

		n = __build_occurrences (builder, cmd);
		if (n == -1)
		{
			builder->code.size = old_size;
			return -1;
		}

		/* n == 0 no bound character. just continue */
		/* n == 1 bound has been applied by build_occurrences */

		bhdr = (bhdr_t*)&builder->code.buf[pos_na];
		bhdr->na++;
	}

	bhdr = (bhdr_t*)&builder->code.buf[pos_na];
	bhdr->bl = builder->code.size - old_size;

	return (builder->code.size == old_size)? 0: 1;
}
2007-02-28 11:04:16 +00:00
/*
 * Compiles the atom at the current pattern position, if there is one.
 * Every atom starts with a code_t header whose bounds are 1 and 1;
 * __build_occurrences() may change them afterwards.
 *
 *   (...)   CMD_GROUP followed by the compiled sub-pattern
 *   ^ $ .   CMD_BOL, CMD_EOL, CMD_ANY_CHAR
 *   [...]   CMD_CHARSET followed by the compiled character set
 *   other   CMD_ORD_CHAR followed by the character itself
 *
 * Returns 1 if an atom has been compiled, 0 if there is none (end of
 * the pattern, or a special character that does not begin an atom),
 * and -1 on error. A missing ')' is reported as ASE_AWK_EREXRPAREN and
 * a missing ']' as ASE_AWK_EREXRBRACKET.
 */
static int __build_atom (builder_t* builder)
{
	int n;
	code_t tmp;

	if (builder->ptn.curc.type == CT_EOF) return 0;

	/* common to all atoms: not negated, exactly one occurrence */
	tmp.negate = 0;
	tmp.lbound = 1;
	tmp.ubound = 1;

	if (builder->ptn.curc.type == CT_SPECIAL)
	{
		if (builder->ptn.curc.value == ASE_T('('))
		{
			tmp.cmd = CMD_GROUP;
			ADD_CODE (builder, &tmp, ASE_SIZEOF(tmp));

			NEXT_CHAR (builder, LEVEL_TOP);

			n = __build_pattern (builder);
			if (n == -1) return -1;

			if (builder->ptn.curc.type != CT_SPECIAL ||
			    builder->ptn.curc.value != ASE_T(')'))
			{
				builder->errnum = ASE_AWK_EREXRPAREN;
				return -1;
			}
		}
		else if (builder->ptn.curc.value == ASE_T('^'))
		{
			tmp.cmd = CMD_BOL;
			ADD_CODE (builder, &tmp, ASE_SIZEOF(tmp));
		}
		else if (builder->ptn.curc.value == ASE_T('$'))
		{
			tmp.cmd = CMD_EOL;
			ADD_CODE (builder, &tmp, ASE_SIZEOF(tmp));
		}
		else if (builder->ptn.curc.value == ASE_T('.'))
		{
			tmp.cmd = CMD_ANY_CHAR;
			ADD_CODE (builder, &tmp, ASE_SIZEOF(tmp));
		}
		else if (builder->ptn.curc.value == ASE_T('['))
		{
			code_t* cmd;
			ase_size_t pos_cmd;

			/* the header is going to be stored at this offset */
			pos_cmd = builder->code.size;

			tmp.cmd = CMD_CHARSET;
			ADD_CODE (builder, &tmp, ASE_SIZEOF(tmp));

			NEXT_CHAR (builder, LEVEL_CHARSET);

			/* take the address of the header only after it has
			 * been appended. an address computed before ADD_CODE
			 * is not safe to use afterwards (presumably
			 * __add_code() can move the buffer when it grows). */
			cmd = (code_t*)&builder->code.buf[pos_cmd];

			n = __build_charset (builder, cmd);
			if (n == -1) return -1;

			ASE_ASSERT (n != 0);

			if (builder->ptn.curc.type != CT_SPECIAL ||
			    builder->ptn.curc.value != ASE_T(']'))
			{
				builder->errnum = ASE_AWK_EREXRBRACKET;
				return -1;
			}
		}
		else return 0; /* this character does not begin an atom */

		/* step over the character that ended the atom */
		NEXT_CHAR (builder, LEVEL_TOP);
		return 1;
	}
	else
	{
		ASE_ASSERT (builder->ptn.curc.type == CT_NORMAL);

		tmp.cmd = CMD_ORD_CHAR;
		ADD_CODE (builder, &tmp, ASE_SIZEOF(tmp));

		ADD_CODE (builder,
			&builder->ptn.curc.value,
			ASE_SIZEOF(builder->ptn.curc.value));
		NEXT_CHAR (builder, LEVEL_TOP);

		return 1;
	}
}
2007-03-01 04:31:27 +00:00
/*
 * Compiles the inside of a bracket expression: a character set header
 * (cshdr_t) followed by items, each of which begins with its kind
 * stored as an ase_char_t:
 *
 *   CHARSET_ONE   c          a single character
 *   CHARSET_RANGE c1 c2      the range c1-c2
 *   CHARSET_CLASS index      [:name:], index refers to __char_class
 *
 *   cmd - the CMD_CHARSET header of this set inside the code buffer. It
 *         must be valid on entry; its negate field is set when the set
 *         begins with '^'.
 *
 * Returns 1 on success and -1 on error. A range with a character class
 * at either end is rejected with ASE_AWK_EREXCRANGE.
 */
static int __build_charset (builder_t* builder, code_t* cmd)
{
	ase_size_t zero = 0;
	ase_size_t old_size;
	ase_size_t pos_csc;
	ase_size_t pos_cmd;
	cshdr_t* cshdr;

	/* keep the position of the command header as an offset. the
	 * address in cmd is not safe to use once code has been appended
	 * (presumably __add_code() can move the buffer when it grows). */
	pos_cmd = (ase_size_t)((ase_byte_t*)cmd - builder->code.buf);

	old_size = builder->code.size;

	/* reserve the header with both fields (csc, csl) set to zero */
	pos_csc = builder->code.size;
	ADD_CODE (builder, &zero, ASE_SIZEOF(zero));
	ADD_CODE (builder, &zero, ASE_SIZEOF(zero));

	if (builder->ptn.curc.type == CT_NORMAL &&
	    builder->ptn.curc.value == ASE_T('^'))
	{
		cmd = (code_t*)&builder->code.buf[pos_cmd];
		cmd->negate = 1;
		NEXT_CHAR (builder, LEVEL_CHARSET);
	}

	while (builder->ptn.curc.type == CT_NORMAL)
	{
		ase_char_t c0, c1, c2;

		/* bit 0: the first operand is a character class
		 * bit 1: the second operand is a character class
		 * bit 2: a '-' is not followed by a normal character */
		int cc = 0;

		c1 = builder->ptn.curc.value;
		NEXT_CHAR(builder, LEVEL_CHARSET);

		if (c1 == ASE_T('[') &&
		    builder->ptn.curc.type == CT_NORMAL &&
		    builder->ptn.curc.value == ASE_T(':'))
		{
			/* c1 becomes the index of the class */
			if (__build_cclass (builder, &c1) == -1) return -1;
			cc = cc | 1;
		}

		c2 = c1;
		if (builder->ptn.curc.type == CT_NORMAL &&
		    builder->ptn.curc.value == ASE_T('-'))
		{
			NEXT_CHAR (builder, LEVEL_CHARSET);

			if (builder->ptn.curc.type == CT_NORMAL)
			{
				c2 = builder->ptn.curc.value;
				NEXT_CHAR (builder, LEVEL_CHARSET);

				if (c2 == ASE_T('[') &&
				    builder->ptn.curc.type == CT_NORMAL &&
				    builder->ptn.curc.value == ASE_T(':'))
				{
					if (__build_cclass (builder, &c2) == -1)
					{
						return -1;
					}

					cc = cc | 2;
				}
			}
			/* NOTE(review): the '-' itself is dropped here, so
			 * a set like [a-] contains only 'a' - confirm that
			 * this is intended */
			else cc = cc | 4;
		}

		if (cc == 0 || cc == 4)
		{
			if (c1 == c2)
			{
				c0 = CHARSET_ONE;
				ADD_CODE (builder, &c0, ASE_SIZEOF(c0));
				ADD_CODE (builder, &c1, ASE_SIZEOF(c1));
			}
			else
			{
				c0 = CHARSET_RANGE;
				ADD_CODE (builder, &c0, ASE_SIZEOF(c0));
				ADD_CODE (builder, &c1, ASE_SIZEOF(c1));
				ADD_CODE (builder, &c2, ASE_SIZEOF(c2));
			}
		}
		else if (cc == 1)
		{
			c0 = CHARSET_CLASS;
			ADD_CODE (builder, &c0, ASE_SIZEOF(c0));
			ADD_CODE (builder, &c1, ASE_SIZEOF(c1));
		}
		else
		{
			/* invalid range */
#ifdef DEBUG_REX
			ase_dprintf (
				ASE_T("__build_charset: invalid character set range\n"));
#endif
			builder->errnum = ASE_AWK_EREXCRANGE;
			return -1;
		}

		cshdr = (cshdr_t*)&builder->code.buf[pos_csc];
		cshdr->csc++;
	}

	cshdr = (cshdr_t*)&builder->code.buf[pos_csc];
	cshdr->csl = builder->code.size - old_size;

	return 1;
}
2007-02-28 11:04:16 +00:00
/*
 * Parses the rest of a character class reference, "name:]", at the read
 * position of the pattern. The caller has seen "[:" already.
 *
 *   cc - receives the index of the class in __char_class, converted to
 *        ase_char_t
 *
 * Returns 1 on success. Returns -1 with ASE_AWK_EREXCCLASS for an
 * unknown name, ASE_AWK_EREXCOLON if the name is not followed by ':'
 * and ASE_AWK_EREXRBRACKET if that is not followed by ']'.
 */
static int __build_cclass (builder_t* builder, ase_char_t* cc)
{
	const struct __char_class_t* entry;
	ase_size_t remaining;

	remaining = builder->ptn.end - builder->ptn.curp;

	/* find the table entry whose name begins the remaining text */
	for (entry = __char_class; entry->name != ASE_NULL; entry++)
	{
		if (__begin_with (builder->ptn.curp, remaining, entry->name)) break;
	}

	if (entry->name == ASE_NULL)
	{
		/* wrong class name */
#ifdef DEBUG_REX
		ase_dprintf (ASE_T("__build_cclass: wrong class name\n"));
#endif
		builder->errnum = ASE_AWK_EREXCCLASS;
		return -1;
	}

	/* step over the name */
	builder->ptn.curp += entry->name_len;

	NEXT_CHAR (builder, LEVEL_CHARSET);
	if (!(builder->ptn.curc.type == CT_NORMAL &&
	      builder->ptn.curc.value == ASE_T(':')))
	{
#ifdef DEBUG_REX
		ase_dprintf (ASE_T("__build_cclass: a colon(:) expected\n"));
#endif
		builder->errnum = ASE_AWK_EREXCOLON;
		return -1;
	}

	NEXT_CHAR (builder, LEVEL_CHARSET);

	/* ] happens to be the charset ender ] */
	if (!(builder->ptn.curc.type == CT_SPECIAL &&
	      builder->ptn.curc.value == ASE_T(']')))
	{
#ifdef DEBUG_REX
		ase_dprintf (ASE_T("__build_cclass: ] expected\n"));
#endif
		builder->errnum = ASE_AWK_EREXRBRACKET;
		return -1;
	}

	NEXT_CHAR (builder, LEVEL_CHARSET);

	*cc = (ase_char_t)(entry - __char_class);
	return 1;
}
2006-07-18 15:28:26 +00:00
2007-03-01 04:31:27 +00:00
/*
 * Applies the occurrence specifier at the current pattern position, if
 * any, to the atom header cmd:
 *
 *   +      1 to BOUND_MAX
 *   *      0 to BOUND_MAX
 *   ?      0 to 1
 *   {...}  as set by __build_range()
 *
 * Returns 1 if a specifier has been applied, 0 if there is none, and
 * -1 on error. A range that is not closed by '}' is reported as
 * ASE_AWK_EREXRBRACE.
 */
static int __build_occurrences (builder_t* builder, code_t* cmd)
{
	ase_char_t c;

	if (builder->ptn.curc.type != CT_SPECIAL) return 0;
	c = builder->ptn.curc.value;

	if (c == ASE_T('{'))
	{
		NEXT_CHAR (builder, LEVEL_RANGE);

		if (__build_range(builder, cmd) == -1) return -1;

		if (builder->ptn.curc.type != CT_SPECIAL ||
		    builder->ptn.curc.value != ASE_T('}'))
		{
			builder->errnum = ASE_AWK_EREXRBRACE;
			return -1;
		}

		NEXT_CHAR (builder, LEVEL_TOP);
		return 1;
	}

	/* the single-character specifiers differ in their bounds only */
	if (c == ASE_T('+'))
	{
		cmd->lbound = 1;
		cmd->ubound = BOUND_MAX;
	}
	else if (c == ASE_T('*'))
	{
		cmd->lbound = 0;
		cmd->ubound = BOUND_MAX;
	}
	else if (c == ASE_T('?'))
	{
		cmd->lbound = 0;
		cmd->ubound = 1;
	}
	else return 0; /* not an occurrence specifier */

	NEXT_CHAR (builder, LEVEL_TOP);
	return 1;
}
2007-03-01 04:31:27 +00:00
static int __build_range (builder_t* builder, code_t* cmd)
2006-07-19 11:45:24 +00:00
{
2006-10-24 04:10:12 +00:00
ase_size_t bound;
2006-07-20 16:21:54 +00:00
2006-10-08 05:46:41 +00:00
/* TODO: should allow white spaces in the range???
what if it is not in the raight format? convert it to ordinary characters?? */
2006-07-20 16:21:54 +00:00
bound = 0;
2006-07-26 05:19:46 +00:00
while (builder->ptn.curc.type == CT_NORMAL &&
2006-10-24 04:10:12 +00:00
(builder->ptn.curc.value >= ASE_T('0') &&
builder->ptn.curc.value <= ASE_T('9')))
2006-07-20 16:21:54 +00:00
{
2006-10-24 04:10:12 +00:00
bound = bound * 10 + builder->ptn.curc.value - ASE_T('0');
2006-07-26 05:19:46 +00:00
NEXT_CHAR (builder, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
}
cmd->lbound = bound;
2006-07-26 05:19:46 +00:00
if (builder->ptn.curc.type == CT_SPECIAL &&
2006-10-24 04:10:12 +00:00
builder->ptn.curc.value == ASE_T(','))
2006-07-20 16:21:54 +00:00
{
2006-07-26 05:19:46 +00:00
NEXT_CHAR (builder, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
bound = 0;
2006-07-26 05:19:46 +00:00
while (builder->ptn.curc.type == CT_NORMAL &&
2006-10-24 04:10:12 +00:00
(builder->ptn.curc.value >= ASE_T('0') &&
builder->ptn.curc.value <= ASE_T('9')))
2006-07-20 16:21:54 +00:00
{
2006-10-24 04:10:12 +00:00
bound = bound * 10 + builder->ptn.curc.value - ASE_T('0');
2006-07-26 05:19:46 +00:00
NEXT_CHAR (builder, LEVEL_RANGE);
2006-07-20 16:21:54 +00:00
}
cmd->ubound = bound;
}
else cmd->ubound = BOUND_MAX;
2006-07-24 16:23:19 +00:00
if (cmd->lbound > cmd->ubound)
{
2006-09-05 15:18:36 +00:00
/* invalid occurrences range */
2006-10-24 04:10:12 +00:00
builder->errnum = ASE_AWK_EREXBRANGE;
2006-07-24 16:23:19 +00:00
return -1;
}
2006-07-20 16:21:54 +00:00
return 0;
2006-07-19 11:45:24 +00:00
}
2006-07-18 15:28:26 +00:00
2007-02-28 11:04:16 +00:00
/*
 * Fetches the next pattern character into builder->ptn.curc and classifies
 * it according to the parsing context given by level.
 *
 *   - at the end of the pattern, curc becomes CT_EOF with a NUL value;
 *   - a backslash makes the character after it CT_NORMAL regardless of
 *     what it is;
 *   - otherwise the character is CT_SPECIAL if it is a metacharacter of the
 *     current level and CT_NORMAL if not.
 *
 * Returns 0 on success. Returns -1 with ASE_AWK_EREXEND set when the
 * pattern ends right after a backslash.
 */
static int __next_char (builder_t* builder, int level)
{
	/* metacharacters recognized outside of a charset or a range */
	static const ase_char_t top_specials[] =
	{
		ASE_T('['), ASE_T('|'), ASE_T('^'), ASE_T('$'),
		ASE_T('{'), ASE_T('+'), ASE_T('?'), ASE_T('*'),
		ASE_T('.'), ASE_T('('), ASE_T(')')
	};
	ase_size_t k;
	int special = 0;

	if (builder->ptn.curp >= builder->ptn.end)
	{
		builder->ptn.curc.type = CT_EOF;
		builder->ptn.curc.value = ASE_T('\0');
		return 0;
	}

	builder->ptn.curc.type = CT_NORMAL;
	builder->ptn.curc.value = *builder->ptn.curp++;

	if (builder->ptn.curc.value == ASE_T('\\'))
	{
		/* an escape: take the following character literally */
		if (builder->ptn.curp >= builder->ptn.end)
		{
			builder->errnum = ASE_AWK_EREXEND;
			return -1;
		}

		builder->ptn.curc.value = *builder->ptn.curp++;
		return 0;
	}

	switch (level)
	{
		case LEVEL_TOP:
			for (k = 0; k < ASE_COUNTOF(top_specials); k++)
			{
				if (builder->ptn.curc.value == top_specials[k])
				{
					special = 1;
					break;
				}
			}
			break;

		case LEVEL_CHARSET:
			special = (builder->ptn.curc.value == ASE_T(']'));
			break;

		case LEVEL_RANGE:
			special = (builder->ptn.curc.value == ASE_T(',') ||
			           builder->ptn.curc.value == ASE_T('}'));
			break;

		default:
			/* unknown level: nothing is special */
			break;
	}

	if (special) builder->ptn.curc.type = CT_SPECIAL;
	return 0;
}
2007-02-28 11:04:16 +00:00
/*
 * Appends len bytes from data to the code buffer of the builder, growing
 * the buffer when the free space is insufficient.
 *
 * The capacity starts at DEF_CODE_CAPA and is doubled until the new data
 * fits. If the memory manager supplies a realloc function it is used;
 * otherwise a new buffer is allocated, the bytes in use are copied over and
 * the old buffer is freed.
 *
 * Returns 0 on success. Returns -1 with builder->errnum set to
 * ASE_AWK_ENOMEM when memory cannot be obtained or the required capacity
 * cannot be represented in ase_size_t. The existing buffer is left intact
 * on failure.
 */
static int __add_code (builder_t* builder, void* data, ase_size_t len)
{
	if (len > builder->code.capa - builder->code.size)
	{
		ase_size_t capa = builder->code.capa;
		ase_byte_t* tmp;

		do
		{
			if (capa == 0) capa = DEF_CODE_CAPA;
			else
			{
				/* doubling must not wrap around; a wrapped
				 * capacity would yield an undersized buffer */
				if (capa > ASE_TYPE_MAX(ase_size_t) / 2)
				{
					builder->errnum = ASE_AWK_ENOMEM;
					return -1;
				}
				capa = capa * 2;
			}
		}
		while (len > capa - builder->code.size);

		if (builder->awk->prmfns.mmgr.realloc != ASE_NULL)
		{
			tmp = (ase_byte_t*) ASE_AWK_REALLOC (
				builder->awk, builder->code.buf, capa);
			if (tmp == ASE_NULL)
			{
				builder->errnum = ASE_AWK_ENOMEM;
				return -1;
			}
		}
		else
		{
			tmp = (ase_byte_t*) ASE_AWK_MALLOC (builder->awk, capa);
			if (tmp == ASE_NULL)
			{
				builder->errnum = ASE_AWK_ENOMEM;
				return -1;
			}

			if (builder->code.buf != ASE_NULL)
			{
				/* only the first code.size bytes hold data */
				ase_memcpy (tmp, builder->code.buf, builder->code.size);
				ASE_AWK_FREE (builder->awk, builder->code.buf);
			}
		}

		builder->code.buf = tmp;
		builder->code.capa = capa;
	}

	ase_memcpy (&builder->code.buf[builder->code.size], data, len);
	builder->code.size += len;

	return 0;
}
2006-07-20 16:21:54 +00:00
2006-10-24 04:10:12 +00:00
/*
 * Tells whether the len-character string at str starts with the
 * NUL-terminated string what. An empty what always matches.
 */
static ase_bool_t __begin_with (
	const ase_char_t* str, ase_size_t len, const ase_char_t* what)
{
	const ase_char_t* end = str + len;

	for (; *what != ASE_T('\0'); str++, what++)
	{
		/* str ran out before what did, or the characters differ */
		if (str >= end || *str != *what) return ase_false;
	}

	return ase_true;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_pattern (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-22 16:40:39 +00:00
{
2007-02-28 14:46:08 +00:00
match_t mat2;
2007-03-01 07:43:54 +00:00
ase_size_t i;
const ase_byte_t* p;
rhdr_t* rhdr;
2006-07-22 16:40:39 +00:00
2006-07-23 16:31:20 +00:00
p = base;
2007-03-01 07:43:54 +00:00
rhdr = (rhdr_t*) p; p += ASE_SIZEOF(*rhdr);
2006-07-22 16:40:39 +00:00
2007-02-28 11:00:32 +00:00
#ifdef DEBUG_REX
ase_dprintf (
2006-12-02 16:26:29 +00:00
ASE_T("__match_pattern: NB = %u, EL = %u\n"),
2007-03-01 07:43:54 +00:00
(unsigned int)rhdr->nb, (unsigned int)rhdr->el);
2006-10-08 05:46:41 +00:00
#endif
2007-02-28 11:00:32 +00:00
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-23 16:31:20 +00:00
mat->match_len = 0;
2007-03-01 07:43:54 +00:00
for (i = 0; i < rhdr->nb; i++)
2006-07-22 16:40:39 +00:00
{
2006-07-24 11:58:55 +00:00
mat2.match_ptr = mat->match_ptr;
2006-07-23 16:31:20 +00:00
2006-07-26 05:19:46 +00:00
p = __match_branch (matcher, p, &mat2);
2006-10-24 04:10:12 +00:00
if (p == ASE_NULL) return ASE_NULL;
2006-07-25 16:41:40 +00:00
2006-07-23 16:31:20 +00:00
if (mat2.matched)
{
2006-10-24 04:10:12 +00:00
mat->matched = ase_true;
2006-07-23 16:31:20 +00:00
mat->match_len = mat2.match_len;
break;
}
2006-07-22 16:40:39 +00:00
}
2007-03-01 07:43:54 +00:00
return base + rhdr->el;
2006-07-22 16:40:39 +00:00
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_branch (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-22 16:40:39 +00:00
{
2007-03-01 07:43:54 +00:00
/* branch body base+sizeof(NA)+sizeof(BL)-----+
* BL base+sizeof(NA) ----------+ |
* base NA ------+ | |
* | | |
2006-10-24 04:10:12 +00:00
* |NA(ase_size_t)|BL(ase_size_t)|ATOMS.........|
2006-07-25 16:41:40 +00:00
*/
2006-07-23 16:31:20 +00:00
mat->branch = base;
2007-03-01 07:43:54 +00:00
mat->branch_end = base + ((bhdr_t*)base)->bl;
2006-07-23 16:31:20 +00:00
2006-07-25 16:41:40 +00:00
return __match_branch_body (
2007-03-01 07:43:54 +00:00
matcher, (const ase_byte_t*)((bhdr_t*)base+1), mat);
2006-07-23 16:31:20 +00:00
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_branch_body (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-08-16 08:55:43 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* n;
2006-08-16 08:55:43 +00:00
if (matcher->depth.max > 0 && matcher->depth.cur >= matcher->depth.max)
{
2007-03-03 13:22:01 +00:00
matcher->errnum = ASE_AWK_EREXRECUR;
2006-10-24 04:10:12 +00:00
return ASE_NULL;
2006-08-16 08:55:43 +00:00
}
matcher->depth.cur++;
n = __match_branch_body0 (matcher, base, mat);
matcher->depth.cur--;
return n;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_branch_body0 (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-23 16:31:20 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p;
2007-02-28 14:46:08 +00:00
/* match_t mat2;*/
2006-10-24 04:10:12 +00:00
ase_size_t match_len = 0;
2006-07-23 16:31:20 +00:00
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-23 16:31:20 +00:00
mat->match_len = 0;
2006-07-25 16:41:40 +00:00
/* TODO: is mat2 necessary here ? */
/*
2006-07-24 11:58:55 +00:00
mat2.match_ptr = mat->match_ptr;
mat2.branch = mat->branch;
mat2.branch_end = mat->branch_end;
2006-07-25 16:41:40 +00:00
*/
2006-07-23 16:31:20 +00:00
p = base;
while (p < mat->branch_end)
{
2006-07-26 05:19:46 +00:00
p = __match_atom (matcher, p, mat);
2006-10-24 04:10:12 +00:00
if (p == ASE_NULL) return ASE_NULL;
2006-07-25 16:41:40 +00:00
if (!mat->matched) break;
mat->match_ptr = &mat->match_ptr[mat->match_len];
match_len += mat->match_len;
#if 0
2006-07-26 05:19:46 +00:00
p = __match_atom (matcher, p, &mat2);
2006-10-24 04:10:12 +00:00
if (p == ASE_NULL) return ASE_NULL;
2006-07-23 16:31:20 +00:00
if (!mat2.matched)
{
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-23 16:31:20 +00:00
break; /* stop matching */
}
2006-10-24 04:10:12 +00:00
mat->matched = ase_true;
2006-07-23 16:31:20 +00:00
mat->match_len += mat2.match_len;
2006-07-24 11:58:55 +00:00
mat2.match_ptr = &mat2.match_ptr[mat2.match_len];
2006-07-25 16:41:40 +00:00
#endif
2006-07-23 16:31:20 +00:00
}
2006-07-25 16:41:40 +00:00
if (mat->matched) mat->match_len = match_len;
2006-07-24 11:58:55 +00:00
return mat->branch_end;
2006-07-22 16:40:39 +00:00
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_atom (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-22 16:40:39 +00:00
{
2006-07-24 11:58:55 +00:00
static atom_matcher_t matchers[] =
{
__match_bol,
__match_eol,
__match_any_char,
__match_ord_char,
__match_charset,
__match_group
};
2006-07-23 16:31:20 +00:00
2007-03-06 14:58:00 +00:00
ASE_ASSERT (
2007-03-01 04:31:27 +00:00
((code_t*)base)->cmd >= 0 &&
((code_t*)base)->cmd < ASE_COUNTOF(matchers));
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
return matchers[((code_t*)base)->cmd] (matcher, base, mat);
2006-07-24 11:58:55 +00:00
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_bol (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-24 11:58:55 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p = base;
2007-03-01 04:31:27 +00:00
const code_t* cp;
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->cmd == CMD_BOL);
2006-07-23 16:31:20 +00:00
2006-07-26 05:19:46 +00:00
mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
2006-07-24 11:58:55 +00:00
(cp->lbound == cp->ubound && cp->lbound == 0));
mat->match_len = 0;
return p;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_eol (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-24 11:58:55 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p = base;
2007-03-01 04:31:27 +00:00
const code_t* cp;
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->cmd == CMD_EOL);
2006-07-24 11:58:55 +00:00
2006-07-26 05:19:46 +00:00
mat->matched = (mat->match_ptr == matcher->match.str.end ||
2006-07-24 11:58:55 +00:00
(cp->lbound == cp->ubound && cp->lbound == 0));
mat->match_len = 0;
2006-07-23 16:31:20 +00:00
2006-07-24 11:58:55 +00:00
return p;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_any_char (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-24 11:58:55 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p = base;
2007-03-01 04:31:27 +00:00
const code_t* cp;
2006-10-24 04:10:12 +00:00
ase_size_t si = 0, lbound, ubound;
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->cmd == CMD_ANY_CHAR);
2006-07-24 11:58:55 +00:00
2006-07-24 16:23:19 +00:00
lbound = cp->lbound;
ubound = cp->ubound;
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-24 11:58:55 +00:00
mat->match_len = 0;
2006-07-24 16:23:19 +00:00
/* merge the same consecutive codes */
while (p < mat->branch_end &&
2007-03-01 04:31:27 +00:00
cp->cmd == ((const code_t*)p)->cmd)
2006-07-24 16:23:19 +00:00
{
2007-03-01 04:31:27 +00:00
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
2006-07-24 16:23:19 +00:00
2006-11-29 02:54:17 +00:00
p += ASE_SIZEOF(*cp);
2006-07-24 16:23:19 +00:00
}
2007-02-28 11:00:32 +00:00
#ifdef DEBUG_REX
ase_dprintf (
2006-12-02 16:26:29 +00:00
ASE_T("__match_any_char: lbound = %u, ubound = %u\n"),
(unsigned int)lbound, (unsigned int)ubound);
2006-10-08 05:46:41 +00:00
#endif
2006-07-24 11:58:55 +00:00
/* find the longest match */
2006-07-24 16:23:19 +00:00
while (si < ubound)
2006-07-23 16:31:20 +00:00
{
2006-07-26 05:19:46 +00:00
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
2006-07-24 11:58:55 +00:00
si++;
2006-07-23 16:31:20 +00:00
}
2006-07-24 11:58:55 +00:00
2007-02-28 11:00:32 +00:00
#ifdef DEBUG_REX
ase_dprintf (
2006-12-02 16:26:29 +00:00
ASE_T("__match_any_char: max si = %u\n"), (unsigned)si);
2006-10-08 05:46:41 +00:00
#endif
2006-12-04 06:50:26 +00:00
2006-07-24 16:23:19 +00:00
if (si >= lbound && si <= ubound)
2006-07-23 16:31:20 +00:00
{
2006-09-05 15:18:36 +00:00
p = __match_occurrences (matcher, si, p, lbound, ubound, mat);
2006-07-23 16:31:20 +00:00
}
return p;
2006-07-22 16:40:39 +00:00
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_ord_char (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-23 16:31:20 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p = base;
2007-03-01 04:31:27 +00:00
const code_t* cp;
2006-10-24 04:10:12 +00:00
ase_size_t si = 0, lbound, ubound;
ase_char_t cc;
2006-07-23 16:31:20 +00:00
2007-03-01 04:31:27 +00:00
cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->cmd == CMD_ORD_CHAR);
2006-07-24 11:58:55 +00:00
2006-07-24 16:23:19 +00:00
lbound = cp->lbound;
ubound = cp->ubound;
2006-11-29 02:54:17 +00:00
cc = *(ase_char_t*)p; p += ASE_SIZEOF(cc);
2006-10-24 04:10:12 +00:00
if (matcher->ignorecase) cc = ASE_AWK_TOUPPER(matcher->awk, cc);
2006-07-23 16:31:20 +00:00
2006-07-24 16:23:19 +00:00
/* merge the same consecutive codes
2007-02-11 04:44:39 +00:00
* for example, a{1,10}a{0,10} is shortened to a{1,20} */
2006-09-10 15:50:34 +00:00
if (matcher->ignorecase)
2006-07-24 16:23:19 +00:00
{
2006-09-10 15:50:34 +00:00
while (p < mat->branch_end &&
2007-03-01 04:31:27 +00:00
cp->cmd == ((const code_t*)p)->cmd)
2006-09-10 15:50:34 +00:00
{
2006-11-29 02:54:17 +00:00
if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break;
2006-07-24 16:23:19 +00:00
2007-03-01 04:31:27 +00:00
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
2006-07-24 16:23:19 +00:00
2006-11-29 02:54:17 +00:00
p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc);
2006-09-10 15:50:34 +00:00
}
}
else
{
while (p < mat->branch_end &&
2007-03-01 04:31:27 +00:00
cp->cmd == ((const code_t*)p)->cmd)
2006-09-10 15:50:34 +00:00
{
2006-11-29 02:54:17 +00:00
if (*(ase_char_t*)(p+ASE_SIZEOF(*cp)) != cc) break;
2006-09-10 15:50:34 +00:00
2007-03-01 04:31:27 +00:00
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
2006-09-10 15:50:34 +00:00
2006-11-29 02:54:17 +00:00
p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc);
2006-09-10 15:50:34 +00:00
}
2006-07-24 16:23:19 +00:00
}
2007-02-28 11:00:32 +00:00
#ifdef DEBUG_REX
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_ord_char: cc = %c, lbound = %u, ubound = %u\n"),
cc, (unsigned int)lbound, (unsigned int)ubound);
2006-10-08 05:46:41 +00:00
#endif
2006-07-24 16:23:19 +00:00
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-23 16:31:20 +00:00
mat->match_len = 0;
/* find the longest match */
2006-09-10 15:50:34 +00:00
if (matcher->ignorecase)
2006-07-24 11:58:55 +00:00
{
2006-09-10 15:50:34 +00:00
while (si < ubound)
{
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
2006-12-04 06:50:26 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2007-02-11 04:44:39 +00:00
ASE_T("__match_ord_char: <ignorecase> %c %c\n"),
2006-12-04 06:50:26 +00:00
cc, mat->match_ptr[si]);
#endif
2006-10-24 04:10:12 +00:00
if (cc != ASE_AWK_TOUPPER (matcher->awk, mat->match_ptr[si])) break;
2006-09-10 15:50:34 +00:00
si++;
}
}
else
{
while (si < ubound)
{
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
2006-12-02 16:26:29 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_ord_char: %c %c\n"),
cc, mat->match_ptr[si]);
2006-12-02 16:26:29 +00:00
#endif
2006-09-10 15:50:34 +00:00
if (cc != mat->match_ptr[si]) break;
si++;
}
2006-07-24 11:58:55 +00:00
}
2006-10-08 05:46:41 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"),
2006-12-02 16:26:29 +00:00
(unsigned)si, (unsigned)lbound, (unsigned)ubound);
2006-10-08 05:46:41 +00:00
#endif
2006-07-24 16:23:19 +00:00
if (si >= lbound && si <= ubound)
2006-07-24 11:58:55 +00:00
{
2006-09-05 15:18:36 +00:00
p = __match_occurrences (matcher, si, p, lbound, ubound, mat);
2006-07-24 11:58:55 +00:00
}
return p;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_charset (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-24 11:58:55 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p = base;
2007-03-01 04:31:27 +00:00
ase_size_t si = 0;
2006-10-24 04:10:12 +00:00
ase_bool_t n;
ase_char_t c;
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
code_t* cp;
2007-02-28 14:46:08 +00:00
cshdr_t* cshdr;
2007-03-01 04:31:27 +00:00
cp = (code_t*)p; p += ASE_SIZEOF(*cp);
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->cmd == CMD_CHARSET);
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
cshdr = (cshdr_t*)p; p += ASE_SIZEOF(*cshdr);
2006-07-24 11:58:55 +00:00
2007-02-28 11:00:32 +00:00
#ifdef DEBUG_REX
ase_dprintf (
2006-12-02 16:26:29 +00:00
ASE_T("__match_charset: lbound = %u, ubound = %u\n"),
2007-03-01 04:31:27 +00:00
(unsigned int)cp->lbound, (unsigned int)cp->ubound);
2006-12-02 16:26:29 +00:00
#endif
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-24 11:58:55 +00:00
mat->match_len = 0;
2007-03-01 04:31:27 +00:00
while (si < cp->ubound)
2006-07-23 16:31:20 +00:00
{
2006-07-26 05:19:46 +00:00
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
2006-07-24 11:58:55 +00:00
2006-09-10 15:50:34 +00:00
c = mat->match_ptr[si];
2006-10-24 04:10:12 +00:00
if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c);
2006-09-10 15:50:34 +00:00
2007-03-01 04:31:27 +00:00
n = __test_charset (matcher, p, cshdr->csc, c);
2006-07-24 11:58:55 +00:00
if (cp->negate) n = !n;
if (!n) break;
2006-07-23 16:31:20 +00:00
si++;
}
2007-03-01 04:31:27 +00:00
p = p + cshdr->csl - ASE_SIZEOF(*cshdr);
2006-07-24 11:58:55 +00:00
2006-12-02 16:26:29 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"),
2007-03-01 04:31:27 +00:00
(unsigned)si, (unsigned)cp->lbound, (unsigned)cp->ubound);
2006-12-02 16:26:29 +00:00
#endif
2007-03-01 04:31:27 +00:00
if (si >= cp->lbound && si <= cp->ubound)
2006-07-23 16:31:20 +00:00
{
2007-03-01 04:31:27 +00:00
p = __match_occurrences (matcher, si, p, cp->lbound, cp->ubound, mat);
2006-07-24 11:58:55 +00:00
}
return p;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_group (
2007-02-28 14:46:08 +00:00
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
2006-07-24 11:58:55 +00:00
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* p = base;
2007-03-01 04:31:27 +00:00
const code_t* cp;
2007-02-28 14:46:08 +00:00
match_t mat2;
2006-10-24 04:10:12 +00:00
ase_size_t si = 0, grp_len_static[16], * grp_len;
2006-07-24 11:58:55 +00:00
2007-03-01 04:31:27 +00:00
cp = (const code_t*)p; p += ASE_SIZEOF(*cp);
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->cmd == CMD_GROUP);
2006-07-23 16:31:20 +00:00
2006-10-24 04:10:12 +00:00
mat->matched = ase_false;
2006-07-24 11:58:55 +00:00
mat->match_len = 0;
/*
2006-07-24 16:23:19 +00:00
* A grouped pattern, unlike other atoms, can match one or more
2006-07-24 11:58:55 +00:00
* characters. When it is requested with a variable occurrences,
* the number of characters that have matched at each occurrence
* needs to be remembered for the backtracking purpose.
*
* An array "grp_len" is used to store the accumulated number of
* characters. grp_len[0] is set to zero always for convenience.
* grp_len[1] holds the number of characters that have matched
* at the first occurrence, grp_len[2] at the second occurrence,
* and so on.
*
* Look at the following example
*
* pattern: (abc){1,3}x string: abcabcabcxyz
*
* grp_len[3] => 9 -----------+
* grp_len[2] => 6 --------+ |
* grp_len[1] => 3 -----+ | |
* grp_len[0] => 0 --+ | | |
* | | | |
* abcabcabcxyz
*/
2006-11-29 02:54:17 +00:00
if (cp->ubound < ASE_COUNTOF(grp_len_static))
2006-07-25 16:41:40 +00:00
{
grp_len = grp_len_static;
}
else
{
2006-10-24 04:10:12 +00:00
grp_len = (ase_size_t*) ASE_AWK_MALLOC (
2006-11-29 02:54:17 +00:00
matcher->awk, ASE_SIZEOF(ase_size_t) * cp->ubound);
2006-10-24 04:10:12 +00:00
if (grp_len == ASE_NULL)
2006-07-25 16:41:40 +00:00
{
2006-10-24 04:10:12 +00:00
matcher->errnum = ASE_AWK_ENOMEM;
return ASE_NULL;
2006-07-25 16:41:40 +00:00
}
}
grp_len[si] = 0;
mat2.match_ptr = mat->match_ptr;
2006-07-24 11:58:55 +00:00
while (si < cp->ubound)
{
2006-07-26 05:19:46 +00:00
if (mat2.match_ptr >= matcher->match.str.end) break;
2006-07-24 11:58:55 +00:00
2006-10-24 04:10:12 +00:00
if (__match_pattern (matcher, p, &mat2) == ASE_NULL)
2006-07-25 16:41:40 +00:00
{
2006-09-01 03:44:51 +00:00
if (grp_len != grp_len_static)
2006-10-24 04:10:12 +00:00
ASE_AWK_FREE (matcher->awk, grp_len);
return ASE_NULL;
2006-07-25 16:41:40 +00:00
}
2006-07-24 11:58:55 +00:00
if (!mat2.matched) break;
2006-07-25 16:41:40 +00:00
grp_len[si+1] = grp_len[si] + mat2.match_len;
2006-07-24 11:58:55 +00:00
mat2.match_ptr += mat2.match_len;
mat2.match_len = 0;
2006-10-24 04:10:12 +00:00
mat2.matched = ase_false;
2006-07-24 11:58:55 +00:00
si++;
}
2006-07-25 16:41:40 +00:00
/* increment p by the length of the subpattern */
2006-11-29 02:54:17 +00:00
p += *(ase_size_t*)(p+ASE_SIZEOF(ase_size_t));
2006-07-24 11:58:55 +00:00
2006-09-05 15:18:36 +00:00
/* check the occurrences */
2006-07-24 11:58:55 +00:00
if (si >= cp->lbound && si <= cp->ubound)
{
if (cp->lbound == cp->ubound || p >= mat->branch_end)
2006-07-23 16:31:20 +00:00
{
2006-10-24 04:10:12 +00:00
mat->matched = ase_true;
2006-07-24 11:58:55 +00:00
mat->match_len = grp_len[si];
2006-07-23 16:31:20 +00:00
}
2006-07-24 11:58:55 +00:00
else
2006-07-23 16:31:20 +00:00
{
2007-03-06 14:58:00 +00:00
ASE_ASSERT (cp->ubound > cp->lbound);
2006-07-23 16:31:20 +00:00
2006-07-24 11:58:55 +00:00
do
{
2006-10-24 04:10:12 +00:00
const ase_byte_t* tmp;
2006-07-24 11:58:55 +00:00
mat2.match_ptr = &mat->match_ptr[grp_len[si]];
2006-07-23 16:31:20 +00:00
mat2.branch = mat->branch;
mat2.branch_end = mat->branch_end;
2006-07-24 11:58:55 +00:00
2006-12-02 16:26:29 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-02 16:26:29 +00:00
ASE_T("__match_group: GROUP si=%d [%s]\n"),
(unsigned)si, mat->match_ptr);
#endif
2006-07-26 05:19:46 +00:00
tmp = __match_branch_body (matcher, p, &mat2);
2006-10-24 04:10:12 +00:00
if (tmp == ASE_NULL)
2006-07-25 16:41:40 +00:00
{
if (grp_len != grp_len_static)
2006-10-24 04:10:12 +00:00
ASE_AWK_FREE (matcher->awk, grp_len);
return ASE_NULL;
2006-07-25 16:41:40 +00:00
}
2006-07-23 16:31:20 +00:00
if (mat2.matched)
{
2006-10-24 04:10:12 +00:00
mat->matched = ase_true;
2006-07-24 11:58:55 +00:00
mat->match_len = grp_len[si] + mat2.match_len;
p = tmp;
2006-07-23 16:31:20 +00:00
break;
}
2006-07-24 11:58:55 +00:00
if (si <= cp->lbound) break;
2006-07-23 16:31:20 +00:00
si--;
2006-07-24 11:58:55 +00:00
}
while (1);
2006-07-23 16:31:20 +00:00
}
2006-07-24 11:58:55 +00:00
}
2006-10-24 04:10:12 +00:00
if (grp_len != grp_len_static) ASE_AWK_FREE (matcher->awk, grp_len);
2006-07-24 11:58:55 +00:00
return p;
}
2006-10-24 04:10:12 +00:00
static const ase_byte_t* __match_occurrences (
2007-02-28 11:04:16 +00:00
matcher_t* matcher, ase_size_t si, const ase_byte_t* p,
2007-02-28 14:46:08 +00:00
ase_size_t lbound, ase_size_t ubound, match_t* mat)
2006-07-24 11:58:55 +00:00
{
2007-03-06 14:58:00 +00:00
ASE_ASSERT (si >= lbound && si <= ubound);
2006-07-24 11:58:55 +00:00
/* the match has been found */
2006-07-24 16:23:19 +00:00
if (lbound == ubound || p >= mat->branch_end)
2006-07-24 11:58:55 +00:00
{
/* if the match for fixed occurrences was
* requested or no atoms remain unchecked in
* the branch, the match is returned. */
2006-10-24 04:10:12 +00:00
mat->matched = ase_true;
2006-07-24 11:58:55 +00:00
mat->match_len = si;
}
else
{
/* Otherwise, it checks if the remaining atoms
* match the rest of the string
*
* Let's say the caller of this function was processing
* the first period character in the following example.
*
* pattern: .{1,3}xx string: xxxyy
*
* It scans up to the third "x" in the string. si is set
* to 3 and p points to the first "x" in the pattern.
* It doesn't change mat.match_ptr so mat.match_ptr remains
* the same.
*
* si = 3 p -----+ mat.match_ptr ---+
* | |
* .{1,3}xx xxxyy
*
* When the code reaches here, the string pointed at by
* &mat.match_ptr[si] is tried to match against the remaining
* pattern pointed at p.
*
* &mat.match_ptr[si] ---+
* |
* xxxyy
*
* If a match is found, the match and the previous match are
* merged and returned.
*
* If not, si is decremented by one and the match is performed
* from the string pointed at by &mat.match_ptr[si].
*
* &mat.match_ptr[si] --+
* |
* xxxyy
*
* This process is repeated until a match is found or si
2006-07-24 16:23:19 +00:00
* becomes less than lbound. (si never becomes less than
* lbound in the implementation below, though)
2006-07-24 11:58:55 +00:00
*/
2007-03-06 14:58:00 +00:00
ASE_ASSERT (ubound > lbound);
2006-07-24 11:58:55 +00:00
do
{
2007-02-28 14:46:08 +00:00
match_t mat2;
2006-10-24 04:10:12 +00:00
const ase_byte_t* tmp;
2006-07-24 11:58:55 +00:00
mat2.match_ptr = &mat->match_ptr[si];
mat2.branch = mat->branch;
mat2.branch_end = mat->branch_end;
2006-12-02 16:26:29 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-02 16:26:29 +00:00
ASE_T("__match occurrences: si=%u [%s]\n"),
(unsigned)si, mat->match_ptr);
#endif
2006-07-26 05:19:46 +00:00
tmp = __match_branch_body (matcher, p, &mat2);
2006-07-24 11:58:55 +00:00
if (mat2.matched)
{
2006-10-24 04:10:12 +00:00
mat->matched = ase_true;
2006-07-24 11:58:55 +00:00
mat->match_len = si + mat2.match_len;
p = tmp;
break;
}
2006-07-24 16:23:19 +00:00
if (si <= lbound) break;
2006-07-24 11:58:55 +00:00
si--;
}
while (1);
2006-07-23 16:31:20 +00:00
}
return p;
2006-07-22 16:40:39 +00:00
}
2006-11-19 06:15:58 +00:00
static ase_bool_t __test_charset (
2007-02-28 11:04:16 +00:00
matcher_t* matcher, const ase_byte_t* p, ase_size_t csc, ase_char_t c)
2006-07-24 11:58:55 +00:00
{
2006-10-24 04:10:12 +00:00
ase_size_t i;
2006-07-24 11:58:55 +00:00
for (i = 0; i < csc; i++)
{
2006-10-24 04:10:12 +00:00
ase_char_t c0, c1, c2;
2006-07-24 11:58:55 +00:00
2006-11-19 06:15:58 +00:00
c0 = *(const ase_char_t*)p;
2006-11-29 02:54:17 +00:00
p += ASE_SIZEOF(c0);
2006-07-24 11:58:55 +00:00
if (c0 == CHARSET_ONE)
{
2006-11-19 06:15:58 +00:00
c1 = *(const ase_char_t*)p;
2006-09-10 15:50:34 +00:00
if (matcher->ignorecase)
2006-10-24 04:10:12 +00:00
c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
2006-12-04 06:50:26 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_charset: <one> %c %c\n"), c, c1);
#endif
2006-10-24 04:10:12 +00:00
if (c == c1) return ase_true;
2006-07-24 11:58:55 +00:00
}
else if (c0 == CHARSET_RANGE)
{
2006-11-19 06:15:58 +00:00
c1 = *(const ase_char_t*)p;
2006-11-29 02:54:17 +00:00
p += ASE_SIZEOF(c1);
2006-11-19 06:15:58 +00:00
c2 = *(const ase_char_t*)p;
2006-07-24 11:58:55 +00:00
2006-09-10 15:50:34 +00:00
if (matcher->ignorecase)
{
2006-10-24 04:10:12 +00:00
c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
c2 = ASE_AWK_TOUPPER(matcher->awk, c2);
2006-09-10 15:50:34 +00:00
}
2006-12-04 06:50:26 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_charset: <range> %c %c-%c\n"), c, c1, c2);
#endif
2006-10-24 04:10:12 +00:00
if (c >= c1 && c <= c2) return ase_true;
2006-07-24 11:58:55 +00:00
}
else if (c0 == CHARSET_CLASS)
{
2006-11-19 06:15:58 +00:00
c1 = *(const ase_char_t*)p;
2006-12-04 06:50:26 +00:00
#ifdef DEBUG_REX
2007-02-28 11:00:32 +00:00
ase_dprintf (
2006-12-04 06:50:26 +00:00
ASE_T("__match_charset: <class> %c %s\n"),
c, __char_class[c1].name);
#endif
2006-09-01 06:45:05 +00:00
if (__char_class[c1].func (
2006-10-24 04:10:12 +00:00
matcher->awk, c)) return ase_true;
2006-07-24 11:58:55 +00:00
}
else
{
2007-03-06 14:58:00 +00:00
ASE_ASSERT (!"should never happen - invalid charset code");
2006-07-24 16:23:19 +00:00
break;
2006-07-24 11:58:55 +00:00
}
2006-11-29 02:54:17 +00:00
p += ASE_SIZEOF(c1);
2006-07-24 11:58:55 +00:00
}
2006-10-24 04:10:12 +00:00
return ase_false;
2006-07-24 11:58:55 +00:00
}
2007-03-03 13:22:01 +00:00
static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c)
2006-07-22 16:40:39 +00:00
{
2006-10-24 04:10:12 +00:00
return ASE_AWK_ISALNUM (awk, c);
2006-07-22 16:40:39 +00:00
}
2007-03-03 13:22:01 +00:00
static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c)
2006-07-22 16:40:39 +00:00
{
2006-10-24 04:10:12 +00:00
return ASE_AWK_ISALPHA (awk, c);
2006-07-22 16:40:39 +00:00
}
2007-03-03 13:22:01 +00:00
static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c)
2006-07-22 16:40:39 +00:00
{
2006-10-24 04:10:12 +00:00
return c == ASE_T(' ') || c == ASE_T('\t');
2006-07-22 16:40:39 +00:00
}
2007-03-03 13:22:01 +00:00
static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c)
2006-07-22 16:40:39 +00:00
{
2006-10-24 04:10:12 +00:00
return ASE_AWK_ISCNTRL (awk, c);
2006-07-22 16:40:39 +00:00
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISDIGIT.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISDIGIT (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISGRAPH.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISGRAPH (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISLOWER.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISLOWER (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISPRINT.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISPRINT (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISPUNCT.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISPUNCT (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISSPACE.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISSPACE (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISUPPER.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISUPPER (awk, c);
	return is_member;
}
2007-03-03 13:22:01 +00:00
/*
 * Character-class predicate backed by ASE_AWK_ISXDIGIT.
 * Returns the macro's verdict for c, evaluated against the given awk
 * instance, as an ase_bool_t.
 */
static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c)
{
	ase_bool_t is_member;

	is_member = ASE_AWK_ISXDIGIT (awk, c);
	return is_member;
}
2006-07-24 16:23:19 +00:00
2007-02-28 11:00:32 +00:00
/*
 * Shorthands for the debug-print hook stored in an awk instance.
 * Both macros expand to member accesses on a variable literally named
 * `awk`, so they can only be used where such a variable is in scope.
 * They are used together as: DPRINTF (DCUSTOM, format, args...);
 * DCUSTOM supplies the first argument and must be written only once.
 */
#define DPRINTF awk->prmfns.misc.dprintf
#define DCUSTOM awk->prmfns.misc.custom_data
2006-11-15 05:49:22 +00:00
2007-02-28 11:00:32 +00:00
/*
 * Prints a compiled regular expression through the awk instance's debug
 * printer and terminates the output with a newline.
 *
 * awk - instance whose prmfns.misc.dprintf / custom_data hook is used.
 * rex - compiled regular-expression code; handed unchanged to
 *       __print_pattern, which walks it.
 */
void ase_awk_dprintrex (ase_awk_t* awk, void* rex)
{
	__print_pattern (awk, rex);

	/* DCUSTOM already expands to awk->prmfns.misc.custom_data. It must be
	 * passed exactly once, immediately before the format string; passing
	 * the custom data a second time would put it in the format slot. */
	DPRINTF (DCUSTOM, ASE_T("\n"));
}
2007-02-28 11:00:32 +00:00
/*
 * Prints one compiled pattern located at p.
 *
 * The pattern begins with an rhdr_t header; its nb field is the number of
 * branches stored after the header. Each branch is printed with
 * __print_branch, and a "|" is emitted between consecutive branches.
 *
 * Returns the position handed back by the last __print_branch call, or the
 * position right after the header when nb is 0.
 */
static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p)
{
	const rhdr_t* hdr = (const rhdr_t*)p;
	ase_size_t branch_no = 0;

	/* step over the header to reach the first branch */
	p += ASE_SIZEOF(*hdr);

	while (branch_no < hdr->nb)
	{
		/* every branch except the first is preceded by a separator */
		if (branch_no > 0) DPRINTF (DCUSTOM, ASE_T("|"));

		p = __print_branch (awk, p);
		branch_no++;
	}

	return p;
}
2007-02-28 11:00:32 +00:00
/*
 * Prints one branch of a compiled pattern located at p.
 *
 * The branch begins with a bhdr_t header; its na field is the number of
 * atoms stored after the header. The atoms are printed one after another
 * with __print_atom.
 *
 * Returns the position handed back by the last __print_atom call, or the
 * position right after the header when na is 0.
 */
static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p)
{
	const bhdr_t* hdr = (const bhdr_t*)p;
	ase_size_t atom_no = 0;

	/* step over the header to reach the first atom */
	p += ASE_SIZEOF(*hdr);

	while (atom_no < hdr->na)
	{
		p = __print_atom (awk, p);
		atom_no++;
	}

	return p;
}
2007-02-28 11:00:32 +00:00
/*
 * Prints one atom of a compiled pattern located at p, followed by its
 * occurrence suffix, and returns the position after the atom.
 *
 * The atom begins with a code_t whose cmd field selects what follows:
 *   CMD_BOL, CMD_EOL, CMD_ANY_CHAR - nothing; printed as "^", "$", ".".
 *   CMD_ORD_CHAR - one ase_char_t, printed as is.
 *   CMD_CHARSET  - a cshdr_t followed by csc items. Each item is one
 *                  ase_char_t holding the item kind (CHARSET_ONE,
 *                  CHARSET_RANGE or CHARSET_CLASS) followed by one
 *                  character, two characters, or one index into
 *                  __char_class respectively. Printed inside "[...]",
 *                  with a leading "^" when the negate field is set.
 *   CMD_GROUP    - a nested pattern, printed inside "(...)".
 * An unknown cmd prints a diagnostic and leaves p where it was.
 *
 * The lbound/ubound fields are then printed as "*", "+", "?", or
 * "{lbound,ubound}"; nothing is printed when both bounds are 1.
 */
static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p)
{
	const code_t* cp = (const code_t*)p;

	switch (cp->cmd)
	{
		case CMD_BOL:
			p += ASE_SIZEOF(*cp);
			DPRINTF (DCUSTOM, ASE_T("^"));
			break;

		case CMD_EOL:
			p += ASE_SIZEOF(*cp);
			DPRINTF (DCUSTOM, ASE_T("$"));
			break;

		case CMD_ANY_CHAR:
			p += ASE_SIZEOF(*cp);
			DPRINTF (DCUSTOM, ASE_T("."));
			break;

		case CMD_ORD_CHAR:
			/* the literal character is stored right after the code */
			p += ASE_SIZEOF(*cp);
			DPRINTF (DCUSTOM, ASE_T("%c"), *(const ase_char_t*)p);
			p += ASE_SIZEOF(ase_char_t);
			break;

		case CMD_CHARSET:
		{
			const cshdr_t* set;
			ase_size_t item_no;

			p += ASE_SIZEOF(*cp);
			DPRINTF (DCUSTOM, ASE_T("["));
			if (cp->negate) DPRINTF (DCUSTOM, ASE_T("^"));

			set = (const cshdr_t*)p;
			p += ASE_SIZEOF(*set);

			for (item_no = 0; item_no < set->csc; item_no++)
			{
				ase_char_t kind, first, last;

				kind = *(const ase_char_t*)p;
				p += ASE_SIZEOF(ase_char_t);

				switch (kind)
				{
					case CHARSET_ONE:
						first = *(const ase_char_t*)p;
						DPRINTF (DCUSTOM, ASE_T("%c"), first);
						break;

					case CHARSET_RANGE:
						first = *(const ase_char_t*)p;
						p += ASE_SIZEOF(ase_char_t);
						last = *(const ase_char_t*)p;
						DPRINTF (DCUSTOM, ASE_T("%c-%c"), first, last);
						break;

					case CHARSET_CLASS:
						/* the stored character is an index into __char_class */
						first = *(const ase_char_t*)p;
						DPRINTF (DCUSTOM, ASE_T("[:%s:]"), __char_class[first].name);
						break;

					default:
						DPRINTF (DCUSTOM, ASE_T("should never happen - invalid charset code\n"));
						break;
				}

				/* step over the last character read for this item; this
				 * also happens for an invalid item kind */
				p += ASE_SIZEOF(ase_char_t);
			}

			DPRINTF (DCUSTOM, ASE_T("]"));
			break;
		}

		case CMD_GROUP:
			p += ASE_SIZEOF(*cp);
			DPRINTF (DCUSTOM, ASE_T("("));
			p = __print_pattern (awk, p);
			DPRINTF (DCUSTOM, ASE_T(")"));
			break;

		default:
			/* p is deliberately left untouched here */
			DPRINTF (DCUSTOM, ASE_T("should never happen - invalid atom code\n"));
			break;
	}

	/* occurrence suffix */
	if (cp->ubound == BOUND_MAX && cp->lbound == 0)
	{
		DPRINTF (DCUSTOM, ASE_T("*"));
	}
	else if (cp->ubound == BOUND_MAX && cp->lbound == 1)
	{
		DPRINTF (DCUSTOM, ASE_T("+"));
	}
	else if (cp->ubound == 1 && cp->lbound == 0)
	{
		DPRINTF (DCUSTOM, ASE_T("?"));
	}
	else if (!(cp->lbound == 1 && cp->ubound == 1))
	{
		DPRINTF (DCUSTOM, ASE_T("{%lu,%lu}"),
			(unsigned long)cp->lbound, (unsigned long)cp->ubound);
	}

	return p;
}
2006-08-16 11:35:54 +00:00