fixed charset range handling in awk/rex.c

This commit is contained in:
hyung-hwan 2007-12-05 20:08:45 +00:00
parent 0089196f2e
commit fc00b6f468

View File

@ -75,6 +75,7 @@ struct builder_t
{ {
int type; int type;
ase_char_t value; ase_char_t value;
ase_bool_t escaped;
} curc; } curc;
} ptn; } ptn;
@ -164,49 +165,49 @@ typedef const ase_byte_t* (*atom_matcher_t) (
#define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp) #define NCHARS_REMAINING(rex) ((rex)->ptn.end - (rex)->ptn.curp)
#define NEXT_CHAR(rex,level) \ #define NEXT_CHAR(rex,level) \
do { if (__next_char(rex,level) == -1) return -1; } while (0) do { if (next_char(rex,level) == -1) return -1; } while (0)
#define ADD_CODE(rex,data,len) \ #define ADD_CODE(rex,data,len) \
do { if (__add_code(rex,data,len) == -1) return -1; } while (0) do { if (add_code(rex,data,len) == -1) return -1; } while (0)
static int __build_pattern (builder_t* rex); static int build_pattern (builder_t* rex);
static int __build_pattern0 (builder_t* rex); static int build_pattern0 (builder_t* rex);
static int __build_branch (builder_t* rex); static int build_branch (builder_t* rex);
static int __build_atom (builder_t* rex); static int build_atom (builder_t* rex);
static int __build_charset (builder_t* rex, code_t* cmd); static int build_charset (builder_t* rex, code_t* cmd);
static int __build_occurrences (builder_t* rex, code_t* cmd); static int build_occurrences (builder_t* rex, code_t* cmd);
static int __build_cclass (builder_t* rex, ase_char_t* cc); static int build_cclass (builder_t* rex, ase_char_t* cc);
static int __build_range (builder_t* rex, code_t* cmd); static int build_range (builder_t* rex, code_t* cmd);
static int __next_char (builder_t* rex, int level); static int next_char (builder_t* rex, int level);
static int __add_code (builder_t* rex, void* data, ase_size_t len); static int add_code (builder_t* rex, void* data, ase_size_t len);
static ase_bool_t __begin_with ( static ase_bool_t __begin_with (
const ase_char_t* str, ase_size_t len, const ase_char_t* what); const ase_char_t* str, ase_size_t len, const ase_char_t* what);
static const ase_byte_t* __match_pattern ( static const ase_byte_t* match_pattern (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_branch ( static const ase_byte_t* match_branch (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_branch_body ( static const ase_byte_t* match_branch_body (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_branch_body0 ( static const ase_byte_t* match_branch_body0 (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_atom ( static const ase_byte_t* match_atom (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_bol ( static const ase_byte_t* match_bol (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_eol ( static const ase_byte_t* match_eol (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_any_char ( static const ase_byte_t* match_any_char (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_ord_char ( static const ase_byte_t* match_ord_char (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_charset ( static const ase_byte_t* match_charset (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_group ( static const ase_byte_t* match_group (
matcher_t* matcher, const ase_byte_t* base, match_t* mat); matcher_t* matcher, const ase_byte_t* base, match_t* mat);
static const ase_byte_t* __match_occurrences ( static const ase_byte_t* match_occurrences (
matcher_t* matcher, ase_size_t si, const ase_byte_t* p, matcher_t* matcher, ase_size_t si, const ase_byte_t* p,
ase_size_t lbound, ase_size_t ubound, match_t* mat); ase_size_t lbound, ase_size_t ubound, match_t* mat);
@ -286,18 +287,19 @@ void* ase_awk_buildrex (
builder.ptn.curc.type = CT_EOF; builder.ptn.curc.type = CT_EOF;
builder.ptn.curc.value = ASE_T('\0'); builder.ptn.curc.value = ASE_T('\0');
builder.ptn.curc.escaped = ase_false;
builder.depth.max = awk->rex.depth.max.build; builder.depth.max = awk->rex.depth.max.build;
builder.depth.cur = 0; builder.depth.cur = 0;
if (__next_char (&builder, LEVEL_TOP) == -1) if (next_char (&builder, LEVEL_TOP) == -1)
{ {
if (errnum != ASE_NULL) *errnum = builder.errnum; if (errnum != ASE_NULL) *errnum = builder.errnum;
ASE_AWK_FREE (builder.awk, builder.code.buf); ASE_AWK_FREE (builder.awk, builder.code.buf);
return ASE_NULL; return ASE_NULL;
} }
if (__build_pattern (&builder) == -1) if (build_pattern (&builder) == -1)
{ {
if (errnum != ASE_NULL) *errnum = builder.errnum; if (errnum != ASE_NULL) *errnum = builder.errnum;
ASE_AWK_FREE (builder.awk, builder.code.buf); ASE_AWK_FREE (builder.awk, builder.code.buf);
@ -353,7 +355,7 @@ int ase_awk_matchrex (
/*while (mat.match_ptr < matcher.match.str.end)*/ /*while (mat.match_ptr < matcher.match.str.end)*/
while (mat.match_ptr <= matcher.match.str.end) while (mat.match_ptr <= matcher.match.str.end)
{ {
if (__match_pattern (&matcher, code, &mat) == ASE_NULL) if (match_pattern (&matcher, code, &mat) == ASE_NULL)
{ {
if (errnum != ASE_NULL) *errnum = matcher.errnum; if (errnum != ASE_NULL) *errnum = matcher.errnum;
return -1; return -1;
@ -413,7 +415,7 @@ ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
rhdr->el == ASE_SIZEOF(ase_size_t)*4)? ase_true: ase_false; rhdr->el == ASE_SIZEOF(ase_size_t)*4)? ase_true: ase_false;
} }
static int __build_pattern (builder_t* builder) static int build_pattern (builder_t* builder)
{ {
int n; int n;
@ -424,13 +426,13 @@ static int __build_pattern (builder_t* builder)
} }
builder->depth.cur++; builder->depth.cur++;
n = __build_pattern0 (builder); n = build_pattern0 (builder);
builder->depth.cur--; builder->depth.cur--;
return n; return n;
} }
static int __build_pattern0 (builder_t* builder) static int build_pattern0 (builder_t* builder)
{ {
ase_size_t zero = 0; ase_size_t zero = 0;
ase_size_t old_size; ase_size_t old_size;
@ -446,7 +448,7 @@ static int __build_pattern0 (builder_t* builder)
ADD_CODE (builder, &zero, ASE_SIZEOF(zero)); ADD_CODE (builder, &zero, ASE_SIZEOF(zero));
/* handle the first branch */ /* handle the first branch */
n = __build_branch (builder); n = build_branch (builder);
if (n == -1) return -1; if (n == -1) return -1;
if (n == 0) if (n == 0)
{ {
@ -463,7 +465,7 @@ static int __build_pattern0 (builder_t* builder)
{ {
NEXT_CHAR (builder, LEVEL_TOP); NEXT_CHAR (builder, LEVEL_TOP);
n = __build_branch(builder); n = build_branch(builder);
if (n == -1) return -1; if (n == -1) return -1;
if (n == 0) if (n == 0)
{ {
@ -483,7 +485,7 @@ static int __build_pattern0 (builder_t* builder)
return 1; return 1;
} }
static int __build_branch (builder_t* builder) static int build_branch (builder_t* builder)
{ {
int n; int n;
ase_size_t zero = 0; ase_size_t zero = 0;
@ -502,7 +504,7 @@ static int __build_branch (builder_t* builder)
{ {
cmd = (code_t*)&builder->code.buf[builder->code.size]; cmd = (code_t*)&builder->code.buf[builder->code.size];
n = __build_atom (builder); n = build_atom (builder);
if (n == -1) if (n == -1)
{ {
builder->code.size = old_size; builder->code.size = old_size;
@ -511,7 +513,7 @@ static int __build_branch (builder_t* builder)
if (n == 0) break; /* no atom */ if (n == 0) break; /* no atom */
n = __build_occurrences (builder, cmd); n = build_occurrences (builder, cmd);
if (n == -1) if (n == -1)
{ {
builder->code.size = old_size; builder->code.size = old_size;
@ -531,7 +533,7 @@ static int __build_branch (builder_t* builder)
return (builder->code.size == old_size)? 0: 1; return (builder->code.size == old_size)? 0: 1;
} }
static int __build_atom (builder_t* builder) static int build_atom (builder_t* builder)
{ {
int n; int n;
code_t tmp; code_t tmp;
@ -550,7 +552,7 @@ static int __build_atom (builder_t* builder)
NEXT_CHAR (builder, LEVEL_TOP); NEXT_CHAR (builder, LEVEL_TOP);
n = __build_pattern (builder); n = build_pattern (builder);
if (n == -1) return -1; if (n == -1) return -1;
if (builder->ptn.curc.type != CT_SPECIAL || if (builder->ptn.curc.type != CT_SPECIAL ||
@ -598,7 +600,7 @@ static int __build_atom (builder_t* builder)
NEXT_CHAR (builder, LEVEL_CHARSET); NEXT_CHAR (builder, LEVEL_CHARSET);
n = __build_charset (builder, cmd); n = build_charset (builder, cmd);
if (n == -1) return -1; if (n == -1) return -1;
ASE_ASSERT (n != 0); ASE_ASSERT (n != 0);
@ -635,7 +637,7 @@ static int __build_atom (builder_t* builder)
} }
} }
static int __build_charset (builder_t* builder, code_t* cmd) static int build_charset (builder_t* builder, code_t* cmd)
{ {
ase_size_t zero = 0; ase_size_t zero = 0;
ase_size_t old_size; ase_size_t old_size;
@ -667,13 +669,14 @@ static int __build_charset (builder_t* builder, code_t* cmd)
builder->ptn.curc.type == CT_NORMAL && builder->ptn.curc.type == CT_NORMAL &&
builder->ptn.curc.value == ASE_T(':')) builder->ptn.curc.value == ASE_T(':'))
{ {
if (__build_cclass (builder, &c1) == -1) return -1; if (build_cclass (builder, &c1) == -1) return -1;
cc = cc | 1; cc = cc | 1;
} }
c2 = c1; c2 = c1;
if (builder->ptn.curc.type == CT_NORMAL && if (builder->ptn.curc.type == CT_NORMAL &&
builder->ptn.curc.value == ASE_T('-')) builder->ptn.curc.value == ASE_T('-') &&
builder->ptn.curc.escaped == ase_false)
{ {
NEXT_CHAR (builder, LEVEL_CHARSET); NEXT_CHAR (builder, LEVEL_CHARSET);
@ -686,7 +689,7 @@ static int __build_charset (builder_t* builder, code_t* cmd)
builder->ptn.curc.type == CT_NORMAL && builder->ptn.curc.type == CT_NORMAL &&
builder->ptn.curc.value == ASE_T(':')) builder->ptn.curc.value == ASE_T(':'))
{ {
if (__build_cclass (builder, &c2) == -1) if (build_cclass (builder, &c2) == -1)
{ {
return -1; return -1;
} }
@ -725,7 +728,7 @@ static int __build_charset (builder_t* builder, code_t* cmd)
/* invalid range */ /* invalid range */
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__build_charset: invalid character set range\n")); ASE_T("build_charset: invalid character set range\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXCRANGE; builder->errnum = ASE_AWK_EREXCRANGE;
return -1; return -1;
@ -741,7 +744,7 @@ static int __build_charset (builder_t* builder, code_t* cmd)
return 1; return 1;
} }
static int __build_cclass (builder_t* builder, ase_char_t* cc) static int build_cclass (builder_t* builder, ase_char_t* cc)
{ {
const struct __char_class_t* ccp = __char_class; const struct __char_class_t* ccp = __char_class;
ase_size_t len = builder->ptn.end - builder->ptn.curp; ase_size_t len = builder->ptn.end - builder->ptn.curp;
@ -756,7 +759,7 @@ static int __build_cclass (builder_t* builder, ase_char_t* cc)
{ {
/* wrong class name */ /* wrong class name */
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf (ASE_T("__build_cclass: wrong class name\n")); ase_dprintf (ASE_T("build_cclass: wrong class name\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXCCLASS; builder->errnum = ASE_AWK_EREXCCLASS;
return -1; return -1;
@ -769,7 +772,7 @@ static int __build_cclass (builder_t* builder, ase_char_t* cc)
builder->ptn.curc.value != ASE_T(':')) builder->ptn.curc.value != ASE_T(':'))
{ {
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf (ASE_T("__build_cclass: a colon(:) expected\n")); ase_dprintf (ASE_T("build_cclass: a colon(:) expected\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXCOLON; builder->errnum = ASE_AWK_EREXCOLON;
return -1; return -1;
@ -782,7 +785,7 @@ static int __build_cclass (builder_t* builder, ase_char_t* cc)
builder->ptn.curc.value != ASE_T(']')) builder->ptn.curc.value != ASE_T(']'))
{ {
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf (ASE_T("__build_cclass: ] expected\n")); ase_dprintf (ASE_T("build_cclass: ] expected\n"));
#endif #endif
builder->errnum = ASE_AWK_EREXRBRACKET; builder->errnum = ASE_AWK_EREXRBRACKET;
return -1; return -1;
@ -794,7 +797,7 @@ static int __build_cclass (builder_t* builder, ase_char_t* cc)
return 1; return 1;
} }
static int __build_occurrences (builder_t* builder, code_t* cmd) static int build_occurrences (builder_t* builder, code_t* cmd)
{ {
if (builder->ptn.curc.type != CT_SPECIAL) return 0; if (builder->ptn.curc.type != CT_SPECIAL) return 0;
@ -828,7 +831,7 @@ static int __build_occurrences (builder_t* builder, code_t* cmd)
{ {
NEXT_CHAR (builder, LEVEL_RANGE); NEXT_CHAR (builder, LEVEL_RANGE);
if (__build_range(builder, cmd) == -1) return -1; if (build_range(builder, cmd) == -1) return -1;
if (builder->ptn.curc.type != CT_SPECIAL || if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != ASE_T('}')) builder->ptn.curc.value != ASE_T('}'))
@ -845,7 +848,7 @@ static int __build_occurrences (builder_t* builder, code_t* cmd)
return 0; return 0;
} }
static int __build_range (builder_t* builder, code_t* cmd) static int build_range (builder_t* builder, code_t* cmd)
{ {
ase_size_t bound; ase_size_t bound;
@ -890,17 +893,19 @@ what if it is not in the raight format? convert it to ordinary characters?? */
return 0; return 0;
} }
static int __next_char (builder_t* builder, int level) static int next_char (builder_t* builder, int level)
{ {
if (builder->ptn.curp >= builder->ptn.end) if (builder->ptn.curp >= builder->ptn.end)
{ {
builder->ptn.curc.type = CT_EOF; builder->ptn.curc.type = CT_EOF;
builder->ptn.curc.value = ASE_T('\0'); builder->ptn.curc.value = ASE_T('\0');
builder->ptn.curc.escaped = ase_false;
return 0; return 0;
} }
builder->ptn.curc.type = CT_NORMAL; builder->ptn.curc.type = CT_NORMAL;
builder->ptn.curc.value = *builder->ptn.curp++; builder->ptn.curc.value = *builder->ptn.curp++;
builder->ptn.curc.escaped = ase_false;
if (builder->ptn.curc.value == ASE_T('\\')) if (builder->ptn.curc.value == ASE_T('\\'))
{ {
@ -911,6 +916,7 @@ static int __next_char (builder_t* builder, int level)
} }
builder->ptn.curc.value = *builder->ptn.curp++; builder->ptn.curc.value = *builder->ptn.curp++;
builder->ptn.curc.escaped = ase_true;
return 0; return 0;
} }
else else
@ -952,7 +958,7 @@ static int __next_char (builder_t* builder, int level)
return 0; return 0;
} }
static int __add_code (builder_t* builder, void* data, ase_size_t len) static int add_code (builder_t* builder, void* data, ase_size_t len)
{ {
if (len > builder->code.capa - builder->code.size) if (len > builder->code.capa - builder->code.size)
{ {
@ -1015,7 +1021,7 @@ static ase_bool_t __begin_with (
return ase_false; return ase_false;
} }
static const ase_byte_t* __match_pattern ( static const ase_byte_t* match_pattern (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
match_t mat2; match_t mat2;
@ -1028,7 +1034,7 @@ static const ase_byte_t* __match_pattern (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_pattern: NB = %u, EL = %u\n"), ASE_T("match_pattern: NB = %u, EL = %u\n"),
(unsigned int)rhdr->nb, (unsigned int)rhdr->el); (unsigned int)rhdr->nb, (unsigned int)rhdr->el);
#endif #endif
@ -1039,7 +1045,7 @@ static const ase_byte_t* __match_pattern (
{ {
mat2.match_ptr = mat->match_ptr; mat2.match_ptr = mat->match_ptr;
p = __match_branch (matcher, p, &mat2); p = match_branch (matcher, p, &mat2);
if (p == ASE_NULL) return ASE_NULL; if (p == ASE_NULL) return ASE_NULL;
if (mat2.matched) if (mat2.matched)
@ -1053,7 +1059,7 @@ static const ase_byte_t* __match_pattern (
return base + rhdr->el; return base + rhdr->el;
} }
static const ase_byte_t* __match_branch ( static const ase_byte_t* match_branch (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
/* branch body base+sizeof(NA)+sizeof(BL)-----+ /* branch body base+sizeof(NA)+sizeof(BL)-----+
@ -1065,11 +1071,11 @@ static const ase_byte_t* __match_branch (
mat->branch = base; mat->branch = base;
mat->branch_end = base + ((bhdr_t*)base)->bl; mat->branch_end = base + ((bhdr_t*)base)->bl;
return __match_branch_body ( return match_branch_body (
matcher, (const ase_byte_t*)((bhdr_t*)base+1), mat); matcher, (const ase_byte_t*)((bhdr_t*)base+1), mat);
} }
static const ase_byte_t* __match_branch_body ( static const ase_byte_t* match_branch_body (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* n; const ase_byte_t* n;
@ -1081,13 +1087,13 @@ static const ase_byte_t* __match_branch_body (
} }
matcher->depth.cur++; matcher->depth.cur++;
n = __match_branch_body0 (matcher, base, mat); n = match_branch_body0 (matcher, base, mat);
matcher->depth.cur--; matcher->depth.cur--;
return n; return n;
} }
static const ase_byte_t* __match_branch_body0 ( static const ase_byte_t* match_branch_body0 (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p; const ase_byte_t* p;
@ -1108,7 +1114,7 @@ static const ase_byte_t* __match_branch_body0 (
while (p < mat->branch_end) while (p < mat->branch_end)
{ {
p = __match_atom (matcher, p, mat); p = match_atom (matcher, p, mat);
if (p == ASE_NULL) return ASE_NULL; if (p == ASE_NULL) return ASE_NULL;
if (!mat->matched) break; if (!mat->matched) break;
@ -1116,7 +1122,7 @@ static const ase_byte_t* __match_branch_body0 (
mat->match_ptr = &mat->match_ptr[mat->match_len]; mat->match_ptr = &mat->match_ptr[mat->match_len];
match_len += mat->match_len; match_len += mat->match_len;
#if 0 #if 0
p = __match_atom (matcher, p, &mat2); p = match_atom (matcher, p, &mat2);
if (p == ASE_NULL) return ASE_NULL; if (p == ASE_NULL) return ASE_NULL;
if (!mat2.matched) if (!mat2.matched)
@ -1136,17 +1142,17 @@ static const ase_byte_t* __match_branch_body0 (
return mat->branch_end; return mat->branch_end;
} }
static const ase_byte_t* __match_atom ( static const ase_byte_t* match_atom (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
static atom_matcher_t matchers[] = static atom_matcher_t matchers[] =
{ {
__match_bol, match_bol,
__match_eol, match_eol,
__match_any_char, match_any_char,
__match_ord_char, match_ord_char,
__match_charset, match_charset,
__match_group match_group
}; };
ASE_ASSERT ( ASE_ASSERT (
@ -1156,7 +1162,7 @@ static const ase_byte_t* __match_atom (
return matchers[((code_t*)base)->cmd] (matcher, base, mat); return matchers[((code_t*)base)->cmd] (matcher, base, mat);
} }
static const ase_byte_t* __match_bol ( static const ase_byte_t* match_bol (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
@ -1172,7 +1178,7 @@ static const ase_byte_t* __match_bol (
return p; return p;
} }
static const ase_byte_t* __match_eol ( static const ase_byte_t* match_eol (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
@ -1188,7 +1194,7 @@ static const ase_byte_t* __match_eol (
return p; return p;
} }
static const ase_byte_t* __match_any_char ( static const ase_byte_t* match_any_char (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
@ -1216,7 +1222,7 @@ static const ase_byte_t* __match_any_char (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_any_char: lbound = %u, ubound = %u\n"), ASE_T("match_any_char: lbound = %u, ubound = %u\n"),
(unsigned int)lbound, (unsigned int)ubound); (unsigned int)lbound, (unsigned int)ubound);
#endif #endif
@ -1229,18 +1235,18 @@ static const ase_byte_t* __match_any_char (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_any_char: max si = %u\n"), (unsigned)si); ASE_T("match_any_char: max si = %u\n"), (unsigned)si);
#endif #endif
if (si >= lbound && si <= ubound) if (si >= lbound && si <= ubound)
{ {
p = __match_occurrences (matcher, si, p, lbound, ubound, mat); p = match_occurrences (matcher, si, p, lbound, ubound, mat);
} }
return p; return p;
} }
static const ase_byte_t* __match_ord_char ( static const ase_byte_t* match_ord_char (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
@ -1288,7 +1294,7 @@ static const ase_byte_t* __match_ord_char (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_ord_char: cc = %c, lbound = %u, ubound = %u\n"), ASE_T("match_ord_char: cc = %c, lbound = %u, ubound = %u\n"),
cc, (unsigned int)lbound, (unsigned int)ubound); cc, (unsigned int)lbound, (unsigned int)ubound);
#endif #endif
@ -1303,7 +1309,7 @@ static const ase_byte_t* __match_ord_char (
if (&mat->match_ptr[si] >= matcher->match.str.end) break; if (&mat->match_ptr[si] >= matcher->match.str.end) break;
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_ord_char: <ignorecase> %c %c\n"), ASE_T("match_ord_char: <ignorecase> %c %c\n"),
cc, mat->match_ptr[si]); cc, mat->match_ptr[si]);
#endif #endif
if (cc != ASE_AWK_TOUPPER (matcher->awk, mat->match_ptr[si])) break; if (cc != ASE_AWK_TOUPPER (matcher->awk, mat->match_ptr[si])) break;
@ -1317,7 +1323,7 @@ static const ase_byte_t* __match_ord_char (
if (&mat->match_ptr[si] >= matcher->match.str.end) break; if (&mat->match_ptr[si] >= matcher->match.str.end) break;
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_ord_char: %c %c\n"), ASE_T("match_ord_char: %c %c\n"),
cc, mat->match_ptr[si]); cc, mat->match_ptr[si]);
#endif #endif
if (cc != mat->match_ptr[si]) break; if (cc != mat->match_ptr[si]) break;
@ -1327,19 +1333,19 @@ static const ase_byte_t* __match_ord_char (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"), ASE_T("match_ord_char: max occurrences=%u, lbound=%u, ubound=%u\n"),
(unsigned)si, (unsigned)lbound, (unsigned)ubound); (unsigned)si, (unsigned)lbound, (unsigned)ubound);
#endif #endif
if (si >= lbound && si <= ubound) if (si >= lbound && si <= ubound)
{ {
p = __match_occurrences (matcher, si, p, lbound, ubound, mat); p = match_occurrences (matcher, si, p, lbound, ubound, mat);
} }
return p; return p;
} }
static const ase_byte_t* __match_charset ( static const ase_byte_t* match_charset (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
@ -1357,7 +1363,7 @@ static const ase_byte_t* __match_charset (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: lbound = %u, ubound = %u\n"), ASE_T("match_charset: lbound = %u, ubound = %u\n"),
(unsigned int)cp->lbound, (unsigned int)cp->ubound); (unsigned int)cp->lbound, (unsigned int)cp->ubound);
#endif #endif
@ -1382,19 +1388,19 @@ static const ase_byte_t* __match_charset (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"), ASE_T("match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"),
(unsigned)si, (unsigned)cp->lbound, (unsigned)cp->ubound); (unsigned)si, (unsigned)cp->lbound, (unsigned)cp->ubound);
#endif #endif
if (si >= cp->lbound && si <= cp->ubound) if (si >= cp->lbound && si <= cp->ubound)
{ {
p = __match_occurrences (matcher, si, p, cp->lbound, cp->ubound, mat); p = match_occurrences (matcher, si, p, cp->lbound, cp->ubound, mat);
} }
return p; return p;
} }
static const ase_byte_t* __match_group ( static const ase_byte_t* match_group (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p = base; const ase_byte_t* p = base;
@ -1454,7 +1460,7 @@ static const ase_byte_t* __match_group (
{ {
if (mat2.match_ptr >= matcher->match.str.end) break; if (mat2.match_ptr >= matcher->match.str.end) break;
if (__match_pattern (matcher, p, &mat2) == ASE_NULL) if (match_pattern (matcher, p, &mat2) == ASE_NULL)
{ {
if (grp_len != grp_len_static) if (grp_len != grp_len_static)
ASE_AWK_FREE (matcher->awk, grp_len); ASE_AWK_FREE (matcher->awk, grp_len);
@ -1496,10 +1502,10 @@ static const ase_byte_t* __match_group (
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_group: GROUP si=%d [%s]\n"), ASE_T("match_group: GROUP si=%d [%s]\n"),
(unsigned)si, mat->match_ptr); (unsigned)si, mat->match_ptr);
#endif #endif
tmp = __match_branch_body (matcher, p, &mat2); tmp = match_branch_body (matcher, p, &mat2);
if (tmp == ASE_NULL) if (tmp == ASE_NULL)
{ {
if (grp_len != grp_len_static) if (grp_len != grp_len_static)
@ -1527,7 +1533,7 @@ static const ase_byte_t* __match_group (
return p; return p;
} }
static const ase_byte_t* __match_occurrences ( static const ase_byte_t* match_occurrences (
matcher_t* matcher, ase_size_t si, const ase_byte_t* p, matcher_t* matcher, ase_size_t si, const ase_byte_t* p,
ase_size_t lbound, ase_size_t ubound, match_t* mat) ase_size_t lbound, ase_size_t ubound, match_t* mat)
{ {
@ -1600,7 +1606,7 @@ static const ase_byte_t* __match_occurrences (
ASE_T("__match occurrences: si=%u [%s]\n"), ASE_T("__match occurrences: si=%u [%s]\n"),
(unsigned)si, mat->match_ptr); (unsigned)si, mat->match_ptr);
#endif #endif
tmp = __match_branch_body (matcher, p, &mat2); tmp = match_branch_body (matcher, p, &mat2);
if (mat2.matched) if (mat2.matched)
{ {
@ -1637,7 +1643,7 @@ static ase_bool_t __test_charset (
c1 = ASE_AWK_TOUPPER(matcher->awk, c1); c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: <one> %c %c\n"), c, c1); ASE_T("match_charset: <one> %c %c\n"), c, c1);
#endif #endif
if (c == c1) return ase_true; if (c == c1) return ase_true;
} }
@ -1654,7 +1660,7 @@ static ase_bool_t __test_charset (
} }
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: <range> %c %c-%c\n"), c, c1, c2); ASE_T("match_charset: <range> %c %c-%c\n"), c, c1, c2);
#endif #endif
if (c >= c1 && c <= c2) return ase_true; if (c >= c1 && c <= c2) return ase_true;
} }
@ -1663,7 +1669,7 @@ static ase_bool_t __test_charset (
c1 = *(const ase_char_t*)p; c1 = *(const ase_char_t*)p;
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_charset: <class> %c %s\n"), ASE_T("match_charset: <class> %c %s\n"),
c, __char_class[c1].name); c, __char_class[c1].name);
#endif #endif
if (__char_class[c1].func ( if (__char_class[c1].func (