*** empty log message ***

This commit is contained in:
hyung-hwan 2007-03-01 07:43:54 +00:00
parent 6c7776f0c7
commit b139683413

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c,v 1.74 2007-03-01 04:31:27 bacon Exp $ * $Id: rex.c,v 1.75 2007-03-01 07:43:54 bacon Exp $
* *
* {License} * {License}
*/ */
@ -58,6 +58,8 @@ typedef struct matcher_t matcher_t;
typedef struct match_t match_t; typedef struct match_t match_t;
typedef struct code_t code_t; typedef struct code_t code_t;
typedef struct rhdr_t rhdr_t;
typedef struct bhdr_t bhdr_t;
typedef struct cshdr_t cshdr_t; typedef struct cshdr_t cshdr_t;
struct builder_t struct builder_t
@ -136,6 +138,19 @@ ASE_BEGIN_PACKED_STRUCT (code_t)
ase_size_t ubound; ase_size_t ubound;
ASE_END_PACKED_STRUCT () ASE_END_PACKED_STRUCT ()
/* compiled regular expression header */
ASE_BEGIN_PACKED_STRUCT (rhdr_t)
ase_size_t nb; /* number of branches */
ase_size_t el; /* expression length in bytes */
ASE_END_PACKED_STRUCT ()
/* branch header */
ASE_BEGIN_PACKED_STRUCT (bhdr_t)
ase_size_t na; /* number of atoms */
ase_size_t bl; /* branch length in bytes */
ASE_END_PACKED_STRUCT ()
/* character set header */
ASE_BEGIN_PACKED_STRUCT (cshdr_t) ASE_BEGIN_PACKED_STRUCT (cshdr_t)
ase_size_t csc; /* count */ ase_size_t csc; /* count */
ase_size_t csl; /* length */ ase_size_t csl; /* length */
@ -154,19 +169,6 @@ typedef const ase_byte_t* (*atom_matcher_t) (
#define ADD_CODE(rex,data,len) \ #define ADD_CODE(rex,data,len) \
do { if (__add_code(rex,data,len) == -1) return -1; } while (0) do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
#if defined(__i386)||defined(__i386__)||defined(_M_IX86)||defined(__INTEL__)||defined(_X86_)||defined(__I86__)||defined(__THW_INTEL__)
#if !defined(__i386)
#define __i386
#endif
#define GET_CODE(rex,pos,type) (*((type*)&(rex)->code.buf[pos]))
#define SET_CODE(rex,pos,type,code) (GET_CODE(rex,pos,type) = (code))
#else
#define GET_CODE(rex,pos,type) __get_code(rex,pos)
#define SET_CODE(rex,pos,type,code) __set_code(rex,pos,code)
#endif
static int __build_pattern (builder_t* rex); static int __build_pattern (builder_t* rex);
static int __build_pattern0 (builder_t* rex); static int __build_pattern0 (builder_t* rex);
static int __build_branch (builder_t* rex); static int __build_branch (builder_t* rex);
@ -178,22 +180,6 @@ static int __build_range (builder_t* rex, code_t* cmd);
static int __next_char (builder_t* rex, int level); static int __next_char (builder_t* rex, int level);
static int __add_code (builder_t* rex, void* data, ase_size_t len); static int __add_code (builder_t* rex, void* data, ase_size_t len);
#if !defined(__i386) && !defined(__i386__)
static ase_size_t __get_code (builder_t* builder, ase_size_t pos)
{
ase_size_t code;
ase_memcpy (&code, &builder->code.buf[pos], ASE_SIZEOF(code));
return code;
}
static void __set_code (builder_t* builder, ase_size_t pos, ase_size_t code)
{
ase_memcpy (&builder->code.buf[pos], &code, ASE_SIZEOF(code));
}
#endif
static ase_bool_t __begin_with ( static ase_bool_t __begin_with (
const ase_char_t* str, ase_size_t len, const ase_char_t* what); const ase_char_t* str, ase_size_t len, const ase_char_t* what);
@ -415,31 +401,16 @@ void ase_awk_freerex (ase_awk_t* awk, void* code)
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code) ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
{ {
const ase_byte_t* p = code; rhdr_t* rhdr = (rhdr_t*) code;
ase_size_t nb, el; ASE_AWK_ASSERT (awk, rhdr != ASE_NULL);
ASE_AWK_ASSERT (awk, p != ASE_NULL);
#if defined(__i386) || defined(__i386__)
nb = *(ase_size_t*)p;
#else
ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#endif
p += ASE_SIZEOF(nb);
#if defined(__i386) || defined(__i386__)
el = *(ase_size_t*)p;
#else
ase_memcpy (&el, p, ASE_SIZEOF(el));
#endif
p += ASE_SIZEOF(el);
/* an empty regular expression look like: /* an empty regular expression look like:
* | expression | * | expression |
* | header | branch | * | header | branch |
* | | branch header | * | | branch header |
* | NB(1) | EL(16) | NA(1) | BL(8) | */ * | NB(1) | EL(16) | NA(1) | BL(8) | */
return (nb == 1 && el == ASE_SIZEOF(ase_size_t)*4)? ase_true: ase_false; return (rhdr->nb == 1 &&
rhdr->el == ASE_SIZEOF(ase_size_t)*4)? ase_true: ase_false;
} }
static int __build_pattern (builder_t* builder) static int __build_pattern (builder_t* builder)
@ -464,9 +435,9 @@ static int __build_pattern0 (builder_t* builder)
ase_size_t zero = 0; ase_size_t zero = 0;
ase_size_t old_size; ase_size_t old_size;
ase_size_t pos_nb, pos_el; ase_size_t pos_nb, pos_el;
rhdr_t* rhdr;
int n; int n;
old_size = builder->code.size; old_size = builder->code.size;
/* secure space for header and set the header fields to zero */ /* secure space for header and set the header fields to zero */
@ -485,9 +456,8 @@ static int __build_pattern0 (builder_t* builder)
return 0; return 0;
} }
/*CODEAT(builder,pos_nb,ase_size_t) += 1;*/ rhdr = (rhdr_t*)&builder->code.buf[pos_nb];
SET_CODE (builder, pos_nb, ase_size_t, rhdr->nb++;
GET_CODE (builder, pos_nb, ase_size_t) + 1);
/* handle subsequent branches if any */ /* handle subsequent branches if any */
while (builder->ptn.curc.type == CT_SPECIAL && while (builder->ptn.curc.type == CT_SPECIAL &&
@ -505,11 +475,13 @@ static int __build_pattern0 (builder_t* builder)
break; break;
} }
SET_CODE (builder, pos_nb, ase_size_t, rhdr = (rhdr_t*)&builder->code.buf[pos_nb];
GET_CODE (builder, pos_nb, ase_size_t) + 1); rhdr->nb++;
} }
SET_CODE (builder, pos_el, ase_size_t, builder->code.size - old_size); rhdr = (rhdr_t*)&builder->code.buf[pos_nb];
rhdr->el = builder->code.size - old_size;
return 1; return 1;
} }
@ -520,6 +492,7 @@ static int __build_branch (builder_t* builder)
ase_size_t old_size; ase_size_t old_size;
ase_size_t pos_na, pos_bl; ase_size_t pos_na, pos_bl;
code_t* cmd; code_t* cmd;
bhdr_t* bhdr;
old_size = builder->code.size; old_size = builder->code.size;
@ -552,11 +525,13 @@ static int __build_branch (builder_t* builder)
/* n == 0 no bound character. just continue */ /* n == 0 no bound character. just continue */
/* n == 1 bound has been applied by build_occurrences */ /* n == 1 bound has been applied by build_occurrences */
SET_CODE (builder, pos_na, ase_size_t, bhdr = (bhdr_t*)&builder->code.buf[pos_na];
GET_CODE (builder, pos_na, ase_size_t) + 1); bhdr->na++;
} }
SET_CODE (builder, pos_bl, ase_size_t, builder->code.size - old_size); bhdr = (bhdr_t*)&builder->code.buf[pos_na];
bhdr->bl = builder->code.size - old_size;
return (builder->code.size == old_size)? 0: 1; return (builder->code.size == old_size)? 0: 1;
} }
@ -670,6 +645,7 @@ static int __build_charset (builder_t* builder, code_t* cmd)
ase_size_t zero = 0; ase_size_t zero = 0;
ase_size_t old_size; ase_size_t old_size;
ase_size_t pos_csc, pos_csl; ase_size_t pos_csc, pos_csl;
cshdr_t* cshdr;
old_size = builder->code.size; old_size = builder->code.size;
@ -762,12 +738,12 @@ static int __build_charset (builder_t* builder, code_t* cmd)
return -1; return -1;
} }
/*CODEAT(builder,pos_csc,ase_size_t) += 1;*/ cshdr = (cshdr_t*)&builder->code.buf[pos_csc];
SET_CODE (builder, pos_csc, ase_size_t, cshdr->csc++;
GET_CODE (builder, pos_csc, ase_size_t) + 1);
} }
SET_CODE (builder, pos_csl, ase_size_t, builder->code.size - old_size); cshdr = (cshdr_t*)&builder->code.buf[pos_csc];
cshdr->csl = builder->code.size - old_size;
return 1; return 1;
} }
@ -1049,35 +1025,24 @@ static ase_bool_t __begin_with (
static const ase_byte_t* __match_pattern ( static const ase_byte_t* __match_pattern (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
const ase_byte_t* p;
match_t mat2; match_t mat2;
ase_size_t nb, el, i; ase_size_t i;
const ase_byte_t* p;
rhdr_t* rhdr;
p = base; p = base;
#if defined(__i386) || defined(__i386__) rhdr = (rhdr_t*) p; p += ASE_SIZEOF(*rhdr);
nb = *(ase_size_t*)p;
#else
ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#endif
p += ASE_SIZEOF(nb);
#if defined(__i386) || defined(__i386__)
el = *(ase_size_t*)p;
#else
ase_memcpy (&el, p, ASE_SIZEOF(el));
#endif
p += ASE_SIZEOF(el);
#ifdef DEBUG_REX #ifdef DEBUG_REX
ase_dprintf ( ase_dprintf (
ASE_T("__match_pattern: NB = %u, EL = %u\n"), ASE_T("__match_pattern: NB = %u, EL = %u\n"),
(unsigned)nb, (unsigned)el); (unsigned int)rhdr->nb, (unsigned int)rhdr->el);
#endif #endif
mat->matched = ase_false; mat->matched = ase_false;
mat->match_len = 0; mat->match_len = 0;
for (i = 0; i < nb; i++) for (i = 0; i < rhdr->nb; i++)
{ {
mat2.match_ptr = mat->match_ptr; mat2.match_ptr = mat->match_ptr;
@ -1092,24 +1057,23 @@ static const ase_byte_t* __match_pattern (
} }
} }
return base + el; return base + rhdr->el;
} }
static const ase_byte_t* __match_branch ( static const ase_byte_t* __match_branch (
matcher_t* matcher, const ase_byte_t* base, match_t* mat) matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{ {
/* /* branch body base+sizeof(NA)+sizeof(BL)-----+
* branch body (base+sizeof(NA)+sizeof(BL)---+ * BL base+sizeof(NA) ----------+ |
* BL=base+sizeof(NA) ---------+ | * base NA ------+ | |
* base=NA ------+ | | * | | |
* | | |
* |NA(ase_size_t)|BL(ase_size_t)|ATOMS.........| * |NA(ase_size_t)|BL(ase_size_t)|ATOMS.........|
*/ */
mat->branch = base; mat->branch = base;
mat->branch_end = base + *((ase_size_t*)(base+ASE_SIZEOF(ase_size_t))); mat->branch_end = base + ((bhdr_t*)base)->bl;
return __match_branch_body ( return __match_branch_body (
matcher, base+ASE_SIZEOF(ase_size_t)*2, mat); matcher, (const ase_byte_t*)((bhdr_t*)base+1), mat);
} }
static const ase_byte_t* __match_branch_body ( static const ase_byte_t* __match_branch_body (
@ -1796,23 +1760,12 @@ void ase_awk_dprintrex (ase_awk_t* awk, void* rex)
static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p) static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p)
{ {
ase_size_t nb, el, i; ase_size_t i;
rhdr_t* rhdr;
#if defined(__i386) || defined(__i386__) rhdr = (rhdr_t*)p; p += ASE_SIZEOF(*rhdr);
nb = *(ase_size_t*)p;
#else
ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#endif
p += ASE_SIZEOF(nb);
#if defined(__i386) || defined(__i386__) for (i = 0; i < rhdr->nb; i++)
el = *(ase_size_t*)p;
#else
ase_memcpy (&el, p, ASE_SIZEOF(el));
#endif
p += ASE_SIZEOF(el);
for (i = 0; i < nb; i++)
{ {
if (i != 0) DPRINTF (DCUSTOM, ASE_T("|")); if (i != 0) DPRINTF (DCUSTOM, ASE_T("|"));
p = __print_branch (awk, p); p = __print_branch (awk, p);