*** empty log message ***

This commit is contained in:
hyung-hwan 2007-03-01 07:43:54 +00:00
parent 6c7776f0c7
commit b139683413

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c,v 1.74 2007-03-01 04:31:27 bacon Exp $
* $Id: rex.c,v 1.75 2007-03-01 07:43:54 bacon Exp $
*
* {License}
*/
@ -58,6 +58,8 @@ typedef struct matcher_t matcher_t;
typedef struct match_t match_t;
typedef struct code_t code_t;
typedef struct rhdr_t rhdr_t;
typedef struct bhdr_t bhdr_t;
typedef struct cshdr_t cshdr_t;
struct builder_t
@ -136,6 +138,19 @@ ASE_BEGIN_PACKED_STRUCT (code_t)
ase_size_t ubound;
ASE_END_PACKED_STRUCT ()
/* compiled regular expression header */
ASE_BEGIN_PACKED_STRUCT (rhdr_t)
ase_size_t nb; /* number of branches */
ase_size_t el; /* expression length in bytes */
ASE_END_PACKED_STRUCT ()
/* branch header */
ASE_BEGIN_PACKED_STRUCT (bhdr_t)
ase_size_t na; /* number of atoms */
ase_size_t bl; /* branch length in bytes */
ASE_END_PACKED_STRUCT ()
/* character set header */
ASE_BEGIN_PACKED_STRUCT (cshdr_t)
ase_size_t csc; /* count */
ase_size_t csl; /* length */
@ -154,19 +169,6 @@ typedef const ase_byte_t* (*atom_matcher_t) (
#define ADD_CODE(rex,data,len) \
do { if (__add_code(rex,data,len) == -1) return -1; } while (0)
#if defined(__i386)||defined(__i386__)||defined(_M_IX86)||defined(__INTEL__)||defined(_X86_)||defined(__I86__)||defined(__THW_INTEL__)
#if !defined(__i386)
#define __i386
#endif
#define GET_CODE(rex,pos,type) (*((type*)&(rex)->code.buf[pos]))
#define SET_CODE(rex,pos,type,code) (GET_CODE(rex,pos,type) = (code))
#else
#define GET_CODE(rex,pos,type) __get_code(rex,pos)
#define SET_CODE(rex,pos,type,code) __set_code(rex,pos,code)
#endif
static int __build_pattern (builder_t* rex);
static int __build_pattern0 (builder_t* rex);
static int __build_branch (builder_t* rex);
@ -178,22 +180,6 @@ static int __build_range (builder_t* rex, code_t* cmd);
static int __next_char (builder_t* rex, int level);
static int __add_code (builder_t* rex, void* data, ase_size_t len);
#if !defined(__i386) && !defined(__i386__)
static ase_size_t __get_code (builder_t* builder, ase_size_t pos)
{
ase_size_t code;
ase_memcpy (&code, &builder->code.buf[pos], ASE_SIZEOF(code));
return code;
}
static void __set_code (builder_t* builder, ase_size_t pos, ase_size_t code)
{
ase_memcpy (&builder->code.buf[pos], &code, ASE_SIZEOF(code));
}
#endif
static ase_bool_t __begin_with (
const ase_char_t* str, ase_size_t len, const ase_char_t* what);
@ -415,31 +401,16 @@ void ase_awk_freerex (ase_awk_t* awk, void* code)
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
{
const ase_byte_t* p = code;
ase_size_t nb, el;
ASE_AWK_ASSERT (awk, p != ASE_NULL);
#if defined(__i386) || defined(__i386__)
nb = *(ase_size_t*)p;
#else
ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#endif
p += ASE_SIZEOF(nb);
#if defined(__i386) || defined(__i386__)
el = *(ase_size_t*)p;
#else
ase_memcpy (&el, p, ASE_SIZEOF(el));
#endif
p += ASE_SIZEOF(el);
rhdr_t* rhdr = (rhdr_t*) code;
ASE_AWK_ASSERT (awk, rhdr != ASE_NULL);
/* an empty regular expression look like:
* | expression |
* | header | branch |
* | | branch header |
* | NB(1) | EL(16) | NA(1) | BL(8) | */
return (nb == 1 && el == ASE_SIZEOF(ase_size_t)*4)? ase_true: ase_false;
return (rhdr->nb == 1 &&
rhdr->el == ASE_SIZEOF(ase_size_t)*4)? ase_true: ase_false;
}
static int __build_pattern (builder_t* builder)
@ -464,9 +435,9 @@ static int __build_pattern0 (builder_t* builder)
ase_size_t zero = 0;
ase_size_t old_size;
ase_size_t pos_nb, pos_el;
rhdr_t* rhdr;
int n;
old_size = builder->code.size;
/* secure space for header and set the header fields to zero */
@ -485,9 +456,8 @@ static int __build_pattern0 (builder_t* builder)
return 0;
}
/*CODEAT(builder,pos_nb,ase_size_t) += 1;*/
SET_CODE (builder, pos_nb, ase_size_t,
GET_CODE (builder, pos_nb, ase_size_t) + 1);
rhdr = (rhdr_t*)&builder->code.buf[pos_nb];
rhdr->nb++;
/* handle subsequent branches if any */
while (builder->ptn.curc.type == CT_SPECIAL &&
@ -505,11 +475,13 @@ static int __build_pattern0 (builder_t* builder)
break;
}
SET_CODE (builder, pos_nb, ase_size_t,
GET_CODE (builder, pos_nb, ase_size_t) + 1);
rhdr = (rhdr_t*)&builder->code.buf[pos_nb];
rhdr->nb++;
}
SET_CODE (builder, pos_el, ase_size_t, builder->code.size - old_size);
rhdr = (rhdr_t*)&builder->code.buf[pos_nb];
rhdr->el = builder->code.size - old_size;
return 1;
}
@ -520,6 +492,7 @@ static int __build_branch (builder_t* builder)
ase_size_t old_size;
ase_size_t pos_na, pos_bl;
code_t* cmd;
bhdr_t* bhdr;
old_size = builder->code.size;
@ -552,11 +525,13 @@ static int __build_branch (builder_t* builder)
/* n == 0 no bound character. just continue */
/* n == 1 bound has been applied by build_occurrences */
SET_CODE (builder, pos_na, ase_size_t,
GET_CODE (builder, pos_na, ase_size_t) + 1);
bhdr = (bhdr_t*)&builder->code.buf[pos_na];
bhdr->na++;
}
SET_CODE (builder, pos_bl, ase_size_t, builder->code.size - old_size);
bhdr = (bhdr_t*)&builder->code.buf[pos_na];
bhdr->bl = builder->code.size - old_size;
return (builder->code.size == old_size)? 0: 1;
}
@ -670,6 +645,7 @@ static int __build_charset (builder_t* builder, code_t* cmd)
ase_size_t zero = 0;
ase_size_t old_size;
ase_size_t pos_csc, pos_csl;
cshdr_t* cshdr;
old_size = builder->code.size;
@ -762,12 +738,12 @@ static int __build_charset (builder_t* builder, code_t* cmd)
return -1;
}
/*CODEAT(builder,pos_csc,ase_size_t) += 1;*/
SET_CODE (builder, pos_csc, ase_size_t,
GET_CODE (builder, pos_csc, ase_size_t) + 1);
cshdr = (cshdr_t*)&builder->code.buf[pos_csc];
cshdr->csc++;
}
SET_CODE (builder, pos_csl, ase_size_t, builder->code.size - old_size);
cshdr = (cshdr_t*)&builder->code.buf[pos_csc];
cshdr->csl = builder->code.size - old_size;
return 1;
}
@ -1049,35 +1025,24 @@ static ase_bool_t __begin_with (
static const ase_byte_t* __match_pattern (
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{
const ase_byte_t* p;
match_t mat2;
ase_size_t nb, el, i;
ase_size_t i;
const ase_byte_t* p;
rhdr_t* rhdr;
p = base;
#if defined(__i386) || defined(__i386__)
nb = *(ase_size_t*)p;
#else
ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#endif
p += ASE_SIZEOF(nb);
#if defined(__i386) || defined(__i386__)
el = *(ase_size_t*)p;
#else
ase_memcpy (&el, p, ASE_SIZEOF(el));
#endif
p += ASE_SIZEOF(el);
rhdr = (rhdr_t*) p; p += ASE_SIZEOF(*rhdr);
#ifdef DEBUG_REX
ase_dprintf (
ASE_T("__match_pattern: NB = %u, EL = %u\n"),
(unsigned)nb, (unsigned)el);
(unsigned int)rhdr->nb, (unsigned int)rhdr->el);
#endif
mat->matched = ase_false;
mat->match_len = 0;
for (i = 0; i < nb; i++)
for (i = 0; i < rhdr->nb; i++)
{
mat2.match_ptr = mat->match_ptr;
@ -1092,24 +1057,23 @@ static const ase_byte_t* __match_pattern (
}
}
return base + el;
return base + rhdr->el;
}
static const ase_byte_t* __match_branch (
matcher_t* matcher, const ase_byte_t* base, match_t* mat)
{
/*
* branch body (base+sizeof(NA)+sizeof(BL)---+
* BL=base+sizeof(NA) ---------+ |
* base=NA ------+ | |
* | | |
/* branch body base+sizeof(NA)+sizeof(BL)-----+
* BL base+sizeof(NA) ----------+ |
* base NA ------+ | |
* | | |
* |NA(ase_size_t)|BL(ase_size_t)|ATOMS.........|
*/
mat->branch = base;
mat->branch_end = base + *((ase_size_t*)(base+ASE_SIZEOF(ase_size_t)));
mat->branch_end = base + ((bhdr_t*)base)->bl;
return __match_branch_body (
matcher, base+ASE_SIZEOF(ase_size_t)*2, mat);
matcher, (const ase_byte_t*)((bhdr_t*)base+1), mat);
}
static const ase_byte_t* __match_branch_body (
@ -1796,23 +1760,12 @@ void ase_awk_dprintrex (ase_awk_t* awk, void* rex)
static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p)
{
ase_size_t nb, el, i;
ase_size_t i;
rhdr_t* rhdr;
#if defined(__i386) || defined(__i386__)
nb = *(ase_size_t*)p;
#else
ase_memcpy (&nb, p, ASE_SIZEOF(nb));
#endif
p += ASE_SIZEOF(nb);
rhdr = (rhdr_t*)p; p += ASE_SIZEOF(*rhdr);
#if defined(__i386) || defined(__i386__)
el = *(ase_size_t*)p;
#else
ase_memcpy (&el, p, ASE_SIZEOF(el));
#endif
p += ASE_SIZEOF(el);
for (i = 0; i < nb; i++)
for (i = 0; i < rhdr->nb; i++)
{
if (i != 0) DPRINTF (DCUSTOM, ASE_T("|"));
p = __print_branch (awk, p);