*** empty log message ***
This commit is contained in:
		
							
								
								
									
										174
									
								
								ase/awk/rex.c
									
									
									
									
									
								
							
							
						
						
									
										174
									
								
								ase/awk/rex.c
									
									
									
									
									
								
							| @ -1,5 +1,5 @@ | ||||
| /* | ||||
|  * $Id: rex.c,v 1.73 2007-02-28 14:46:08 bacon Exp $ | ||||
|  * $Id: rex.c,v 1.74 2007-03-01 04:31:27 bacon Exp $ | ||||
|  * | ||||
|  * {License} | ||||
|  */ | ||||
| @ -56,19 +56,12 @@ enum | ||||
| typedef struct builder_t builder_t; | ||||
| typedef struct matcher_t matcher_t; | ||||
| typedef struct match_t match_t; | ||||
|  | ||||
| typedef struct code_t code_t; | ||||
| typedef struct cshdr_t cshdr_t; | ||||
|  | ||||
| #include <ase/pack.h> | ||||
|  | ||||
| ASE_BEGIN_PACKED_STRUCT (code_t) | ||||
| 	/*ase_byte_t cmd;*/ | ||||
| 	short cmd; | ||||
| 	short negate; /* only for CMD_CHARSET */ | ||||
| 	ase_size_t lbound; | ||||
| 	ase_size_t ubound; | ||||
| ASE_END_PACKED_STRUCT () | ||||
|  | ||||
| ASE_BEGIN_PACKED_STRUCT (builder_t) | ||||
| struct builder_t | ||||
| { | ||||
| 	ase_awk_t* awk; | ||||
|  | ||||
| 	struct | ||||
| @ -97,9 +90,10 @@ ASE_BEGIN_PACKED_STRUCT (builder_t) | ||||
| 	} depth; | ||||
|  | ||||
| 	int errnum; | ||||
| ASE_END_PACKED_STRUCT () | ||||
| }; | ||||
|  | ||||
| ASE_BEGIN_PACKED_STRUCT (matcher_t) | ||||
| struct matcher_t | ||||
| { | ||||
| 	ase_awk_t* awk; | ||||
|  | ||||
| 	struct | ||||
| @ -119,9 +113,10 @@ ASE_BEGIN_PACKED_STRUCT (matcher_t) | ||||
|  | ||||
| 	int ignorecase; | ||||
| 	int errnum; | ||||
| ASE_END_PACKED_STRUCT () | ||||
| }; | ||||
|  | ||||
| ASE_BEGIN_PACKED_STRUCT (match_t) | ||||
| struct match_t | ||||
| { | ||||
| 	const ase_char_t* match_ptr; | ||||
|  | ||||
| 	ase_bool_t matched; | ||||
| @ -129,11 +124,21 @@ ASE_BEGIN_PACKED_STRUCT (match_t) | ||||
|  | ||||
| 	const ase_byte_t* branch; | ||||
| 	const ase_byte_t* branch_end; | ||||
| }; | ||||
|  | ||||
| #include <ase/pack.h> | ||||
|  | ||||
| ASE_BEGIN_PACKED_STRUCT (code_t) | ||||
| 	/*ase_byte_t cmd;*/ | ||||
| 	short cmd; | ||||
| 	short negate; /* only for CMD_CHARSET */ | ||||
| 	ase_size_t lbound; | ||||
| 	ase_size_t ubound; | ||||
| ASE_END_PACKED_STRUCT () | ||||
|  | ||||
| ASE_BEGIN_PACKED_STRUCT (cshdr_t) | ||||
| 	ase_size_t csc; | ||||
| 	ase_size_t csl; | ||||
| 	ase_size_t csc; /* count */ | ||||
| 	ase_size_t csl; /* length */ | ||||
| ASE_END_PACKED_STRUCT () | ||||
|  | ||||
| #include <ase/unpack.h> | ||||
| @ -166,10 +171,10 @@ static int __build_pattern (builder_t* rex); | ||||
| static int __build_pattern0 (builder_t* rex); | ||||
| static int __build_branch (builder_t* rex); | ||||
| static int __build_atom (builder_t* rex); | ||||
| static int __build_charset (builder_t* rex, struct code_t* cmd); | ||||
| static int __build_occurrences (builder_t* rex, struct code_t* cmd); | ||||
| static int __build_charset (builder_t* rex, code_t* cmd); | ||||
| static int __build_occurrences (builder_t* rex, code_t* cmd); | ||||
| static int __build_cclass (builder_t* rex, ase_char_t* cc); | ||||
| static int __build_range (builder_t* rex, struct code_t* cmd); | ||||
| static int __build_range (builder_t* rex, code_t* cmd); | ||||
| static int __next_char (builder_t* rex, int level); | ||||
| static int __add_code (builder_t* rex, void* data, ase_size_t len); | ||||
|  | ||||
| @ -514,7 +519,7 @@ static int __build_branch (builder_t* builder) | ||||
| 	ase_size_t zero = 0; | ||||
| 	ase_size_t old_size; | ||||
| 	ase_size_t pos_na, pos_bl; | ||||
| 	struct code_t* cmd; | ||||
| 	code_t* cmd; | ||||
|  | ||||
| 	old_size = builder->code.size; | ||||
|  | ||||
| @ -526,7 +531,7 @@ static int __build_branch (builder_t* builder) | ||||
|  | ||||
| 	while (1) | ||||
| 	{ | ||||
| 		cmd = (struct code_t*)&builder->code.buf[builder->code.size]; | ||||
| 		cmd = (code_t*)&builder->code.buf[builder->code.size]; | ||||
|  | ||||
| 		n = __build_atom (builder); | ||||
| 		if (n == -1)  | ||||
| @ -558,7 +563,7 @@ static int __build_branch (builder_t* builder) | ||||
| static int __build_atom (builder_t* builder) | ||||
| { | ||||
| 	int n; | ||||
| 	struct code_t tmp; | ||||
| 	code_t tmp; | ||||
|  | ||||
| 	if (builder->ptn.curc.type == CT_EOF) return 0; | ||||
|  | ||||
| @ -610,10 +615,9 @@ static int __build_atom (builder_t* builder) | ||||
| 		} | ||||
| 		else if (builder->ptn.curc.value == ASE_T('[')) | ||||
| 		{ | ||||
| 			struct code_t* cmd; | ||||
| 			code_t* cmd; | ||||
|  | ||||
| 			cmd = (struct code_t*) | ||||
| 				&builder->code.buf[builder->code.size]; | ||||
| 			cmd = (code_t*)&builder->code.buf[builder->code.size]; | ||||
|  | ||||
| 			tmp.cmd = CMD_CHARSET; | ||||
| 			tmp.negate = 0; | ||||
| @ -661,7 +665,7 @@ static int __build_atom (builder_t* builder) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| static int __build_charset (builder_t* builder, struct code_t* cmd) | ||||
| static int __build_charset (builder_t* builder, code_t* cmd) | ||||
| { | ||||
| 	ase_size_t zero = 0; | ||||
| 	ase_size_t old_size; | ||||
| @ -821,7 +825,7 @@ static int __build_cclass (builder_t* builder, ase_char_t* cc) | ||||
| 	return 1; | ||||
| } | ||||
|  | ||||
| static int __build_occurrences (builder_t* builder, struct code_t* cmd) | ||||
| static int __build_occurrences (builder_t* builder, code_t* cmd) | ||||
| { | ||||
| 	if (builder->ptn.curc.type != CT_SPECIAL) return 0; | ||||
|  | ||||
| @ -872,7 +876,7 @@ static int __build_occurrences (builder_t* builder, struct code_t* cmd) | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int __build_range (builder_t* builder, struct code_t* cmd) | ||||
| static int __build_range (builder_t* builder, code_t* cmd) | ||||
| { | ||||
| 	ase_size_t bound; | ||||
|  | ||||
| @ -1189,19 +1193,19 @@ static const ase_byte_t* __match_atom ( | ||||
| 	}; | ||||
|         | ||||
| 	ASE_AWK_ASSERT (matcher->awk,  | ||||
| 		((struct code_t*)base)->cmd >= 0 &&  | ||||
| 		((struct code_t*)base)->cmd < ASE_COUNTOF(matchers)); | ||||
| 		((code_t*)base)->cmd >= 0 &&  | ||||
| 		((code_t*)base)->cmd < ASE_COUNTOF(matchers)); | ||||
|  | ||||
| 	return matchers[((struct code_t*)base)->cmd] (matcher, base, mat); | ||||
| 	return matchers[((code_t*)base)->cmd] (matcher, base, mat); | ||||
| } | ||||
|  | ||||
| static const ase_byte_t* __match_bol ( | ||||
| 	matcher_t* matcher, const ase_byte_t* base, match_t* mat) | ||||
| { | ||||
| 	const ase_byte_t* p = base; | ||||
| 	const struct code_t* cp; | ||||
| 	const code_t* cp; | ||||
|  | ||||
| 	cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	cp = (const code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_BOL); | ||||
|  | ||||
| 	mat->matched = (mat->match_ptr == matcher->match.str.ptr || | ||||
| @ -1215,9 +1219,9 @@ static const ase_byte_t* __match_eol ( | ||||
| 	matcher_t* matcher, const ase_byte_t* base, match_t* mat) | ||||
| { | ||||
| 	const ase_byte_t* p = base; | ||||
| 	const struct code_t* cp; | ||||
| 	const code_t* cp; | ||||
|  | ||||
| 	cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	cp = (const code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_EOL); | ||||
|  | ||||
| 	mat->matched = (mat->match_ptr == matcher->match.str.end || | ||||
| @ -1231,10 +1235,10 @@ static const ase_byte_t* __match_any_char ( | ||||
| 	matcher_t* matcher, const ase_byte_t* base, match_t* mat) | ||||
| { | ||||
| 	const ase_byte_t* p = base; | ||||
| 	const struct code_t* cp; | ||||
| 	const code_t* cp; | ||||
| 	ase_size_t si = 0, lbound, ubound; | ||||
|  | ||||
| 	cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	cp = (const code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_ANY_CHAR); | ||||
|  | ||||
| 	lbound = cp->lbound; | ||||
| @ -1245,10 +1249,10 @@ static const ase_byte_t* __match_any_char ( | ||||
|  | ||||
| 	/* merge the same consecutive codes */ | ||||
| 	while (p < mat->branch_end && | ||||
| 	       cp->cmd == ((const struct code_t*)p)->cmd) | ||||
| 	       cp->cmd == ((const code_t*)p)->cmd) | ||||
| 	{ | ||||
| 		lbound += ((const struct code_t*)p)->lbound; | ||||
| 		ubound += ((const struct code_t*)p)->ubound; | ||||
| 		lbound += ((const code_t*)p)->lbound; | ||||
| 		ubound += ((const code_t*)p)->ubound; | ||||
|  | ||||
| 		p += ASE_SIZEOF(*cp); | ||||
| 	} | ||||
| @ -1283,11 +1287,11 @@ static const ase_byte_t* __match_ord_char ( | ||||
| 	matcher_t* matcher, const ase_byte_t* base, match_t* mat) | ||||
| { | ||||
| 	const ase_byte_t* p = base; | ||||
| 	const struct code_t* cp; | ||||
| 	const code_t* cp; | ||||
| 	ase_size_t si = 0, lbound, ubound; | ||||
| 	ase_char_t cc; | ||||
|  | ||||
| 	cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	cp = (const code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_ORD_CHAR); | ||||
|  | ||||
| 	lbound = cp->lbound;  | ||||
| @ -1301,12 +1305,12 @@ static const ase_byte_t* __match_ord_char ( | ||||
| 	if (matcher->ignorecase)  | ||||
| 	{ | ||||
| 		while (p < mat->branch_end && | ||||
| 		       cp->cmd == ((const struct code_t*)p)->cmd) | ||||
| 		       cp->cmd == ((const code_t*)p)->cmd) | ||||
| 		{ | ||||
| 			if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break; | ||||
|  | ||||
| 			lbound += ((const struct code_t*)p)->lbound; | ||||
| 			ubound += ((const struct code_t*)p)->ubound; | ||||
| 			lbound += ((const code_t*)p)->lbound; | ||||
| 			ubound += ((const code_t*)p)->ubound; | ||||
|  | ||||
| 			p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc); | ||||
| 		} | ||||
| @ -1314,12 +1318,12 @@ static const ase_byte_t* __match_ord_char ( | ||||
| 	else | ||||
| 	{ | ||||
| 		while (p < mat->branch_end && | ||||
| 		       cp->cmd == ((const struct code_t*)p)->cmd) | ||||
| 		       cp->cmd == ((const code_t*)p)->cmd) | ||||
| 		{ | ||||
| 			if (*(ase_char_t*)(p+ASE_SIZEOF(*cp)) != cc) break; | ||||
|  | ||||
| 			lbound += ((const struct code_t*)p)->lbound; | ||||
| 			ubound += ((const struct code_t*)p)->ubound; | ||||
| 			lbound += ((const code_t*)p)->lbound; | ||||
| 			ubound += ((const code_t*)p)->ubound; | ||||
|  | ||||
| 			p += ASE_SIZEOF(*cp) + ASE_SIZEOF(cc); | ||||
| 		} | ||||
| @ -1382,73 +1386,52 @@ static const ase_byte_t* __match_charset ( | ||||
| 	matcher_t* matcher, const ase_byte_t* base, match_t* mat) | ||||
| { | ||||
| 	const ase_byte_t* p = base; | ||||
| 	const struct code_t* cp; | ||||
| 	ase_size_t si = 0, lbound, ubound, csc, csl; | ||||
| 	ase_size_t si = 0; | ||||
| 	ase_bool_t n; | ||||
| 	ase_char_t c; | ||||
|  | ||||
| 	code_t* cp; | ||||
| 	cshdr_t* cshdr; | ||||
|  | ||||
| 	cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	cp = (code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_CHARSET); | ||||
|  | ||||
| 	lbound = cp->lbound; | ||||
| 	ubound = cp->ubound; | ||||
|  | ||||
| cshdr = (cshdr_t*)p; | ||||
| csc = cshdr->csc; | ||||
| csl = cshdr->csl; | ||||
| p += ASE_SIZEOF(*cshdr); | ||||
|  | ||||
| /* | ||||
| #if defined(__i386) || defined(__i386__) | ||||
| 	csc = *(ase_size_t*)p; | ||||
| #else | ||||
| 	ase_memcpy (&csc, p, ASE_SIZEOF(csc)); | ||||
| #endif | ||||
| 	p += ASE_SIZEOF(csc); | ||||
|  | ||||
| #if defined(__i386) || defined(__i386__) | ||||
| 	csl = *(ase_size_t*)p;  | ||||
| #else | ||||
| 	ase_memcpy (&csl, p, ASE_SIZEOF(csl)); | ||||
| #endif | ||||
| 	p += ASE_SIZEOF(csl); | ||||
| */ | ||||
| 	cshdr = (cshdr_t*)p; p += ASE_SIZEOF(*cshdr); | ||||
|  | ||||
| #ifdef DEBUG_REX | ||||
| 	ase_dprintf ( | ||||
| 		ASE_T("__match_charset: lbound = %u, ubound = %u\n"),  | ||||
| 		(unsigned int)lbound, (unsigned int)ubound); | ||||
| 		(unsigned int)cp->lbound, (unsigned int)cp->ubound); | ||||
| #endif | ||||
|  | ||||
| 	mat->matched = ase_false; | ||||
| 	mat->match_len = 0; | ||||
|  | ||||
| 	while (si < ubound) | ||||
| 	while (si < cp->ubound) | ||||
| 	{ | ||||
| 		if (&mat->match_ptr[si] >= matcher->match.str.end) break; | ||||
|  | ||||
| 		c = mat->match_ptr[si]; | ||||
| 		if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c); | ||||
|  | ||||
| 		n = __test_charset (matcher, p, csc, c); | ||||
| 		n = __test_charset (matcher, p, cshdr->csc, c); | ||||
| 		if (cp->negate) n = !n; | ||||
| 		if (!n) break; | ||||
|  | ||||
| 		si++; | ||||
| 	} | ||||
|  | ||||
| 	p = p + csl - (ASE_SIZEOF(csc) + ASE_SIZEOF(csl)); | ||||
| 	p = p + cshdr->csl - ASE_SIZEOF(*cshdr); | ||||
|  | ||||
| #ifdef DEBUG_REX | ||||
| 	ase_dprintf ( | ||||
| 		ASE_T("__match_charset: max occurrences=%u, lbound=%u, ubound=%u\n"),  | ||||
| 		(unsigned)si, (unsigned)lbound, (unsigned)ubound); | ||||
| 		(unsigned)si, (unsigned)cp->lbound, (unsigned)cp->ubound); | ||||
| #endif | ||||
| 	if (si >= lbound && si <= ubound) | ||||
|  | ||||
| 	if (si >= cp->lbound && si <= cp->ubound) | ||||
| 	{ | ||||
| 		p = __match_occurrences (matcher, si, p, lbound, ubound, mat); | ||||
| 		p = __match_occurrences (matcher, si, p, cp->lbound, cp->ubound, mat); | ||||
| 	} | ||||
|  | ||||
| 	return p; | ||||
| @ -1458,11 +1441,11 @@ static const ase_byte_t* __match_group ( | ||||
| 	matcher_t* matcher, const ase_byte_t* base, match_t* mat) | ||||
| { | ||||
| 	const ase_byte_t* p = base; | ||||
| 	const struct code_t* cp; | ||||
| 	const code_t* cp; | ||||
| 	match_t mat2; | ||||
| 	ase_size_t si = 0, grp_len_static[16], * grp_len; | ||||
|  | ||||
| 	cp = (const struct code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	cp = (const code_t*)p; p += ASE_SIZEOF(*cp); | ||||
| 	ASE_AWK_ASSERT (matcher->awk, cp->cmd == CMD_GROUP); | ||||
|  | ||||
| 	mat->matched = ase_false; | ||||
| @ -1855,7 +1838,7 @@ static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p) | ||||
|  | ||||
| static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p) | ||||
| { | ||||
| 	const struct code_t* cp = (const struct code_t*)p; | ||||
| 	const code_t* cp = (const code_t*)p; | ||||
|  | ||||
| 	if (cp->cmd == CMD_BOL) | ||||
| 	{ | ||||
| @ -1880,27 +1863,16 @@ static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p) | ||||
| 	} | ||||
| 	else if (cp->cmd == CMD_CHARSET) | ||||
| 	{ | ||||
| 		ase_size_t csc, csl, i; | ||||
| 		ase_size_t i; | ||||
| 		cshdr_t* cshdr; | ||||
|  | ||||
| 		p += ASE_SIZEOF(*cp); | ||||
| 		DPRINTF (DCUSTOM, ASE_T("[")); | ||||
| 		if (cp->negate) DPRINTF (DCUSTOM, ASE_T("^")); | ||||
|  | ||||
| #if defined(__i386) || defined(__i386__) | ||||
| 		csc = *(ase_size_t*)p; | ||||
| #else | ||||
| 		ase_memcpy (&csc, p, ASE_SIZEOF(csc)); | ||||
| #endif | ||||
| 		p += ASE_SIZEOF(csc); | ||||
| 		cshdr = (cshdr_t*)p; p += ASE_SIZEOF(*cshdr); | ||||
|  | ||||
| #if defined(__i386) || defined(__i386__) | ||||
| 		csl = *(ase_size_t*)p;  | ||||
| #else | ||||
| 		ase_memcpy (&csl, p, ASE_SIZEOF(csl)); | ||||
| #endif | ||||
| 		p += ASE_SIZEOF(csl); | ||||
|  | ||||
| 		for (i = 0; i < csc; i++) | ||||
| 		for (i = 0; i < cshdr->csc; i++) | ||||
| 		{ | ||||
| 			ase_char_t c0, c1, c2; | ||||
|  | ||||
|  | ||||
		Reference in New Issue
	
	Block a user