made tre_ast_to_tnfa() iterative
fixed a bug in tre_expand_macro(); fixed a bug in tre_parse()
This commit is contained in:
parent
fddfa537e5
commit
616539252c
@ -58,13 +58,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
TODO:
|
||||
- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
|
||||
function calls.
|
||||
*/
|
||||
|
||||
|
||||
#include <hawk-tre.h>
|
||||
#include "tre-stack.h"
|
||||
#include "tre-ast.h"
|
||||
@ -758,6 +751,9 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
|
||||
*result = tre_ast_new_literal(mem, min, max, pos);
|
||||
if (*result == NULL) status = REG_ESPACE;
|
||||
|
||||
/* HAWK */
|
||||
((tre_literal_t*)(*result)->obj)->u.class = lit->u.class;
|
||||
/* END HAWK */
|
||||
if (pos > *max_pos)
|
||||
*max_pos = pos;
|
||||
break;
|
||||
@ -1812,6 +1808,8 @@ tre_make_trans(hawk_gem_t* gem, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
|
||||
labelled with one character range (there are no transitions on empty
|
||||
strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
|
||||
the regexp. */
|
||||
/* HAWK */
|
||||
#if 0
|
||||
static reg_errcode_t
|
||||
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
|
||||
int *counts, int *offs)
|
||||
@ -1867,7 +1865,75 @@ tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *tr
|
||||
}
|
||||
return errcode;
|
||||
}
|
||||
#endif
|
||||
static reg_errcode_t
|
||||
__tre_ast_to_tnfa(hawk_gem_t *gem, tre_stack_t* stack, tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int *counts, int *offs)
|
||||
{
|
||||
tre_union_t *uni;
|
||||
tre_catenation_t *cat;
|
||||
tre_iteration_t *iter;
|
||||
reg_errcode_t errcode = REG_OK;
|
||||
|
||||
STACK_PUSHR(stack, voidptr, node);
|
||||
|
||||
while (tre_stack_num_objects(stack))
|
||||
{
|
||||
node = (tre_ast_node_t*)tre_stack_pop_voidptr(stack);
|
||||
|
||||
switch (node->type)
|
||||
{
|
||||
case LITERAL:
|
||||
break;
|
||||
|
||||
case UNION:
|
||||
uni = (tre_union_t *)node->obj;
|
||||
STACK_PUSHR(stack, voidptr, uni->right);
|
||||
STACK_PUSHR(stack, voidptr, uni->left);
|
||||
break;
|
||||
|
||||
case CATENATION:
|
||||
cat = (tre_catenation_t *)node->obj;
|
||||
/* Add a transition from each position in cat->left->lastpos to each position in cat->right->firstpos. */
|
||||
errcode = tre_make_trans(gem, cat->left->lastpos, cat->right->firstpos, transitions, counts, offs);
|
||||
if (errcode != REG_OK) return errcode;
|
||||
|
||||
STACK_PUSHR(stack, voidptr, cat->right);
|
||||
STACK_PUSHR(stack, voidptr, cat->left);
|
||||
break;
|
||||
|
||||
case ITERATION:
|
||||
iter = (tre_iteration_t *)node->obj;
|
||||
if(!(iter->max == -1 || iter->max == 1)) return REG_BADBR;
|
||||
|
||||
if (iter->max == -1)
|
||||
{
|
||||
if(!(iter->min == 0 || iter->min == 1)) return REG_BADBR;
|
||||
/* Add a transition from each last position in the iterated expression to each first position. */
|
||||
errcode = tre_make_trans(gem, iter->arg->lastpos, iter->arg->firstpos, transitions, counts, offs);
|
||||
if (errcode != REG_OK) return errcode;
|
||||
}
|
||||
STACK_PUSHR(stack, voidptr, iter->arg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return REG_OK;
|
||||
}
|
||||
|
||||
static reg_errcode_t
|
||||
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int *counts, int *offs)
|
||||
{
|
||||
reg_errcode_t x;
|
||||
tre_stack_t* stack;
|
||||
|
||||
stack = tre_stack_new(gem, 1024, -1, 4096);
|
||||
if (HAWK_UNLIKELY(!stack)) return REG_ESPACE;
|
||||
|
||||
x = __tre_ast_to_tnfa(gem, stack, node, transitions, counts, offs);
|
||||
|
||||
tre_stack_destroy(stack);
|
||||
return x;
|
||||
}
|
||||
/* END HAWK */
|
||||
|
||||
#define ERROR_EXIT(err) \
|
||||
do \
|
||||
|
@ -114,13 +114,15 @@ hawk_tre_mem_alloc_impl(hawk_tre_mem_t mem, int provided, void *provided_block,
|
||||
}
|
||||
else
|
||||
{
|
||||
int block_size;
|
||||
/* HAWK */
|
||||
/* int block_size;*/
|
||||
hawk_oow_t block_size;
|
||||
/* END HAWK */
|
||||
if (size * 8 > TRE_MEM_BLOCK_SIZE)
|
||||
block_size = size * 8;
|
||||
else
|
||||
block_size = TRE_MEM_BLOCK_SIZE;
|
||||
DPRINT(("tre_mem_alloc: allocating new %d byte block\n",
|
||||
block_size));
|
||||
DPRINT(("tre_mem_alloc: allocating new %lu byte block\n", (unsigned long int)block_size));
|
||||
|
||||
l = xmalloc(mem->gem, sizeof(*l));
|
||||
if (l == NULL)
|
||||
|
@ -134,8 +134,11 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
|
||||
unsigned int j;
|
||||
DPRINT(("Expanding macro '%c' => '%s'\n",
|
||||
tre_macros[i].c, tre_macros[i].expansion));
|
||||
for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
|
||||
/* HAWK */
|
||||
/*for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)*/
|
||||
for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++)
|
||||
buf[j] = tre_macros[i].expansion[j];
|
||||
/* END HAWK */
|
||||
buf[j] = 0;
|
||||
break;
|
||||
}
|
||||
@ -214,7 +217,10 @@ tre_compare_items(const void *a, const void *b, void* ctx)
|
||||
const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
|
||||
const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
|
||||
tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
|
||||
int a_min = l_a->code_min, b_min = l_b->code_min;
|
||||
/* HAWK: changed int to long */
|
||||
/*int a_min = l_a->code_min, b_min = l_b->code_min;*/
|
||||
long a_min = l_a->code_min, b_min = l_b->code_min;
|
||||
/* END HAWK */
|
||||
|
||||
if (a_min < b_min)
|
||||
return -1;
|
||||
@ -295,7 +301,10 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
|
||||
&& *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
|
||||
{
|
||||
const tre_char_t *endptr = re + 2;
|
||||
int len;
|
||||
/* HAWK: changed int to hawk_oow_t */
|
||||
/*int len;*/
|
||||
hawk_oow_t len;
|
||||
/* END HAWK */
|
||||
DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
|
||||
while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
|
||||
if (endptr != ctx->re_end)
|
||||
@ -557,15 +566,25 @@ parse_bracket_done:
|
||||
static int
|
||||
tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
|
||||
{
|
||||
/* HAWK : added overflow check with other code optimizations */
|
||||
int num = -1;
|
||||
const tre_char_t *r = *regex;
|
||||
while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
|
||||
|
||||
if (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
|
||||
{
|
||||
if (num < 0)
|
||||
int ever_overflowed = 0;
|
||||
|
||||
num = 0;
|
||||
do
|
||||
{
|
||||
if (num > (HAWK_TYPE_MAX(int) - 9) / 10) ever_overflowed = 1;
|
||||
num = num * 10 + *r - HAWK_T('0');
|
||||
r++;
|
||||
}
|
||||
while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'));
|
||||
|
||||
if (ever_overflowed) num = -1;
|
||||
}
|
||||
*regex = r;
|
||||
return num;
|
||||
}
|
||||
@ -1355,9 +1374,13 @@ reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
|
||||
break;
|
||||
|
||||
case CHAR_RPAREN: /* end of current subexpression */
|
||||
if ((ctx->cflags & REG_EXTENDED && depth > 0)
|
||||
/* HAWK: fixed the condition */
|
||||
/* if ((ctx->cflags & REG_EXTENDED && depth > 0)
|
||||
|| (ctx->re > ctx->re_start
|
||||
&& *(ctx->re - 1) == CHAR_BACKSLASH))
|
||||
&& *(ctx->re - 1) == CHAR_BACKSLASH)) */
|
||||
if (((ctx->cflags & REG_EXTENDED) && depth > 0) ||
|
||||
(!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start && *(ctx->re - 1) == CHAR_BACKSLASH))
|
||||
/* END HAWK */
|
||||
{
|
||||
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
|
||||
/* We were expecting an atom, but instead the current
|
||||
@ -1649,7 +1672,7 @@ reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
|
||||
|
||||
|
||||
/* We are expecting an atom. If the subexpression (or the whole
|
||||
regexp ends here, we interpret it as an empty expression
|
||||
regexp) ends here, we interpret it as an empty expression
|
||||
(which matches an empty string). */
|
||||
if (
|
||||
#ifdef REG_LITERAL
|
||||
|
@ -76,7 +76,10 @@ typedef struct
|
||||
const tre_char_t *re_start;
|
||||
/* The first character after the end of the regexp. */
|
||||
const tre_char_t *re_end;
|
||||
int len;
|
||||
/* HAWK: changed int to hawk_oow_t */
|
||||
/*int len;*/
|
||||
hawk_oow_t len;
|
||||
/* END HAWK */
|
||||
/* Current submatch ID. */
|
||||
int submatch_id;
|
||||
/* Current position (number of literal). */
|
||||
|
Loading…
Reference in New Issue
Block a user