Made tre_ast_to_tnfa() iterative.
Fixed a bug in tre_expand_macro(). Fixed a bug in tre_parse().
This commit is contained in:
parent
fddfa537e5
commit
616539252c
@ -58,13 +58,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
TODO:
|
|
||||||
- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
|
|
||||||
function calls.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include <hawk-tre.h>
|
#include <hawk-tre.h>
|
||||||
#include "tre-stack.h"
|
#include "tre-stack.h"
|
||||||
#include "tre-ast.h"
|
#include "tre-ast.h"
|
||||||
@ -758,6 +751,9 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
|
|||||||
*result = tre_ast_new_literal(mem, min, max, pos);
|
*result = tre_ast_new_literal(mem, min, max, pos);
|
||||||
if (*result == NULL) status = REG_ESPACE;
|
if (*result == NULL) status = REG_ESPACE;
|
||||||
|
|
||||||
|
/* HAWK */
|
||||||
|
((tre_literal_t*)(*result)->obj)->u.class = lit->u.class;
|
||||||
|
/* END HAWK */
|
||||||
if (pos > *max_pos)
|
if (pos > *max_pos)
|
||||||
*max_pos = pos;
|
*max_pos = pos;
|
||||||
break;
|
break;
|
||||||
@ -1812,6 +1808,8 @@ tre_make_trans(hawk_gem_t* gem, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
|
|||||||
labelled with one character range (there are no transitions on empty
|
labelled with one character range (there are no transitions on empty
|
||||||
strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
|
strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
|
||||||
the regexp. */
|
the regexp. */
|
||||||
|
/* HAWK */
|
||||||
|
#if 0
|
||||||
static reg_errcode_t
|
static reg_errcode_t
|
||||||
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
|
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
|
||||||
int *counts, int *offs)
|
int *counts, int *offs)
|
||||||
@ -1867,7 +1865,75 @@ tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *tr
|
|||||||
}
|
}
|
||||||
return errcode;
|
return errcode;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
static reg_errcode_t
|
||||||
|
__tre_ast_to_tnfa(hawk_gem_t *gem, tre_stack_t* stack, tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int *counts, int *offs)
|
||||||
|
{
|
||||||
|
tre_union_t *uni;
|
||||||
|
tre_catenation_t *cat;
|
||||||
|
tre_iteration_t *iter;
|
||||||
|
reg_errcode_t errcode = REG_OK;
|
||||||
|
|
||||||
|
STACK_PUSHR(stack, voidptr, node);
|
||||||
|
|
||||||
|
while (tre_stack_num_objects(stack))
|
||||||
|
{
|
||||||
|
node = (tre_ast_node_t*)tre_stack_pop_voidptr(stack);
|
||||||
|
|
||||||
|
switch (node->type)
|
||||||
|
{
|
||||||
|
case LITERAL:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case UNION:
|
||||||
|
uni = (tre_union_t *)node->obj;
|
||||||
|
STACK_PUSHR(stack, voidptr, uni->right);
|
||||||
|
STACK_PUSHR(stack, voidptr, uni->left);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CATENATION:
|
||||||
|
cat = (tre_catenation_t *)node->obj;
|
||||||
|
/* Add a transition from each position in cat->left->lastpos to each position in cat->right->firstpos. */
|
||||||
|
errcode = tre_make_trans(gem, cat->left->lastpos, cat->right->firstpos, transitions, counts, offs);
|
||||||
|
if (errcode != REG_OK) return errcode;
|
||||||
|
|
||||||
|
STACK_PUSHR(stack, voidptr, cat->right);
|
||||||
|
STACK_PUSHR(stack, voidptr, cat->left);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ITERATION:
|
||||||
|
iter = (tre_iteration_t *)node->obj;
|
||||||
|
if(!(iter->max == -1 || iter->max == 1)) return REG_BADBR;
|
||||||
|
|
||||||
|
if (iter->max == -1)
|
||||||
|
{
|
||||||
|
if(!(iter->min == 0 || iter->min == 1)) return REG_BADBR;
|
||||||
|
/* Add a transition from each last position in the iterated expression to each first position. */
|
||||||
|
errcode = tre_make_trans(gem, iter->arg->lastpos, iter->arg->firstpos, transitions, counts, offs);
|
||||||
|
if (errcode != REG_OK) return errcode;
|
||||||
|
}
|
||||||
|
STACK_PUSHR(stack, voidptr, iter->arg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return REG_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static reg_errcode_t
|
||||||
|
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int *counts, int *offs)
|
||||||
|
{
|
||||||
|
reg_errcode_t x;
|
||||||
|
tre_stack_t* stack;
|
||||||
|
|
||||||
|
stack = tre_stack_new(gem, 1024, -1, 4096);
|
||||||
|
if (HAWK_UNLIKELY(!stack)) return REG_ESPACE;
|
||||||
|
|
||||||
|
x = __tre_ast_to_tnfa(gem, stack, node, transitions, counts, offs);
|
||||||
|
|
||||||
|
tre_stack_destroy(stack);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
/* END HAWK */
|
||||||
|
|
||||||
#define ERROR_EXIT(err) \
|
#define ERROR_EXIT(err) \
|
||||||
do \
|
do \
|
||||||
|
@ -114,13 +114,15 @@ hawk_tre_mem_alloc_impl(hawk_tre_mem_t mem, int provided, void *provided_block,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int block_size;
|
/* HAWK */
|
||||||
|
/* int block_size;*/
|
||||||
|
hawk_oow_t block_size;
|
||||||
|
/* END HAWK */
|
||||||
if (size * 8 > TRE_MEM_BLOCK_SIZE)
|
if (size * 8 > TRE_MEM_BLOCK_SIZE)
|
||||||
block_size = size * 8;
|
block_size = size * 8;
|
||||||
else
|
else
|
||||||
block_size = TRE_MEM_BLOCK_SIZE;
|
block_size = TRE_MEM_BLOCK_SIZE;
|
||||||
DPRINT(("tre_mem_alloc: allocating new %d byte block\n",
|
DPRINT(("tre_mem_alloc: allocating new %lu byte block\n", (unsigned long int)block_size));
|
||||||
block_size));
|
|
||||||
|
|
||||||
l = xmalloc(mem->gem, sizeof(*l));
|
l = xmalloc(mem->gem, sizeof(*l));
|
||||||
if (l == NULL)
|
if (l == NULL)
|
||||||
|
@ -134,8 +134,11 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
|
|||||||
unsigned int j;
|
unsigned int j;
|
||||||
DPRINT(("Expanding macro '%c' => '%s'\n",
|
DPRINT(("Expanding macro '%c' => '%s'\n",
|
||||||
tre_macros[i].c, tre_macros[i].expansion));
|
tre_macros[i].c, tre_macros[i].expansion));
|
||||||
for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
|
/* HAWK */
|
||||||
|
/*for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)*/
|
||||||
|
for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++)
|
||||||
buf[j] = tre_macros[i].expansion[j];
|
buf[j] = tre_macros[i].expansion[j];
|
||||||
|
/* END HAWK */
|
||||||
buf[j] = 0;
|
buf[j] = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -214,7 +217,10 @@ tre_compare_items(const void *a, const void *b, void* ctx)
|
|||||||
const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
|
const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
|
||||||
const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
|
const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
|
||||||
tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
|
tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
|
||||||
int a_min = l_a->code_min, b_min = l_b->code_min;
|
/* HAWK: changed int to long */
|
||||||
|
/*int a_min = l_a->code_min, b_min = l_b->code_min;*/
|
||||||
|
long a_min = l_a->code_min, b_min = l_b->code_min;
|
||||||
|
/* END HAWK */
|
||||||
|
|
||||||
if (a_min < b_min)
|
if (a_min < b_min)
|
||||||
return -1;
|
return -1;
|
||||||
@ -295,7 +301,10 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
|
|||||||
&& *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
|
&& *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
|
||||||
{
|
{
|
||||||
const tre_char_t *endptr = re + 2;
|
const tre_char_t *endptr = re + 2;
|
||||||
int len;
|
/* HAWK: changed int to hawk_oow_t */
|
||||||
|
/*int len;*/
|
||||||
|
hawk_oow_t len;
|
||||||
|
/* END HAWK */
|
||||||
DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
|
DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
|
||||||
while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
|
while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
|
||||||
if (endptr != ctx->re_end)
|
if (endptr != ctx->re_end)
|
||||||
@ -557,14 +566,24 @@ parse_bracket_done:
|
|||||||
static int
|
static int
|
||||||
tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
|
tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
|
||||||
{
|
{
|
||||||
|
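/* HAWK: added an overflow check along with other code optimizations. NOTE(review): after the overflow flag is set the loop still evaluates num * 10 + digit, which can overflow a signed int; consider skipping the accumulation once the flag is set. */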
|
||||||
int num = -1;
|
int num = -1;
|
||||||
const tre_char_t *r = *regex;
|
const tre_char_t *r = *regex;
|
||||||
while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
|
|
||||||
|
if (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
|
||||||
{
|
{
|
||||||
if (num < 0)
|
int ever_overflowed = 0;
|
||||||
num = 0;
|
|
||||||
num = num * 10 + *r - HAWK_T('0');
|
num = 0;
|
||||||
r++;
|
do
|
||||||
|
{
|
||||||
|
if (num > (HAWK_TYPE_MAX(int) - 9) / 10) ever_overflowed = 1;
|
||||||
|
num = num * 10 + *r - HAWK_T('0');
|
||||||
|
r++;
|
||||||
|
}
|
||||||
|
while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'));
|
||||||
|
|
||||||
|
if (ever_overflowed) num = -1;
|
||||||
}
|
}
|
||||||
*regex = r;
|
*regex = r;
|
||||||
return num;
|
return num;
|
||||||
@ -1355,9 +1374,13 @@ reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case CHAR_RPAREN: /* end of current subexpression */
|
case CHAR_RPAREN: /* end of current subexpression */
|
||||||
if ((ctx->cflags & REG_EXTENDED && depth > 0)
|
/* HAWK: fixed the condition */
|
||||||
|
/* if ((ctx->cflags & REG_EXTENDED && depth > 0)
|
||||||
|| (ctx->re > ctx->re_start
|
|| (ctx->re > ctx->re_start
|
||||||
&& *(ctx->re - 1) == CHAR_BACKSLASH))
|
&& *(ctx->re - 1) == CHAR_BACKSLASH)) */
|
||||||
|
if (((ctx->cflags & REG_EXTENDED) && depth > 0) ||
|
||||||
|
(!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start && *(ctx->re - 1) == CHAR_BACKSLASH))
|
||||||
|
/* END HAWK */
|
||||||
{
|
{
|
||||||
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
|
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
|
||||||
/* We were expecting an atom, but instead the current
|
/* We were expecting an atom, but instead the current
|
||||||
@ -1649,7 +1672,7 @@ reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
|
|
||||||
|
|
||||||
/* We are expecting an atom. If the subexpression (or the whole
|
/* We are expecting an atom. If the subexpression (or the whole
|
||||||
regexp ends here, we interpret it as an empty expression
|
regexp) ends here, we interpret it as an empty expression
|
||||||
(which matches an empty string). */
|
(which matches an empty string). */
|
||||||
if (
|
if (
|
||||||
#ifdef REG_LITERAL
|
#ifdef REG_LITERAL
|
||||||
|
@ -76,7 +76,10 @@ typedef struct
|
|||||||
const tre_char_t *re_start;
|
const tre_char_t *re_start;
|
||||||
/* The first character after the end of the regexp. */
|
/* The first character after the end of the regexp. */
|
||||||
const tre_char_t *re_end;
|
const tre_char_t *re_end;
|
||||||
int len;
|
/* HAWK: changed int to hawk_oow_t */
|
||||||
|
/*int len;*/
|
||||||
|
hawk_oow_t len;
|
||||||
|
/* END HAWK */
|
||||||
/* Current submatch ID. */
|
/* Current submatch ID. */
|
||||||
int submatch_id;
|
int submatch_id;
|
||||||
/* Current position (number of literal). */
|
/* Current position (number of literal). */
|
||||||
|
Loading…
Reference in New Issue
Block a user