made tre_ast_to_tnfa() iterative

fixed a bug in tre_expand_macro()
fixed a bug in tre_parse()
This commit is contained in:
hyung-hwan 2020-12-02 16:07:06 +00:00
parent fddfa537e5
commit 616539252c
4 changed files with 116 additions and 22 deletions

View File

@ -58,13 +58,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
TODO:
- Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
function calls.
*/
#include <hawk-tre.h>
#include "tre-stack.h"
#include "tre-ast.h"
@ -758,6 +751,9 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
*result = tre_ast_new_literal(mem, min, max, pos);
if (*result == NULL) status = REG_ESPACE;
/* HAWK */
((tre_literal_t*)(*result)->obj)->u.class = lit->u.class;
/* END HAWK */
if (pos > *max_pos)
*max_pos = pos;
break;
@ -1812,6 +1808,8 @@ tre_make_trans(hawk_gem_t* gem, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
labelled with one character range (there are no transitions on empty
strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
the regexp. */
/* HAWK */
#if 0
static reg_errcode_t
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
int *counts, int *offs)
@ -1867,7 +1865,75 @@ tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *tr
}
return errcode;
}
#endif
/*
 * Iterative worker behind tre_ast_to_tnfa().
 *
 * Visits the AST rooted at `node` using `stack` as an explicit work list
 * (instead of recursive calls) and records TNFA transitions through
 * tre_make_trans():
 *   - LITERAL:    nothing to add.
 *   - UNION:      both operands are queued for a visit.
 *   - CATENATION: a transition is added from every position in
 *                 left->lastpos to every position in right->firstpos,
 *                 then both operands are queued.
 *   - ITERATION:  only max == 1 or max == -1 is accepted; for max == -1
 *                 (min must be 0 or 1) a transition is added from every
 *                 last position of the operand back to every first
 *                 position, then the operand is queued.
 *
 * gem, transitions, counts and offs are forwarded unchanged to
 * tre_make_trans().
 *
 * Only the entries pushed by this call are consumed: the depth of `stack`
 * on entry is remembered and the loop stops once the stack is back at that
 * depth, so the function is safe to call with a stack that already holds
 * unrelated entries.
 *
 * Returns REG_OK on success, REG_BADBR for an iteration with unsupported
 * bounds, or the error reported by tre_make_trans().  On an early error
 * return, entries pushed by this call may still be on the stack; the caller
 * is expected to discard the stack.
 * NOTE(review): STACK_PUSHR presumably returns the push error from this
 * function when the push fails - confirm against tre-stack.h.
 */
static reg_errcode_t
__tre_ast_to_tnfa(hawk_gem_t *gem, tre_stack_t* stack, tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int *counts, int *offs)
{
	tre_union_t *uni;
	tre_catenation_t *cat;
	tre_iteration_t *iter;
	reg_errcode_t errcode = REG_OK;
	/* Stack depth on entry; anything at or below it is not ours to pop. */
	int bottom = tre_stack_num_objects(stack);

	STACK_PUSHR(stack, voidptr, node);

	while (tre_stack_num_objects(stack) > bottom)
	{
		node = (tre_ast_node_t*)tre_stack_pop_voidptr(stack);

		switch (node->type)
		{
			case LITERAL:
				/* Leaves contribute no transitions of their own. */
				break;

			case UNION:
				uni = (tre_union_t *)node->obj;
				/* Right is pushed first so that the left operand is popped
				   (and therefore visited) first. */
				STACK_PUSHR(stack, voidptr, uni->right);
				STACK_PUSHR(stack, voidptr, uni->left);
				break;

			case CATENATION:
				cat = (tre_catenation_t *)node->obj;
				/* Add a transition from each position in cat->left->lastpos
				   to each position in cat->right->firstpos. */
				errcode = tre_make_trans(gem, cat->left->lastpos, cat->right->firstpos, transitions, counts, offs);
				if (errcode != REG_OK) return errcode;
				/* Same ordering trick as UNION: left is visited before right. */
				STACK_PUSHR(stack, voidptr, cat->right);
				STACK_PUSHR(stack, voidptr, cat->left);
				break;

			case ITERATION:
				iter = (tre_iteration_t *)node->obj;
				/* Any other upper bound is rejected here. */
				if (!(iter->max == -1 || iter->max == 1)) return REG_BADBR;
				if (iter->max == -1)
				{
					if (!(iter->min == 0 || iter->min == 1)) return REG_BADBR;
					/* Add a transition from each last position in the
					   iterated expression to each first position. */
					errcode = tre_make_trans(gem, iter->arg->lastpos, iter->arg->firstpos, transitions, counts, offs);
					if (errcode != REG_OK) return errcode;
				}
				STACK_PUSHR(stack, voidptr, iter->arg);
				break;

			default:
				/* Other node types add no transitions. */
				break;
		}
	}

	return REG_OK;
}
/*
 * Converts the AST rooted at `node` into TNFA transitions.
 *
 * Creates a private work stack, hands the traversal to
 * __tre_ast_to_tnfa(), and destroys the stack again regardless of the
 * outcome.  gem, node, transitions, counts and offs are passed through
 * unchanged.
 *
 * Returns REG_ESPACE if the work stack cannot be created, otherwise the
 * status reported by __tre_ast_to_tnfa().
 */
static reg_errcode_t
tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int *counts, int *offs)
{
	/* NOTE(review): the arguments after gem look like initial size,
	   maximum size and growth increment, with -1 presumably meaning
	   "no upper limit" - confirm against tre_stack_new(). */
	tre_stack_t *work = tre_stack_new(gem, 1024, -1, 4096);
	reg_errcode_t status;

	if (HAWK_UNLIKELY(!work))
	{
		return REG_ESPACE;
	}

	status = __tre_ast_to_tnfa(gem, work, node, transitions, counts, offs);

	/* The stack is scratch space for this call only; release it on both
	   the success and the failure path. */
	tre_stack_destroy(work);
	return status;
}
/* END HAWK */
#define ERROR_EXIT(err) \
do \

View File

@ -114,13 +114,15 @@ hawk_tre_mem_alloc_impl(hawk_tre_mem_t mem, int provided, void *provided_block,
}
else
{
int block_size;
/* HAWK */
/* int block_size;*/
hawk_oow_t block_size;
/* END HAWK */
if (size * 8 > TRE_MEM_BLOCK_SIZE)
block_size = size * 8;
else
block_size = TRE_MEM_BLOCK_SIZE;
DPRINT(("tre_mem_alloc: allocating new %d byte block\n",
block_size));
DPRINT(("tre_mem_alloc: allocating new %lu byte block\n", (unsigned long int)block_size));
l = xmalloc(mem->gem, sizeof(*l));
if (l == NULL)

View File

@ -134,8 +134,11 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
unsigned int j;
DPRINT(("Expanding macro '%c' => '%s'\n",
tre_macros[i].c, tre_macros[i].expansion));
for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
/* HAWK */
/*for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)*/
for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++)
buf[j] = tre_macros[i].expansion[j];
/* END HAWK */
buf[j] = 0;
break;
}
@ -214,7 +217,10 @@ tre_compare_items(const void *a, const void *b, void* ctx)
const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
int a_min = l_a->code_min, b_min = l_b->code_min;
/* HAWK: changed int to long */
/*int a_min = l_a->code_min, b_min = l_b->code_min;*/
long a_min = l_a->code_min, b_min = l_b->code_min;
/* END HAWK */
if (a_min < b_min)
return -1;
@ -295,7 +301,10 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
&& *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
{
const tre_char_t *endptr = re + 2;
int len;
/* HAWK: changed int to hawk_oow_t */
/*int len;*/
hawk_oow_t len;
/* END HAWK */
DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
if (endptr != ctx->re_end)
@ -557,15 +566,25 @@ parse_bracket_done:
/*
 * Parses a run of decimal digits starting at *regex and bounded by
 * regex_end.
 *
 * Returns the parsed value, or -1 when the first character is not a digit
 * (or the input is already at regex_end) or when the overflow flag was
 * raised while accumulating.  *regex is always updated to point just past
 * the digits that were consumed.
 *
 * NOTE(review): this listing appears to come from a diff view without +/-
 * markers; the `while (...)` line and the `if (num < 0)` line below look
 * like the removed pre-change lines sitting next to their replacements.
 * They are reproduced verbatim here - confirm against the actual file.
 */
static int
tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
{
/* HAWK : added overflow check with other code optimizations */
/* -1 doubles as the "no number parsed" result. */
int num = -1;
const tre_char_t *r = *regex;
while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
if (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
{
if (num < 0)
int ever_overflowed = 0;
num = 0;
do
{
/* Flag the value as too large before appending another digit: beyond this
   threshold num * 10 + 9 would no longer fit in an int. */
if (num > (HAWK_TYPE_MAX(int) - 9) / 10) ever_overflowed = 1;
/* NOTE(review): this accumulation still executes after ever_overflowed has
   been set, so num itself can overflow (signed overflow is undefined
   behavior in C) even though the result is discarded below - consider
   skipping the update once the flag is set. */
num = num * 10 + *r - HAWK_T('0');
r++;
}
while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'));
/* An overflowed number is reported the same way as "no number". */
if (ever_overflowed) num = -1;
}
/* Hand the advanced read position back to the caller. */
*regex = r;
return num;
}
@ -1355,9 +1374,13 @@ reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
break;
case CHAR_RPAREN: /* end of current subexpression */
if ((ctx->cflags & REG_EXTENDED && depth > 0)
/* HAWK: fixed the condition */
/* if ((ctx->cflags & REG_EXTENDED && depth > 0)
|| (ctx->re > ctx->re_start
&& *(ctx->re - 1) == CHAR_BACKSLASH))
&& *(ctx->re - 1) == CHAR_BACKSLASH)) */
if (((ctx->cflags & REG_EXTENDED) && depth > 0) ||
(!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start && *(ctx->re - 1) == CHAR_BACKSLASH))
/* END HAWK */
{
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
/* We were expecting an atom, but instead the current
@ -1649,7 +1672,7 @@ reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
/* We are expecting an atom. If the subexpression (or the whole
regexp ends here, we interpret it as an empty expression
regexp) ends here, we interpret it as an empty expression
(which matches an empty string). */
if (
#ifdef REG_LITERAL

View File

@ -76,7 +76,10 @@ typedef struct
const tre_char_t *re_start;
/* The first character after the end of the regexp. */
const tre_char_t *re_end;
int len;
/* HAWK: changed int to hawk_oow_t */
/*int len;*/
hawk_oow_t len;
/* END HAWK */
/* Current submatch ID. */
int submatch_id;
/* Current position (number of literal). */