From fddfa537e52b6d289de9d4404dcf2cbd6a29f725 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Wed, 2 Dec 2020 15:11:13 +0000 Subject: [PATCH] redefined TRE_CHAR_MAX to avoid the overflow issue --- hawk/lib/tre-ast.c | 2 +- hawk/lib/tre-ast.h | 4 ++-- hawk/lib/tre-compile.c | 4 ++-- hawk/lib/tre-compile.h | 4 ++-- hawk/lib/tre-match-bt.c | 4 ++-- hawk/lib/tre-parse.h | 4 ++-- hawk/lib/tre-prv.h | 20 +++++++++++++++++++- 7 files changed, 30 insertions(+), 12 deletions(-) diff --git a/hawk/lib/tre-ast.c b/hawk/lib/tre-ast.c index a95de245..e8b6d526 100644 --- a/hawk/lib/tre-ast.c +++ b/hawk/lib/tre-ast.c @@ -220,7 +220,7 @@ tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent) else { fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, " - "%d tags\n", code_min, code_max, code_min, code_max, pos, + "%d tags\n", (int)code_min, (int)code_max, (int)code_min, (int)code_max, pos, ast->submatch_id, num_tags); } break; diff --git a/hawk/lib/tre-ast.h b/hawk/lib/tre-ast.h index 4e0f1bb0..f89090bc 100644 --- a/hawk/lib/tre-ast.h +++ b/hawk/lib/tre-ast.h @@ -59,8 +59,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef _HAWK_LIB_CMN_TRE_AST_H_ -#define _HAWK_LIB_CMN_TRE_AST_H_ +#ifndef _HAWK_LIB_TRE_AST_H_ +#define _HAWK_LIB_TRE_AST_H_ #include "tre-prv.h" #include "tre-mem.h" diff --git a/hawk/lib/tre-compile.c b/hawk/lib/tre-compile.c index 2fa2de38..83647d12 100644 --- a/hawk/lib/tre-compile.c +++ b/hawk/lib/tre-compile.c @@ -1753,9 +1753,9 @@ tre_make_trans(hawk_gem_t* gem, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, int *tags; DPRINT((" %2d -> %2d on %3d", p1->position, p2->position, - p1->code_min)); + (int)p1->code_min)); if (p1->code_max != p1->code_min) - DPRINT(("-%3d", p1->code_max)); + DPRINT(("-%3d", (int)p1->code_max)); tags = trans->tags; if (tags) { diff --git a/hawk/lib/tre-compile.h b/hawk/lib/tre-compile.h index 56dacaa2..9580088c 100644 --- a/hawk/lib/tre-compile.h +++ b/hawk/lib/tre-compile.h @@ -59,8 +59,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _HAWK_LIB_CMN_TRE_COMPILE_H_ -#define _HAWK_LIB_CMN_TRE_COMPILE_H_ +#ifndef _HAWK_LIB_TRE_COMPILE_H_ +#define _HAWK_LIB_TRE_COMPILE_H_ typedef struct { diff --git a/hawk/lib/tre-match-bt.c b/hawk/lib/tre-match-bt.c index 3a333544..3c2708af 100644 --- a/hawk/lib/tre-match-bt.c +++ b/hawk/lib/tre-match-bt.c @@ -231,7 +231,6 @@ tre_tnfa_run_backtrack(hawk_gem_t* gem, const tre_tnfa_t *tnfa, const void *stri /* Current TNFA state. */ tre_tnfa_transition_t *state; int *states_seen = NULL; - /* Memory allocator to for allocating the backtracking stack. 
*/ tre_mem_t mem = tre_bt_mem_new(gem); @@ -581,7 +580,8 @@ backtrack: if (stack->prev) { DPRINT((" backtracking\n")); - if (stack->item.state->assertions && ASSERT_BACKREF) + /*if (stack->item.state->assertions && ASSERT_BACKREF)*/ + if (stack->item.state->assertions & ASSERT_BACKREF) { DPRINT((" states_seen[%d] = 0\n", stack->item.state_id)); states_seen[stack->item.state_id] = 0; diff --git a/hawk/lib/tre-parse.h b/hawk/lib/tre-parse.h index 2c0da71d..c55ef33a 100644 --- a/hawk/lib/tre-parse.h +++ b/hawk/lib/tre-parse.h @@ -58,8 +58,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef _HAWK_LIB_CMN_TRE_PARSE_H_ -#define _HAWK_LIB_CMN_TRE_PARSE_H_ +#ifndef _HAWK_LIB_TRE_PARSE_H_ +#define _HAWK_LIB_TRE_PARSE_H_ /* Parse context. */ typedef struct diff --git a/hawk/lib/tre-prv.h b/hawk/lib/tre-prv.h index db49dc96..1c99a494 100644 --- a/hawk/lib/tre-prv.h +++ b/hawk/lib/tre-prv.h @@ -263,7 +263,24 @@ typedef hawk_ooci_t tre_cint_t; /* Define the character types and functions. */ #ifdef TRE_WCHAR -# define TRE_CHAR_MAX HAWK_TYPE_MAX(hawk_uch_t) +/* [HAWK] + * the TRE code uses the int type to represent a code point + * in various parts. in fact, it mixes int, long, and tre_cint_t. + * it's not easy to switch to a single type because lit->code_max is bitwise-ORed + * with the assertions field which is of the int type. + * + * if TRE_CHAR_MAX is greater than INT_MAX, some comparisons fail as TRE_CHAR_MAX + * is treated as -1. here let me define TRE_CHAR_MAX to avoid this issue. + * + * however, if int is 2 bytes long, TRE_CHAR_MAX becomes 32767 which is way too small + * to represent even upper-half of the UCS-2 codepoints. 
+ */ +# if (HAWK_SIZEOF_UCH_T < HAWK_SIZEOF_INT) +# define TRE_CHAR_MAX HAWK_TYPE_MAX(hawk_uch_t) +# else +# define TRE_CHAR_MAX HAWK_TYPE_MAX(int) +# endif + /* # ifdef TRE_MULTIBYTE # define TRE_MB_CUR_MAX (hawk_getmbcurmax()) @@ -313,6 +330,7 @@ struct tnfa_transition /* END HAWK */ tre_cint_t code_min; tre_cint_t code_max; + /* Pointer to the destination state. */ tre_tnfa_transition_t *state; /* ID number of the destination state. */