Changed awk to handle IGNORECASE with a regular expression engine that offers case-insensitive matching only as a compile-time option, not as a run-time option
This commit is contained in:
@ -60,11 +60,9 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
|
||||
tre_ast_node_t *node;
|
||||
|
||||
node = tre_mem_calloc(mem, sizeof(*node));
|
||||
if (!node)
|
||||
return NULL;
|
||||
if (!node) return NULL;
|
||||
node->obj = tre_mem_calloc(mem, size);
|
||||
if (!node->obj)
|
||||
return NULL;
|
||||
if (!node->obj) return NULL;
|
||||
node->type = type;
|
||||
node->nullable = -1;
|
||||
node->submatch_id = -1;
|
||||
@ -72,15 +70,13 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
|
||||
return node;
|
||||
}
|
||||
|
||||
tre_ast_node_t *
|
||||
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
|
||||
tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
|
||||
{
|
||||
tre_ast_node_t *node;
|
||||
tre_literal_t *lit;
|
||||
|
||||
node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
|
||||
if (!node)
|
||||
return NULL;
|
||||
if (!node) return NULL;
|
||||
lit = node->obj;
|
||||
lit->code_min = code_min;
|
||||
lit->code_max = code_max;
|
||||
@ -97,8 +93,7 @@ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
|
||||
tre_iteration_t *iter;
|
||||
|
||||
node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
|
||||
if (!node)
|
||||
return NULL;
|
||||
if (!node) return NULL;
|
||||
iter = node->obj;
|
||||
iter->arg = arg;
|
||||
iter->min = min;
|
||||
@ -115,8 +110,7 @@ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
|
||||
tre_ast_node_t *node;
|
||||
|
||||
node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
|
||||
if (node == NULL)
|
||||
return NULL;
|
||||
if (node == NULL) return NULL;
|
||||
((tre_union_t *)node->obj)->left = left;
|
||||
((tre_union_t *)node->obj)->right = right;
|
||||
node->num_submatches = left->num_submatches + right->num_submatches;
|
||||
@ -131,8 +125,7 @@ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
|
||||
tre_ast_node_t *node;
|
||||
|
||||
node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
|
||||
if (node == NULL)
|
||||
return NULL;
|
||||
if (node == NULL) return NULL;
|
||||
((tre_catenation_t *)node->obj)->left = left;
|
||||
((tre_catenation_t *)node->obj)->right = right;
|
||||
node->num_submatches = left->num_submatches + right->num_submatches;
|
||||
|
@ -88,14 +88,11 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
|
||||
DPRINT(("add_tag_left: tag %d\n", tag_id));
|
||||
|
||||
c = tre_mem_alloc(mem, sizeof(*c));
|
||||
if (c == NULL)
|
||||
return REG_ESPACE;
|
||||
if (c == NULL) return REG_ESPACE;
|
||||
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
|
||||
if (c->left == NULL)
|
||||
return REG_ESPACE;
|
||||
if (c->left == NULL) return REG_ESPACE;
|
||||
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
|
||||
if (c->right == NULL)
|
||||
return REG_ESPACE;
|
||||
if (c->right == NULL) return REG_ESPACE;
|
||||
|
||||
c->right->obj = node->obj;
|
||||
c->right->type = node->type;
|
||||
@ -152,7 +149,6 @@ typedef enum
|
||||
ADDTAGS_SET_SUBMATCH_END
|
||||
} tre_addtags_symbol_t;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int tag;
|
||||
@ -763,8 +759,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
|
||||
first_tag = 0;
|
||||
}
|
||||
*result = tre_ast_new_literal(mem, min, max, pos);
|
||||
if (*result == NULL)
|
||||
status = REG_ESPACE;
|
||||
if (*result == NULL) status = REG_ESPACE;
|
||||
|
||||
if (pos > *max_pos)
|
||||
*max_pos = pos;
|
||||
@ -1121,8 +1116,7 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
|
||||
tre_pos_and_tags_t *new_set;
|
||||
|
||||
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
|
||||
if (new_set == NULL)
|
||||
return NULL;
|
||||
if (new_set == NULL) return NULL;
|
||||
|
||||
new_set[0].position = position;
|
||||
new_set[0].code_min = code_min;
|
||||
@ -1150,8 +1144,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
|
||||
for (s1 = 0; set1[s1].position >= 0; s1++);
|
||||
for (s2 = 0; set2[s2].position >= 0; s2++);
|
||||
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
|
||||
if (!new_set )
|
||||
return NULL;
|
||||
if (!new_set) return NULL;
|
||||
|
||||
for (s1 = 0; set1[s1].position >= 0; s1++)
|
||||
{
|
||||
@ -1395,15 +1388,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
|
||||
/* Back references: nullable = false, firstpos = {i},
|
||||
lastpos = {i}. */
|
||||
node->nullable = 0;
|
||||
node->firstpos = tre_set_one(mem, lit->position, 0,
|
||||
TRE_CHAR_MAX, 0, NULL, -1);
|
||||
if (!node->firstpos)
|
||||
return REG_ESPACE;
|
||||
node->lastpos = tre_set_one(mem, lit->position, 0,
|
||||
TRE_CHAR_MAX, 0, NULL,
|
||||
(int)lit->code_max);
|
||||
if (!node->lastpos)
|
||||
return REG_ESPACE;
|
||||
node->firstpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
|
||||
if (!node->firstpos) return REG_ESPACE;
|
||||
node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, (int)lit->code_max);
|
||||
if (!node->lastpos) return REG_ESPACE;
|
||||
}
|
||||
else if (lit->code_min < 0)
|
||||
{
|
||||
@ -1422,18 +1410,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
|
||||
/* Literal at position i: nullable = false, firstpos = {i},
|
||||
lastpos = {i}. */
|
||||
node->nullable = 0;
|
||||
node->firstpos =
|
||||
tre_set_one(mem, lit->position, (int)lit->code_min,
|
||||
(int)lit->code_max, 0, NULL, -1);
|
||||
if (!node->firstpos)
|
||||
return REG_ESPACE;
|
||||
node->lastpos = tre_set_one(mem, lit->position,
|
||||
(int)lit->code_min,
|
||||
(int)lit->code_max,
|
||||
lit->u.class, lit->neg_classes,
|
||||
-1);
|
||||
if (!node->lastpos)
|
||||
return REG_ESPACE;
|
||||
node->firstpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, 0, NULL, -1);
|
||||
if (!node->firstpos) return REG_ESPACE;
|
||||
node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, lit->u.class, lit->neg_classes, -1);
|
||||
if (!node->lastpos) return REG_ESPACE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1628,6 +1608,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
|
||||
int i, j, k, l, dup, prev_p2_pos;
|
||||
|
||||
if (transitions != NULL)
|
||||
{
|
||||
while (p1->position >= 0)
|
||||
{
|
||||
p2 = orig_p2;
|
||||
@ -1814,7 +1795,9 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
|
||||
}
|
||||
p1++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Compute a maximum limit for the number of transitions leaving
|
||||
from each state. */
|
||||
while (p1->position >= 0)
|
||||
@ -1827,6 +1810,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
|
||||
}
|
||||
p1++;
|
||||
}
|
||||
}
|
||||
return REG_OK;
|
||||
}
|
||||
|
||||
|
@ -168,23 +168,18 @@ typedef struct tre_backtrack_struct
|
||||
while (/*CONSTCOND*/0)
|
||||
|
||||
#define BT_STACK_POP() \
|
||||
do \
|
||||
{ \
|
||||
int i; \
|
||||
assert(stack->prev); \
|
||||
pos = stack->item.pos; \
|
||||
if (type == STR_USER) \
|
||||
str_source->rewind(pos + pos_add_next, str_source->context); \
|
||||
str_byte = stack->item.str_byte; \
|
||||
BT_STACK_WIDE_OUT; \
|
||||
state = stack->item.state; \
|
||||
next_c = stack->item.next_c; \
|
||||
for (i = 0; i < tnfa->num_tags; i++) \
|
||||
tags[i] = stack->item.tags[i]; \
|
||||
BT_STACK_MBSTATE_OUT; \
|
||||
stack = stack->prev; \
|
||||
} \
|
||||
while (/*CONSTCOND*/0)
|
||||
do { \
|
||||
int i; \
|
||||
assert(stack->prev); \
|
||||
pos = stack->item.pos; \
|
||||
str_byte = stack->item.str_byte; \
|
||||
BT_STACK_WIDE_OUT; \
|
||||
state = stack->item.state; \
|
||||
next_c = stack->item.next_c; \
|
||||
for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \
|
||||
BT_STACK_MBSTATE_OUT; \
|
||||
stack = stack->prev; \
|
||||
} while (/*CONSTCOND*/0)
|
||||
|
||||
#undef MIN
|
||||
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
|
||||
@ -208,7 +203,6 @@ tre_tnfa_run_backtrack(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *str
|
||||
int reg_notbol = eflags & REG_NOTBOL;
|
||||
int reg_noteol = eflags & REG_NOTEOL;
|
||||
int reg_newline = tnfa->cflags & REG_NEWLINE;
|
||||
int str_user_end = 0;
|
||||
|
||||
/* These are used to remember the necessary values of the above
|
||||
variables to return to the position where the current search
|
||||
@ -302,8 +296,6 @@ retry:
|
||||
|
||||
state = NULL;
|
||||
pos = pos_start;
|
||||
if (type == STR_USER)
|
||||
str_source->rewind(pos + pos_add_next, str_source->context);
|
||||
GET_NEXT_WCHAR();
|
||||
pos_start = pos;
|
||||
next_c_start = next_c;
|
||||
@ -446,15 +438,11 @@ retry:
|
||||
|
||||
if (len < 0)
|
||||
{
|
||||
if (type == STR_USER)
|
||||
result = str_source->compare((unsigned)so, (unsigned)pos,
|
||||
(unsigned)bt_len,
|
||||
str_source->context);
|
||||
#ifdef TRE_WCHAR
|
||||
else if (type == STR_WIDE)
|
||||
if (type == STR_WIDE)
|
||||
result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len);
|
||||
#endif /* TRE_WCHAR */
|
||||
else
|
||||
#endif /* TRE_WCHAR */
|
||||
result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len);
|
||||
}
|
||||
else if (len - pos < bt_len)
|
||||
@ -508,12 +496,7 @@ retry:
|
||||
/* Check for end of string. */
|
||||
if (len < 0)
|
||||
{
|
||||
if (type == STR_USER)
|
||||
{
|
||||
if (str_user_end)
|
||||
goto backtrack;
|
||||
}
|
||||
else if (next_c == QSE_T('\0'))
|
||||
if (next_c == QSE_T('\0'))
|
||||
goto backtrack;
|
||||
}
|
||||
else
|
||||
@ -533,8 +516,8 @@ retry:
|
||||
trans_i->code_min, trans_i->code_max,
|
||||
trans_i->code_min, trans_i->code_max,
|
||||
trans_i->assertions, trans_i->state_id));
|
||||
if (trans_i->code_min <= (tre_cint_t)prev_c &&
|
||||
trans_i->code_max >= (tre_cint_t)prev_c)
|
||||
|
||||
if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
|
||||
{
|
||||
if (trans_i->assertions
|
||||
&& (CHECK_ASSERTIONS(trans_i->assertions)
|
||||
|
@ -325,12 +325,7 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
|
||||
/* Check for end of string. */
|
||||
if (len < 0)
|
||||
{
|
||||
if (type == STR_USER)
|
||||
{
|
||||
if (str_user_end)
|
||||
break;
|
||||
}
|
||||
else if (next_c == QSE_T('\0'))
|
||||
if (next_c == QSE_T('\0'))
|
||||
break;
|
||||
}
|
||||
else
|
||||
@ -408,28 +403,28 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
|
||||
for (trans_i = reach_i->state; trans_i->state; trans_i++)
|
||||
{
|
||||
/* Does this transition match the input symbol? */
|
||||
if (trans_i->code_min <= (tre_cint_t)prev_c &&
|
||||
trans_i->code_max >= (tre_cint_t)prev_c)
|
||||
if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
|
||||
{
|
||||
if (trans_i->assertions
|
||||
&& (CHECK_ASSERTIONS(trans_i->assertions)
|
||||
|| CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
|
||||
if (trans_i->assertions &&
|
||||
(CHECK_ASSERTIONS(trans_i->assertions) ||
|
||||
CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
|
||||
{
|
||||
DPRINT(("assertion failed\n"));
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Compute the tags after this transition. */
|
||||
for (i = 0; i < num_tags; i++)
|
||||
tmp_tags[i] = reach_i->tags[i];
|
||||
for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i];
|
||||
tag_i = trans_i->tags;
|
||||
if (tag_i != NULL)
|
||||
{
|
||||
while (*tag_i >= 0)
|
||||
{
|
||||
if (*tag_i < num_tags)
|
||||
tmp_tags[*tag_i] = pos;
|
||||
tag_i++;
|
||||
}
|
||||
}
|
||||
|
||||
if (reach_pos[trans_i->state_id].pos < pos)
|
||||
{
|
||||
@ -442,15 +437,12 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
|
||||
reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
|
||||
|
||||
if (reach_next_i->state == tnfa->final
|
||||
&& (match_eo == -1
|
||||
|| (num_tags > 0
|
||||
&& reach_next_i->tags[0] <= match_tags[0])))
|
||||
&& (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0])))
|
||||
{
|
||||
DPRINT((" found match %p\n", trans_i->state));
|
||||
match_eo = pos;
|
||||
new_match = 1;
|
||||
for (i = 0; i < num_tags; i++)
|
||||
match_tags[i] = reach_next_i->tags[i];
|
||||
for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i];
|
||||
}
|
||||
reach_next_i++;
|
||||
|
||||
|
@ -52,8 +52,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define str_source ((const tre_str_source*)string)
|
||||
|
||||
#ifdef TRE_WCHAR
|
||||
|
||||
#ifdef TRE_MULTIBYTE
|
||||
@ -116,12 +114,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else if (type == STR_USER) \
|
||||
{ \
|
||||
pos += pos_add_next; \
|
||||
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
|
||||
str_source->context); \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
|
||||
#else /* !TRE_MULTIBYTE */
|
||||
@ -143,11 +135,6 @@ do { \
|
||||
if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \
|
||||
else next_c = *str_wide++; \
|
||||
} \
|
||||
else if (type == STR_USER) \
|
||||
{ \
|
||||
pos += pos_add_next; \
|
||||
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
|
||||
#endif /* !TRE_MULTIBYTE */
|
||||
@ -156,24 +143,16 @@ do { \
|
||||
|
||||
/* No wide character or multibyte support. */
|
||||
|
||||
#define GET_NEXT_WCHAR() \
|
||||
do { \
|
||||
prev_c = next_c; \
|
||||
if (type == STR_BYTE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) \
|
||||
next_c = '\0'; \
|
||||
else \
|
||||
next_c = (unsigned char)(*str_byte++); \
|
||||
} \
|
||||
else if (type == STR_USER) \
|
||||
{ \
|
||||
pos += pos_add_next; \
|
||||
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
|
||||
str_source->context); \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
#define GET_NEXT_WCHAR() \
|
||||
do { \
|
||||
prev_c = next_c; \
|
||||
if (type == STR_BYTE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) next_c = '\0'; \
|
||||
else next_c = (unsigned char)(*str_byte++); \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
|
||||
#endif /* !TRE_WCHAR */
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -205,15 +205,6 @@ static int tre_match(
|
||||
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
|
||||
{
|
||||
/* The regex has back references, use the backtracking matcher. */
|
||||
if (type == STR_USER)
|
||||
{
|
||||
const tre_str_source *source = string;
|
||||
if (source->rewind == QSE_NULL || source->compare == QSE_NULL)
|
||||
/* The backtracking matcher requires rewind and compare
|
||||
capabilities from the input stream. */
|
||||
return REG_BADPAT;
|
||||
}
|
||||
|
||||
status = tre_tnfa_run_backtrack (
|
||||
preg->mmgr, tnfa, string, (int)len, type,
|
||||
tags, eflags, &eo);
|
||||
@ -266,15 +257,6 @@ int qse_tre_exec (
|
||||
return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
|
||||
}
|
||||
|
||||
#if 0
|
||||
int qse_tre_execsrc (
|
||||
const regex_t *preg, const tre_str_source *str,
|
||||
qse_size_t nmatch, regmatch_t pmatch[], int eflags)
|
||||
{
|
||||
return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
|
||||
}
|
||||
#endif
|
||||
|
||||
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
|
||||
{
|
||||
return tre->errnum;
|
||||
|
@ -177,7 +177,6 @@ typedef qse_cint_t tre_cint_t;
|
||||
#define regex_t qse_tre_t
|
||||
#define regmatch_t qse_tre_match_t
|
||||
#define reg_errcode_t qse_tre_errnum_t
|
||||
#define tre_str_source qse_tre_strsrc_t
|
||||
|
||||
|
||||
#define REG_OK QSE_TRE_ENOERR
|
||||
@ -278,7 +277,7 @@ typedef qse_pma_t* tre_mem_t;
|
||||
typedef qse_ctype_t tre_ctype_t;
|
||||
#define tre_isctype(c,t) QSE_ISCTYPE(c,t)
|
||||
|
||||
typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
|
||||
typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
|
||||
|
||||
/* Returns number of bytes to add to (char *)ptr to make it
|
||||
properly aligned for the type. */
|
||||
@ -305,6 +304,9 @@ typedef struct tnfa_transition tre_tnfa_transition_t;
|
||||
struct tnfa_transition
|
||||
{
|
||||
/* Range of accepted characters. */
|
||||
/* QSE: indicates that code_min .. code_max is not yet negated for ^ in a bracket */
|
||||
int negate_range;
|
||||
/* END QSE */
|
||||
tre_cint_t code_min;
|
||||
tre_cint_t code_max;
|
||||
/* Pointer to the destination state. */
|
||||
|
Reference in New Issue
Block a user