Changed awk to handle IGNORECASE with a regular expression engine that has no run-time option for it, only a compile-time option

This commit is contained in:
2013-08-23 15:19:29 +00:00
parent 47677ca566
commit d841c9f62f
21 changed files with 1127 additions and 1236 deletions

View File

@ -60,11 +60,9 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
tre_ast_node_t *node;
node = tre_mem_calloc(mem, sizeof(*node));
if (!node)
return NULL;
if (!node) return NULL;
node->obj = tre_mem_calloc(mem, size);
if (!node->obj)
return NULL;
if (!node->obj) return NULL;
node->type = type;
node->nullable = -1;
node->submatch_id = -1;
@ -72,15 +70,13 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
return node;
}
tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
{
tre_ast_node_t *node;
tre_literal_t *lit;
node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
if (!node)
return NULL;
if (!node) return NULL;
lit = node->obj;
lit->code_min = code_min;
lit->code_max = code_max;
@ -97,8 +93,7 @@ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
tre_iteration_t *iter;
node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
if (!node)
return NULL;
if (!node) return NULL;
iter = node->obj;
iter->arg = arg;
iter->min = min;
@ -115,8 +110,7 @@ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
tre_ast_node_t *node;
node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
if (node == NULL)
return NULL;
if (node == NULL) return NULL;
((tre_union_t *)node->obj)->left = left;
((tre_union_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches;
@ -131,8 +125,7 @@ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
tre_ast_node_t *node;
node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
if (node == NULL)
return NULL;
if (node == NULL) return NULL;
((tre_catenation_t *)node->obj)->left = left;
((tre_catenation_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches;

View File

@ -88,14 +88,11 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
DPRINT(("add_tag_left: tag %d\n", tag_id));
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
return REG_ESPACE;
if (c == NULL) return REG_ESPACE;
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
if (c->left == NULL)
return REG_ESPACE;
if (c->left == NULL) return REG_ESPACE;
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
if (c->right == NULL)
return REG_ESPACE;
if (c->right == NULL) return REG_ESPACE;
c->right->obj = node->obj;
c->right->type = node->type;
@ -152,7 +149,6 @@ typedef enum
ADDTAGS_SET_SUBMATCH_END
} tre_addtags_symbol_t;
typedef struct
{
int tag;
@ -763,8 +759,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
first_tag = 0;
}
*result = tre_ast_new_literal(mem, min, max, pos);
if (*result == NULL)
status = REG_ESPACE;
if (*result == NULL) status = REG_ESPACE;
if (pos > *max_pos)
*max_pos = pos;
@ -1121,8 +1116,7 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
tre_pos_and_tags_t *new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
if (new_set == NULL)
return NULL;
if (new_set == NULL) return NULL;
new_set[0].position = position;
new_set[0].code_min = code_min;
@ -1150,8 +1144,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
for (s1 = 0; set1[s1].position >= 0; s1++);
for (s2 = 0; set2[s2].position >= 0; s2++);
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
if (!new_set )
return NULL;
if (!new_set) return NULL;
for (s1 = 0; set1[s1].position >= 0; s1++)
{
@ -1395,15 +1388,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Back references: nullable = false, firstpos = {i},
lastpos = {i}. */
node->nullable = 0;
node->firstpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL,
(int)lit->code_max);
if (!node->lastpos)
return REG_ESPACE;
node->firstpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
if (!node->firstpos) return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, (int)lit->code_max);
if (!node->lastpos) return REG_ESPACE;
}
else if (lit->code_min < 0)
{
@ -1422,18 +1410,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Literal at position i: nullable = false, firstpos = {i},
lastpos = {i}. */
node->nullable = 0;
node->firstpos =
tre_set_one(mem, lit->position, (int)lit->code_min,
(int)lit->code_max, 0, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position,
(int)lit->code_min,
(int)lit->code_max,
lit->u.class, lit->neg_classes,
-1);
if (!node->lastpos)
return REG_ESPACE;
node->firstpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, 0, NULL, -1);
if (!node->firstpos) return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, lit->u.class, lit->neg_classes, -1);
if (!node->lastpos) return REG_ESPACE;
}
break;
}
@ -1628,6 +1608,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
int i, j, k, l, dup, prev_p2_pos;
if (transitions != NULL)
{
while (p1->position >= 0)
{
p2 = orig_p2;
@ -1814,7 +1795,9 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
}
p1++;
}
}
else
{
/* Compute a maximum limit for the number of transitions leaving
from each state. */
while (p1->position >= 0)
@ -1827,6 +1810,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
}
p1++;
}
}
return REG_OK;
}

View File

@ -168,23 +168,18 @@ typedef struct tre_backtrack_struct
while (/*CONSTCOND*/0)
/* BT_STACK_POP: restore the matcher's working variables from the current
   backtrack stack item, then step `stack' back to the previous item.
   The expansion site must provide stack, pos, str_byte, state, next_c,
   tags and tnfa.  It asserts that a previous item exists, copies
   tnfa->num_tags tag values out of the item, and expands the helper
   macros BT_STACK_WIDE_OUT and BT_STACK_MBSTATE_OUT (defined elsewhere).
   NOTE(review): two bodies follow in this view.  The first also rewinds a
   STR_USER string source to pos + pos_add_next; the second omits that
   step -- presumably the before/after forms of this change; confirm which
   one is live. */
#define BT_STACK_POP() \
do \
{ \
int i; \
assert(stack->prev); \
pos = stack->item.pos; \
if (type == STR_USER) \
str_source->rewind(pos + pos_add_next, str_source->context); \
str_byte = stack->item.str_byte; \
BT_STACK_WIDE_OUT; \
state = stack->item.state; \
next_c = stack->item.next_c; \
for (i = 0; i < tnfa->num_tags; i++) \
tags[i] = stack->item.tags[i]; \
BT_STACK_MBSTATE_OUT; \
stack = stack->prev; \
} \
while (/*CONSTCOND*/0)
do { \
int i; \
assert(stack->prev); \
pos = stack->item.pos; \
str_byte = stack->item.str_byte; \
BT_STACK_WIDE_OUT; \
state = stack->item.state; \
next_c = stack->item.next_c; \
for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \
BT_STACK_MBSTATE_OUT; \
stack = stack->prev; \
} while (/*CONSTCOND*/0)
/* MIN(a, b): yields the smaller of the two arguments; `a' is chosen when
   they compare equal.  This is a function-like macro, so the selected
   argument is evaluated a second time -- do not pass expressions with side
   effects.  Any earlier definition of MIN is discarded first. */
#undef MIN
#define MIN(a, b) ((b) >= (a) ? (a) : (b))
@ -208,7 +203,6 @@ tre_tnfa_run_backtrack(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *str
int reg_notbol = eflags & REG_NOTBOL;
int reg_noteol = eflags & REG_NOTEOL;
int reg_newline = tnfa->cflags & REG_NEWLINE;
int str_user_end = 0;
/* These are used to remember the necessary values of the above
variables to return to the position where the current search
@ -302,8 +296,6 @@ retry:
state = NULL;
pos = pos_start;
if (type == STR_USER)
str_source->rewind(pos + pos_add_next, str_source->context);
GET_NEXT_WCHAR();
pos_start = pos;
next_c_start = next_c;
@ -446,15 +438,11 @@ retry:
if (len < 0)
{
if (type == STR_USER)
result = str_source->compare((unsigned)so, (unsigned)pos,
(unsigned)bt_len,
str_source->context);
#ifdef TRE_WCHAR
else if (type == STR_WIDE)
if (type == STR_WIDE)
result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len);
#endif /* TRE_WCHAR */
else
#endif /* TRE_WCHAR */
result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len);
}
else if (len - pos < bt_len)
@ -508,12 +496,7 @@ retry:
/* Check for end of string. */
if (len < 0)
{
if (type == STR_USER)
{
if (str_user_end)
goto backtrack;
}
else if (next_c == QSE_T('\0'))
if (next_c == QSE_T('\0'))
goto backtrack;
}
else
@ -533,8 +516,8 @@ retry:
trans_i->code_min, trans_i->code_max,
trans_i->code_min, trans_i->code_max,
trans_i->assertions, trans_i->state_id));
if (trans_i->code_min <= (tre_cint_t)prev_c &&
trans_i->code_max >= (tre_cint_t)prev_c)
if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
{
if (trans_i->assertions
&& (CHECK_ASSERTIONS(trans_i->assertions)

View File

@ -325,12 +325,7 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
/* Check for end of string. */
if (len < 0)
{
if (type == STR_USER)
{
if (str_user_end)
break;
}
else if (next_c == QSE_T('\0'))
if (next_c == QSE_T('\0'))
break;
}
else
@ -408,28 +403,28 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
for (trans_i = reach_i->state; trans_i->state; trans_i++)
{
/* Does this transition match the input symbol? */
if (trans_i->code_min <= (tre_cint_t)prev_c &&
trans_i->code_max >= (tre_cint_t)prev_c)
if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
{
if (trans_i->assertions
&& (CHECK_ASSERTIONS(trans_i->assertions)
|| CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
if (trans_i->assertions &&
(CHECK_ASSERTIONS(trans_i->assertions) ||
CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
{
DPRINT(("assertion failed\n"));
continue;
}
/* Compute the tags after this transition. */
for (i = 0; i < num_tags; i++)
tmp_tags[i] = reach_i->tags[i];
for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i];
tag_i = trans_i->tags;
if (tag_i != NULL)
{
while (*tag_i >= 0)
{
if (*tag_i < num_tags)
tmp_tags[*tag_i] = pos;
tag_i++;
}
}
if (reach_pos[trans_i->state_id].pos < pos)
{
@ -442,15 +437,12 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
if (reach_next_i->state == tnfa->final
&& (match_eo == -1
|| (num_tags > 0
&& reach_next_i->tags[0] <= match_tags[0])))
&& (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0])))
{
DPRINT((" found match %p\n", trans_i->state));
match_eo = pos;
new_match = 1;
for (i = 0; i < num_tags; i++)
match_tags[i] = reach_next_i->tags[i];
for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i];
}
reach_next_i++;

View File

@ -52,8 +52,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define str_source ((const tre_str_source*)string)
#ifdef TRE_WCHAR
#ifdef TRE_MULTIBYTE
@ -116,12 +114,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
} \
} \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0)
#else /* !TRE_MULTIBYTE */
@ -143,11 +135,6 @@ do { \
if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \
else next_c = *str_wide++; \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \
} \
} while(/*CONSTCOND*/0)
#endif /* !TRE_MULTIBYTE */
@ -156,24 +143,16 @@ do { \
/* No wide character or multibyte support. */
/* GET_NEXT_WCHAR: shift next_c into prev_c and fetch the following input
   character.
   - STR_BYTE: pos advances by one; next_c becomes '\0' once pos has
     reached a non-negative len, otherwise it is the byte read through
     str_byte (converted via unsigned char), and str_byte advances.
   - STR_USER: pos advances by pos_add_next, then the string source's
     get_next_char callback is called with &next_c, &pos_add_next and the
     source context; its return value is stored in str_user_end.
   NOTE(review): a second definition of this macro without the STR_USER
   branch follows directly in this view -- presumably this is the
   superseded form; confirm. */
#define GET_NEXT_WCHAR() \
do { \
prev_c = next_c; \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = '\0'; \
else \
next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0)
/* GET_NEXT_WCHAR: remember the current character in prev_c and, for
   STR_BYTE input, step pos forward and load the next character into
   next_c.  While len is negative or pos is still below len, the byte under
   str_byte is consumed (as an unsigned char); otherwise next_c becomes
   '\0' and str_byte is left untouched.  For any other string type only
   prev_c is updated.  The expansion site must provide prev_c, next_c,
   type, pos, len and str_byte. */
#define GET_NEXT_WCHAR() \
do { \
  prev_c = next_c; \
  if (type == STR_BYTE) \
    { \
      pos += 1; \
      next_c = (len < 0 || pos < len) ? (unsigned char)(*str_byte++) : '\0'; \
    } \
} while(/*CONSTCOND*/0)
#endif /* !TRE_WCHAR */

File diff suppressed because it is too large Load Diff

View File

@ -205,15 +205,6 @@ static int tre_match(
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
{
/* The regex has back references, or REG_BACKTRACKING_MATCHER was requested in eflags: use the backtracking matcher. */
if (type == STR_USER)
{
const tre_str_source *source = string;
if (source->rewind == QSE_NULL || source->compare == QSE_NULL)
/* The backtracking matcher requires rewind and compare
capabilities from the input stream. */
return REG_BADPAT;
}
status = tre_tnfa_run_backtrack (
preg->mmgr, tnfa, string, (int)len, type,
tags, eflags, &eo);
@ -266,15 +257,6 @@ int qse_tre_exec (
return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
}
#if 0
/* Match preg against a caller-supplied string source.  Forwards to
   tre_match with string type STR_USER and (unsigned)-1 as the length,
   passing nmatch, pmatch and eflags through unchanged, and returns
   tre_match's result.  Compiled out by the enclosing #if 0. */
int qse_tre_execsrc (
const regex_t *preg, const tre_str_source *str,
qse_size_t nmatch, regmatch_t pmatch[], int eflags)
{
return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
}
#endif
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{
return tre->errnum;

View File

@ -177,7 +177,6 @@ typedef qse_cint_t tre_cint_t;
#define regex_t qse_tre_t
#define regmatch_t qse_tre_match_t
#define reg_errcode_t qse_tre_errnum_t
#define tre_str_source qse_tre_strsrc_t
#define REG_OK QSE_TRE_ENOERR
@ -278,7 +277,7 @@ typedef qse_pma_t* tre_mem_t;
typedef qse_ctype_t tre_ctype_t;
#define tre_isctype(c,t) QSE_ISCTYPE(c,t)
typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
/* Returns number of bytes to add to (char *)ptr to make it
properly aligned for the type. */
@ -305,6 +304,9 @@ typedef struct tnfa_transition tre_tnfa_transition_t;
struct tnfa_transition
{
/* Range of accepted characters. */
/* QSE: indicates that the range code_min .. code_max has not yet been negated for ^ in a bracket expression */
int negate_range;
/* END QSE */
tre_cint_t code_min;
tre_cint_t code_max;
/* Pointer to the destination state. */