hawk/lib/tre-parse.c

/*
    Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
  tre-parse.c - Regexp parser

This is the license, copyright notice, and disclaimer for TRE, a regex
matching package (library and tools) with support for approximate
matching.

Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
  This parser is just a simple recursive descent parser for POSIX.2
  regexps.  The parser supports both the obsolete default syntax and
  the "extended" syntax, and some nonstandard extensions.
*/


#include "tre-prv.h"
#include "tre-ast.h"
#include "tre-stack.h"
#include "tre-parse.h"

/* Characters with special meanings in regexp syntax.  Each constant is
   spelled through HAWK_T() instead of a bare character literal so that
   it can be compared directly against the characters of the pattern
   being parsed (presumably HAWK_T() adapts the literal to the configured
   character type -- confirm against the HAWK headers). */
#define CHAR_PIPE	   HAWK_T('|')
#define CHAR_LPAREN	   HAWK_T('(')
#define CHAR_RPAREN	   HAWK_T(')')
#define CHAR_LBRACE	   HAWK_T('{')
#define CHAR_RBRACE	   HAWK_T('}')
#define CHAR_LBRACKET	   HAWK_T('[')
#define CHAR_RBRACKET	   HAWK_T(']')
#define CHAR_MINUS	   HAWK_T('-')
#define CHAR_STAR	   HAWK_T('*')
#define CHAR_QUESTIONMARK  HAWK_T('?')
#define CHAR_PLUS	   HAWK_T('+')
#define CHAR_PERIOD	   HAWK_T('.')
#define CHAR_COLON	   HAWK_T(':')
#define CHAR_EQUAL	   HAWK_T('=')
#define CHAR_COMMA	   HAWK_T(',')
#define CHAR_CARET	   HAWK_T('^')
#define CHAR_DOLLAR	   HAWK_T('$')
#define CHAR_BACKSLASH	   HAWK_T('\\')
#define CHAR_HASH	   HAWK_T('#')
#define CHAR_TILDE	   HAWK_T('~')


/* Backslash shorthands (\t, \w, \s, ...) and the text each one expands
   to.  The table is scanned linearly and ends at the first entry whose
   `expansion' is NULL. */
static const struct tre_macro_struct
{
	const char c;           /* character following the backslash */
	const char *expansion;  /* replacement text, '\0' terminated */
} tre_macros[] =
{
	/* single control characters */
	{'t', "\t"},
	{'n', "\n"},
	{'r', "\r"},
	{'f', "\f"},
	{'a', "\a"},
	{'e', "\033"},

	/* bracket-expression shorthands and their negations */
	{'w', "[[:alnum:]_]"},
	{'W', "[^[:alnum:]_]"},
	{'s', "[[:space:]]"},
	{'S', "[^[:space:]]"},
	{'d', "[[:digit:]]"},
	{'D', "[^[:digit:]]"},

	/* sentinel */
	{ 0, NULL }
};

/* Converts one hexadecimal digit character to its numeric value.
   Accepts '0'-'9', 'A'-'F' and 'a'-'f'; returns -1 for anything else. */
static HAWK_INLINE int xdigit_to_num (hawk_ooch_t c)
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	return -1;
}

/* Expands the macro character at `regex' (the text is bounded by
   `regex_end') into `buf', which must have room for at least `buf_len'
   items.  The result is always '\0' terminated and is truncated to
   `buf_len' - 1 characters if necessary.  Sets buf[0] to zero if there
   is no match in `tre_macros' or if the input is empty.  Nothing is
   written when `buf_len' is zero. */
static void
tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
                 tre_char_t *buf, size_t buf_len)
{
	int i;

	/* Without this guard `buf_len - 1' below would wrap around and the
	   copy loop would no longer be bounded by the buffer size. */
	if (buf_len == 0)
		return;

	buf[0] = 0;
	if (regex >= regex_end)
		return;

	for (i = 0; tre_macros[i].expansion; i++)
	{
		if (tre_macros[i].c == *regex)
		{
			size_t j;
			DPRINT(("Expanding macro '%c' => '%s'\n",
			        tre_macros[i].c, tre_macros[i].expansion));
			/* HAWK: stop one short of buf_len so that the terminator
			   written below always fits. */
			for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++)
				buf[j] = tre_macros[i].expansion[j];
			/* END HAWK */
			buf[j] = 0;
			break;
		}
	}
}

/* Appends a new literal node covering the code range `min'..`max' to the
   array `*items'.

   `*i' is the number of items in use and `*max_i' the capacity of the
   array; the array is doubled through xrealloc() when it is full.  On
   growth `*items' and `*max_i' are updated, and they are only updated
   once the reallocation has succeeded so that they always describe the
   array the caller actually owns.

   Returns REG_OK on success and REG_ESPACE if the array cannot grow
   (allocation failure, or the capacity already exceeds 1024 items) or if
   the literal node cannot be created.  `*i' is advanced whenever a slot
   was written, including the case where the stored node is NULL. */
static reg_errcode_t
tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, tre_ast_node_t ***items)
{
	reg_errcode_t status;
	tre_ast_node_t **array = *items;
	/* Allocate more space if necessary. */
	if (*i >= *max_i)
	{
		tre_ast_node_t **new_items;
		int new_max;
		DPRINT(("out of array space, i = %d\n", *i));
		/* If the array is already 1024 items large, give up -- there's
		 probably an error in the regexp (e.g. not a '\0' terminated
		 string and missing ']') */
		if (*max_i > 1024)
			return REG_ESPACE;
		new_max = *max_i * 2;
		/* Size by the element type (a node pointer). */
		new_items = xrealloc(mem->gem, array, sizeof(*new_items) * new_max);
		if (new_items == NULL)
			return REG_ESPACE;
		*max_i = new_max;
		*items = array = new_items;
	}
	array[*i] = tre_ast_new_literal(mem, min, max, -1);
	status = array[*i] == NULL ? REG_ESPACE : REG_OK;
	(*i)++;
	return status;
}


#if defined(HAWK_OOCH_IS_BCH)
/* Expands a character class into explicit character ranges.

   Every code from 0 to 255 is tested against `class'.  When REG_ICASE is
   set in `cflags', a code also counts as a member if its lower-case or
   upper-case counterpart belongs to the class.  Each maximal run of
   consecutive members is appended to `*items' through tre_new_item().

   Returns REG_OK, or the first error reported by tre_new_item(). */
static reg_errcode_t
tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
                 int *i, int *max_i, int cflags)
{
	reg_errcode_t rc = REG_OK;
	tre_cint_t ch;
	int code;
	int run_lo = -1; /* first code of the current run, -1 when no run is open */
	int run_hi = 0;  /* last code of the current run */
	/* HAWK: the upstream assertion on TRE_MB_CUR_MAX == 1 was removed. */

	DPRINT(("  expanding class to character ranges\n"));
	for (code = 0; code < 256; code++)
	{
		int member;

		ch = code;
		member = tre_isctype(ch, class) ? 1 : 0;
		if (!member && (cflags & REG_ICASE))
		{
			member = (tre_isctype(tre_tolower(ch), class) ||
			          tre_isctype(tre_toupper(ch), class));
		}

		if (member)
		{
			/* Open a run or extend the one in progress. */
			if (run_lo < 0) run_lo = ch;
			run_hi = ch;
			continue;
		}

		/* A non-member only matters if it closes a run. */
		if (run_lo < 0) continue;

		DPRINT(("  range %c (%d) to %c (%d)\n", run_lo, run_lo, run_hi, run_hi));
		rc = tre_new_item(mem, run_lo, run_hi, i, max_i, items);
		run_lo = -1;
		if (rc != REG_OK) break;
	}

	/* Flush a run that extends up to code 255. */
	if (run_lo >= 0 && rc == REG_OK)
		rc = tre_new_item(mem, run_lo, run_hi, i, max_i, items);
	return rc;
}
#endif


static int
tre_compare_items(const void *a, const void *b, void* ctx)
{
	const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
	const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
	tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
	/* HAWK: changed int to long */
	/*int a_min = l_a->code_min, b_min = l_b->code_min;*/
	long a_min = l_a->code_min, b_min = l_b->code_min;
	/* END HAWK */

	if (a_min < b_min)
		return -1;
	else if (a_min > b_min)
		return 1;
	else
		return 0;
}

/* Maximum number of character classes that can occur in a negated bracket
   expression.  This is the size of the `neg_classes' array used by
   tre_parse_bracket(); exceeding it yields REG_ESPACE. */
#define MAX_NEG_CLASSES 64

/* Maximum length of character class names.
   NOTE(review): this macro expands to nothing.  The class-name parsing in
   tre_parse_bracket_items() clamps the name length with the literal 63
   instead of using this macro -- confirm whether a value was intended. */
#define MAX_CLASS_NAME

/* Expands to TWO comma-separated arguments -- the number of characters
   left before ctx->re_end and the pointer itself -- for use with a
   "%.*" conversion in DPRINT().  Requires a variable named `ctx' to be
   in scope at the point of use. */
#define REST(re) (int)(ctx->re_end - (re)), (re)

/* Parses the items of a bracket expression, starting at ctx->re (just
   after the opening '[' and the optional '^') and stopping after the
   closing ']'.

   Recognized items:
     - a range `a-z' (REG_ERANGE if the end points are reversed),
     - HAWK: a backslash-escaped character,
     - `[.' and `[=' constructs, which are rejected with REG_ECOLLATE,
     - a character class `[:name:]' (REG_ECTYPE if malformed or unknown),
     - a single character.  A ']' in the very first position is taken
       literally because the terminator test requires re > ctx->re.

   Each item is appended to `*items' with tre_new_item().  When `negate'
   is set, a class that was not expanded into ranges is recorded in
   `neg_classes' instead (at most MAX_NEG_CLASSES, else REG_ESPACE).
   With REG_ICASE, opposite-case ranges are added for non-class items.

   `*num_items' and `*items_size' are read on entry and written back on
   return, as is ctx->re, regardless of the status.

   The pattern is bounded by ctx->re_end and is not required to be '\0'
   terminated, so every look-ahead is checked against ctx->re_end.

   Returns REG_OK, or REG_EBRACK if the pattern ends before ']', or one
   of the errors listed above. */
static reg_errcode_t
tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
                        tre_ctype_t neg_classes[], int *num_neg_classes,
                        tre_ast_node_t ***items, int *num_items,
                        int *items_size)
{
	const tre_char_t *re = ctx->re;
	reg_errcode_t status = REG_OK;
	tre_ctype_t class = (tre_ctype_t)0;
	int i = *num_items;
	int max_i = *items_size;
	int skip;

	/* Build an array of the items in the bracket expression. */
	while (status == REG_OK)
	{
		/* Set when the item has already been added (expanded class). */
		skip = 0;
		if (re == ctx->re_end)
		{
			status = REG_EBRACK;
		}
		else if (*re == CHAR_RBRACKET && re > ctx->re)
		{
			DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n", REST(re)));
			re++;
			break;
		}
		else
		{
			tre_cint_t min = 0, max = 0;

			class = (tre_ctype_t)0;
			if (re + 2 < ctx->re_end
			        && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET)
			{
				DPRINT(("tre_parse_bracket:  range: '%.*" STRF "'\n", REST(re)));
				min = *re;
				max = *(re + 2);
				re += 3;
				/* XXX - Should use collation order instead of encoding values
				 in character ranges. */
				if (min > max)
					status = REG_ERANGE;
			}
		/* HAWK: handle \ as an escaper  */
			else if (re + 1 < ctx->re_end && *re == CHAR_BACKSLASH)
			{
				/* escaped character inside [] */
				min = max = *(re + 1);
				re += 2;
			}
		/* END HAWK */
			else if (re + 1 < ctx->re_end
			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD)
				status = REG_ECOLLATE;
			else if (re + 1 < ctx->re_end
			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL)
				status = REG_ECOLLATE;
			else if (re + 1 < ctx->re_end
			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON)
			{
				const tre_char_t *endptr = re + 2;
				/* HAWK: changed int to hawk_oow_t */
				hawk_oow_t len;
				/* END HAWK */
				DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re)));
				while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++;
				if (endptr != ctx->re_end)
				{
					/* HAWK: the closing ':' must be followed by ']'.  The
					 * character after ':' is only inspected if it lies
					 * inside the pattern. */
					if (endptr + 1 >= ctx->re_end || *(endptr + 1) != CHAR_RBRACKET) status = REG_ECTYPE;
					else
					{
					/* END HAWK */
						/* At most 63 characters of the name are looked up. */
						len = MIN(endptr - re - 2, 63);

						if (hawk_oochars_to_ooch_prop(re + 2, len, &class) <= -1) status = REG_ECTYPE;

						/* Optimize character classes for 8 bit character sets. */
#if defined(HAWK_OOCH_IS_BCH)
						/* HAWK: not possible to count on MB_CUR_MAX since
						 *      this library is designed to support per-object
						 *      or per-context character encoding using hawk_cmgr_t */
						/* END HAWK */
						if (status == REG_OK)
						{
							status = tre_expand_ctype(ctx->mem, class, items, &i, &max_i, ctx->cflags);
							class = (tre_ctype_t)0;
							skip = 1;
						}
#endif
						re = endptr + 2;
					}
				}
				else status = REG_ECTYPE;
				/* A class that is kept as a class spans the whole code
				   range; membership is decided by the class itself. */
				min = 0;
				max = TRE_CHAR_MAX;
			}
			else
			{
				DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
				/* A '-' that is neither first nor directly before ']' is
				   an error.  Reaching the end of the pattern counts as
				   "not followed by ']'" without reading past re_end. */
				if (*re == CHAR_MINUS
				        && (re + 1 >= ctx->re_end || *(re + 1) != CHAR_RBRACKET)
				        && ctx->re != re)
					/* Two ranges are not allowed to share an endpoint. */
					status = REG_ERANGE;
				min = max = *re++;
			}

			if (status != REG_OK) break;

			if (class && negate)
			{
				if (*num_neg_classes >= MAX_NEG_CLASSES)
					status = REG_ESPACE;
				else
					neg_classes[(*num_neg_classes)++] = class;
			}
			else if (!skip)
			{
				status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
				if (status != REG_OK) break;
				((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
			}

			/* Add opposite-case counterpoints if REG_ICASE is present.
			   This is broken if there are more than two "same" characters. */
			if ((ctx->cflags & REG_ICASE) && !class && status == REG_OK && !skip)
			{
				tre_cint_t cmin, ccurr;

				DPRINT(("adding opposite-case counterpoints\n"));
				while (min <= max)
				{
					if (tre_islower(min))
					{
						/* Collect a run whose upper-case images are
						   consecutive and add it as one range. */
						cmin = ccurr = tre_toupper(min++);
						while (tre_islower(min) && tre_toupper(min) == ccurr + 1 && min <= max)
							ccurr = tre_toupper(min++);
						status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items);
					}
					else if (tre_isupper(min))
					{
						cmin = ccurr = tre_tolower(min++);
						while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 && min <= max)
							ccurr = tre_tolower(min++);
						status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items);
					}
					else min++;
					if (status != REG_OK) break;
				}
				if (status != REG_OK) break;
			}
		}
	}
	*num_items = i;
	*items_size = max_i;
	ctx->re = re;
	return status;
}

/* Parses a bracket expression.  ctx->re must point just after the
   opening '['; on return it points after the closing ']' (or wherever
   tre_parse_bracket_items() stopped on error).

   The items are collected by tre_parse_bracket_items() and combined into
   a union of literal nodes.  For a negated expression (`[^...]') the
   items are sorted by their lower bound and the union is built from the
   gaps between them, followed by a final range up to TRE_CHAR_MAX; the
   character classes gathered in `neg_classes' are attached to every
   literal as a (tre_ctype_t)0 terminated array.

   The resulting node (possibly NULL on error) is stored in `*result' and
   ctx->position is incremented in all cases except when the initial
   item array cannot be allocated.

   Returns REG_OK, REG_ESPACE on allocation failure, or the error
   reported by tre_parse_bracket_items(). */
static reg_errcode_t
tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
{
	tre_ast_node_t *node = NULL;
	int negate = 0;
	reg_errcode_t status = REG_OK;
	tre_ast_node_t **items, *u, *n;
	int i = 0, j, max_i = 32, curr_max, curr_min;
	tre_ctype_t neg_classes[MAX_NEG_CLASSES];
	int num_neg_classes = 0;

	/* Start off with an array of `max_i' elements. */
	items = xmalloc(ctx->mem->gem, sizeof(*items) * max_i);
	if (items == NULL) return REG_ESPACE;

	/* The pattern may end right after '['; only look at the next
	   character if there is one.  An empty remainder is reported as
	   REG_EBRACK by tre_parse_bracket_items(). */
	if (ctx->re < ctx->re_end && *ctx->re == CHAR_CARET)
	{
		DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
		negate = 1;
		ctx->re++;
	}

	status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, &items, &i, &max_i);
	if (status != REG_OK) goto parse_bracket_done;

	/* Sort the array if we need to negate it. */
	if (negate) hawk_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, HAWK_NULL);

	curr_max = curr_min = 0;
	/* Build a union of the items in the array, negated if necessary. */
	for (j = 0; j < i && status == REG_OK; j++)
	{
		int min, max;
		tre_literal_t *l = items[j]->obj;
		min = l->code_min;
		max = l->code_max;

		DPRINT(("item: %d - %d, class %ld, curr_max = %d\n",
		        (int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max));

		if (negate)
		{
			/* curr_min..min-1 is the gap before this item; the item's
			   literal is reused to represent that gap. */
			if (min < curr_max)
			{
				/* Overlap. */
				curr_max = MAX(max + 1, curr_max);
				DPRINT(("overlap, curr_max = %d\n", curr_max));
				l = NULL;
			}
			else
			{
				/* No overlap. */
				curr_max = min - 1;
				if (curr_max >= curr_min)
				{
					DPRINT(("no overlap\n"));
					l->code_min = curr_min;
					l->code_max = curr_max;
				}
				else
				{
					DPRINT(("no overlap, zero room\n"));
					l = NULL;
				}
				curr_min = curr_max = max + 1;
			}
		}

		if (l != NULL)
		{
			int k;
			DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max));
			l->position = ctx->position;
			if (num_neg_classes > 0)
			{
				/* One slot per class plus the terminator, sized by the
				   element type rather than by the pointer. */
				l->neg_classes = tre_mem_alloc(ctx->mem, (sizeof(*l->neg_classes) * (num_neg_classes + 1)));
				if (l->neg_classes == NULL)
				{
					status = REG_ESPACE;
					break;
				}
				for (k = 0; k < num_neg_classes; k++) l->neg_classes[k] = neg_classes[k];
				l->neg_classes[k] = (tre_ctype_t)0;
			}
			else
			{
				l->neg_classes = NULL;
			}
			if (node == NULL)
			{
				node = items[j];
			}
			else
			{
				u = tre_ast_new_union(ctx->mem, node, items[j]);
				if (u == NULL)
					status = REG_ESPACE;
				node = u;
			}
		}
	}

	if (status != REG_OK) goto parse_bracket_done;

	if (negate)
	{
		/* Everything above the last item also matches. */
		int k;
		DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
		n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
		if (n == NULL)
		{
			status = REG_ESPACE;
		}
		else
		{
			tre_literal_t *l = n->obj;
			if (num_neg_classes > 0)
			{
				l->neg_classes = tre_mem_alloc(ctx->mem,
				                               (sizeof(*l->neg_classes)
				                                * (num_neg_classes + 1)));
				if (l->neg_classes == NULL)
				{
					status = REG_ESPACE;
					goto parse_bracket_done;
				}
				for (k = 0; k < num_neg_classes; k++)
					l->neg_classes[k] = neg_classes[k];
				l->neg_classes[k] = (tre_ctype_t)0;
			}
			else
			{
				l->neg_classes = NULL;
			}
			if (node == NULL)
			{
				node = n;
			}
			else
			{
				u = tre_ast_new_union(ctx->mem, node, n);
				if (u == NULL) status = REG_ESPACE;
				node = u;
			}
		}
	}

	if (status != REG_OK) goto parse_bracket_done;

#ifdef TRE_DEBUG
	tre_ast_print(node);
#endif /* TRE_DEBUG */

parse_bracket_done:
	/* Only the pointer array is released here; the nodes themselves were
	   obtained through ctx->mem. */
	xfree(ctx->mem->gem, items);
	ctx->position++;
	*result = node;
	return status;
}


/* Parses a non-negative decimal integer starting at `*regex' and bounded
   by `regex_end'.  All consecutive digits are consumed and `*regex' is
   advanced past them.

   Returns the parsed value, or -1 if there is no digit at `*regex' or if
   the number is too large.  The overflow test is conservative: once the
   accumulated value exceeds (INT_MAX - 9) / 10 and another digit
   follows, the number is rejected. */
static int
tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
{
	/* HAWK : added overflow check with other code optimizations */
	int num = -1;
	const tre_char_t *r = *regex;

	if (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
	{
		int ever_overflowed = 0;

		num = 0;
		do
		{
			if (num > (HAWK_TYPE_MAX(int) - 9) / 10)
			{
				/* Stop accumulating: multiplying any further would
				   overflow a signed int.  The remaining digits are
				   still consumed. */
				ever_overflowed = 1;
			}
			else
			{
				/* Convert the digit first so that the intermediate
				   sum cannot exceed num * 10 + 9. */
				num = num * 10 + (int)(*r - HAWK_T('0'));
			}
			r++;
		}
		while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'));

		if (ever_overflowed) num = -1;
	}
	*regex = r;
	return num;
}


/* Parses the contents of a bound, i.e. what follows an opening brace,
   including the closing '}' (REG_EXTENDED) or '\}' (basic syntax) and an
   optional trailing '?'.  ctx->re must point just after the opening
   brace and is advanced past the bound on success only.

   On entry `*result' is the node the bound applies to.  On success it is
   replaced by an iteration node wrapping it, or by an EMPTY literal when
   both repeat counts are zero.  If approximate-matching settings are
   present they are stored in the iteration node's `params' and
   ctx->have_approx is set.

   Returns REG_OK, REG_BADBR for malformed contents (including a repeat
   count too large to represent), REG_EBRACE if the pattern ends before
   the closing brace, or REG_ESPACE on allocation failure. */
static reg_errcode_t
tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
{
	int min, max, i;
	int cost_ins, cost_del, cost_subst, cost_max;
	int limit_ins, limit_del, limit_subst, limit_err;
	const tre_char_t *r = ctx->re;
	const tre_char_t *start;
	int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
	int approx = 0;
	int costs_set = 0;
	int counts_set = 0;

	cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
	limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;

	/* Parse number (minimum repetition count). */
	min = -1;
	if (r < ctx->re_end && *r >= HAWK_T('0') && *r <= HAWK_T('9'))
	{
		DPRINT(("tre_parse:	  min count: '%.*" STRF "'\n", REST(r)));
		min = tre_parse_int(&r, ctx->re_end);
		/* A digit was present, so a negative result means the count
		   overflowed.  Reject it instead of treating it as unset. */
		if (min < 0) return REG_BADBR;
	}

	/* Parse comma and second number (maximum repetition count). */
	max = min;
	if (r < ctx->re_end && *r == CHAR_COMMA)
	{
		const tre_char_t *digits;

		r++;
		DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
		digits = r;
		max = tre_parse_int(&r, ctx->re_end);
		/* No digits at all means "no upper limit"; digits that were
		   consumed but yielded -1 mean the count overflowed. */
		if (max < 0 && r != digits) return REG_BADBR;
	}

	/* Check that the repeat counts are sane. */
	/* hyunghwan.chung:
	   the upstream check additionally rejected max > RE_DUP_MAX.  That
	   check still allowed something like {100000,} while it did not
	   allow {1,256}, so it was dropped.
	*/
	if ((max >= 0 && min > max)) return REG_BADBR;


	/*
	 '{'
	   optionally followed immediately by a number == minimum repcount
	   optionally followed by , then a number == maximum repcount
	    + then a number == maximum insertion count
	    - then a number == maximum deletion count
	    # then a number == maximum substitution count
	    ~ then a number == maximum number of errors
	    Any of +, -, # or ~ not followed by a number means that
	    the maximum count/number of errors is infinite.

	    An equation of the form
	Xi + Yd + Zs < C
	    can be specified to set costs and the cost limit to a value
	    different from the default value:
	- X is the cost of an insertion
	- Y is the cost of a deletion
	- Z is the cost of a substitution
	- C is the maximum cost

	    If no count limit or cost is set for an operation, the operation
	    is not allowed at all.
	*/


	/* Alternate between the two sub-parsers until neither of them
	   consumes any more input. */
	do
	{
		int done;
		start = r;

		/* Parse count limit settings */
		done = 0;
		if (!counts_set)
			while (r + 1 < ctx->re_end && !done)
			{
				switch (*r)
				{
				case CHAR_PLUS:  /* Insert limit */
					DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
					r++;
					limit_ins = tre_parse_int(&r, ctx->re_end);
					if (limit_ins < 0)
						limit_ins = HAWK_TYPE_MAX(int);
					counts_set = 1;
					break;
				case CHAR_MINUS: /* Delete limit */
					DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
					r++;
					limit_del = tre_parse_int(&r, ctx->re_end);
					if (limit_del < 0)
						limit_del = HAWK_TYPE_MAX(int);
					counts_set = 1;
					break;
				case CHAR_HASH:  /* Substitute limit */
					DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
					r++;
					limit_subst = tre_parse_int(&r, ctx->re_end);
					if (limit_subst < 0)
						limit_subst = HAWK_TYPE_MAX(int);
					counts_set = 1;
					break;
				case CHAR_TILDE: /* Maximum number of changes */
					DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
					r++;
					limit_err = tre_parse_int(&r, ctx->re_end);
					if (limit_err < 0)
						limit_err = HAWK_TYPE_MAX(int);
					approx = 1;
					break;
				case CHAR_COMMA:
					r++;
					break;
				case HAWK_T(' '):
					r++;
					break;
				case HAWK_T('}'):
					done = 1;
					break;
				default:
					done = 1;
					break;
				}
			}

		/* Parse cost restriction equation. */
		done = 0;
		if (!costs_set)
			while (r + 1 < ctx->re_end && !done)
			{
				switch (*r)
				{
				case CHAR_PLUS:
				case HAWK_T(' '):
					r++;
					break;
				case HAWK_T('<'):
					DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
					r++;
					/* Skip blanks without running off the pattern. */
					while (r < ctx->re_end && *r == HAWK_T(' '))
						r++;
					cost_max = tre_parse_int(&r, ctx->re_end);
					if (cost_max < 0)
						cost_max = HAWK_TYPE_MAX(int);
					else
						/* "< C" is strict: the largest allowed cost is C - 1. */
						cost_max--;
					approx = 1;
					break;
				case CHAR_COMMA:
					r++;
					done = 1;
					break;
				default:
					if (*r >= HAWK_T('0') && *r <= HAWK_T('9'))
					{
#ifdef TRE_DEBUG
						const tre_char_t *sr = r;
#endif /* TRE_DEBUG */
						int cost = tre_parse_int(&r, ctx->re_end);
						/* The number may run up to the end of the
						   pattern; the operation letter is missing
						   then. */
						if (r >= ctx->re_end)
							return REG_BADBR;
						switch (*r)
						{
						case HAWK_T('i'):	/* Insert cost */
							DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
							        REST(sr)));
							r++;
							cost_ins = cost;
							costs_set = 1;
							break;
						case HAWK_T('d'):	/* Delete cost */
							DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
							        REST(sr)));
							r++;
							cost_del = cost;
							costs_set = 1;
							break;
						case HAWK_T('s'):	/* Substitute cost */
							DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
							        REST(sr)));
							r++;
							cost_subst = cost;
							costs_set = 1;
							break;
						default:
							return REG_BADBR;
						}
					}
					else
					{
						done = 1;
						break;
					}
				}
			}
	}
	while (start != r);

	/* Missing }. */
	if (r >= ctx->re_end)
		return REG_EBRACE;

	/* Empty contents of {}. */
	if (r == ctx->re)
		return REG_BADBR;

	/* Parse the ending '}' or '\}'.*/
	if (ctx->cflags & REG_EXTENDED)
	{
		if (r >= ctx->re_end || *r != CHAR_RBRACE)
			return REG_BADBR;
		r++;
	}
	else
	{
		if (r + 1 >= ctx->re_end
		        || *r != CHAR_BACKSLASH
		        || *(r + 1) != CHAR_RBRACE)
			return REG_BADBR;
		r += 2;
	}


	/* Parse trailing '?' marking minimal repetition. */
	if (r < ctx->re_end)
	{
		if (*r == CHAR_QUESTIONMARK)
		{
			/* '?' inverts the default set by REG_UNGREEDY. */
			minimal = !(ctx->cflags & REG_UNGREEDY);
			r++;
		}
/* HAWK - commented out for minimal impact on backward compatibility.
 *       X{x,y}* X{x,y}+ */
#if 0
		else if (*r == CHAR_STAR || *r == CHAR_PLUS)
		{
			/* These are reserved for future extensions. */
			return REG_BADRPT;
		}
#endif
	}

	/* Create the AST node(s). */
	if (min == 0 && max == 0)
	{
		*result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
		if (*result == NULL) return REG_ESPACE;
	}
	else
	{
		if (min < 0 && max < 0)
			/* Only approximate parameters set, no repetitions. */
			min = max = 1;

		*result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
		if (!*result)
			return REG_ESPACE;

		/* If approximate matching parameters are set, add them to the
		 iteration node. */
		if (approx || costs_set || counts_set)
		{
			int *params;
			tre_iteration_t *iter = (*result)->obj;

			if (costs_set || counts_set)
			{
				/* An operation with neither a limit nor a cost is
				   disallowed (limit 0); one with only a cost is
				   unlimited in count. */
				if (limit_ins == TRE_PARAM_UNSET)
				{
					if (cost_ins == TRE_PARAM_UNSET)
						limit_ins = 0;
					else
						limit_ins = HAWK_TYPE_MAX(int);
				}

				if (limit_del == TRE_PARAM_UNSET)
				{
					if (cost_del == TRE_PARAM_UNSET)
						limit_del = 0;
					else
						limit_del = HAWK_TYPE_MAX(int);
				}

				if (limit_subst == TRE_PARAM_UNSET)
				{
					if (cost_subst == TRE_PARAM_UNSET)
						limit_subst = 0;
					else
						limit_subst = HAWK_TYPE_MAX(int);
				}
			}

			if (cost_max == TRE_PARAM_UNSET)
				cost_max = HAWK_TYPE_MAX(int);
			if (limit_err == TRE_PARAM_UNSET)
				limit_err = HAWK_TYPE_MAX(int);

			ctx->have_approx = 1;
			params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
			if (!params)
				return REG_ESPACE;
			for (i = 0; i < TRE_PARAM_LAST; i++)
				params[i] = TRE_PARAM_UNSET;
			params[TRE_PARAM_COST_INS] = cost_ins;
			params[TRE_PARAM_COST_DEL] = cost_del;
			params[TRE_PARAM_COST_SUBST] = cost_subst;
			params[TRE_PARAM_COST_MAX] = cost_max;
			params[TRE_PARAM_MAX_INS] = limit_ins;
			params[TRE_PARAM_MAX_DEL] = limit_del;
			params[TRE_PARAM_MAX_SUBST] = limit_subst;
			params[TRE_PARAM_MAX_ERR] = limit_err;
			iter->params = params;
		}
	}

	DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
	        "limits [%d,%d,%d, total %d]\n",
	        min, max, cost_ins, cost_del, cost_subst, cost_max,
	        limit_ins, limit_del, limit_subst, limit_err));


	ctx->re = r;
	return REG_OK;
}

/* Symbols pushed on the parse stack by tre_parse().  Each symbol names
   the next piece of work the parser loop performs when it is popped. */
typedef enum
{
	PARSE_RE                = 0,  /* a full regexp: branches separated by `|' */
	PARSE_ATOM              = 1,  /* an atom (handler not shown in this part of the file) */
	PARSE_MARK_FOR_SUBMATCH = 2,  /* pushed together with a submatch id */
	PARSE_BRANCH            = 3,  /* a branch: one or more concatenated pieces */
	PARSE_PIECE             = 4,  /* a piece: an atom plus optional postfix operators */
	PARSE_CATENATION        = 5,  /* parse another piece if the expression has not ended */
	PARSE_POST_CATENATION   = 6,  /* join the saved tree and the result into a catenation */
	PARSE_UNION             = 7,  /* look for `|' and schedule the next branch */
	PARSE_POST_UNION        = 8,  /* join the saved tree and the result into a union */
	PARSE_POSTFIX           = 9,  /* postfix operators */
	PARSE_RESTORE_CFLAGS    = 10  /* presumably restores saved cflags -- confirm in tre_parse() */
} tre_parse_re_stack_symbol_t;

reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
{
	tre_ast_node_t *result = NULL;
	tre_parse_re_stack_symbol_t symbol;
	reg_errcode_t status = REG_OK;
	tre_stack_t *stack = ctx->stack;
	int bottom = tre_stack_num_objects(stack);
	int depth = 0;
	int temporary_cflags = 0;

	DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d\n",
	        ctx->len, ctx->re, ctx->len));

	if (!ctx->nofirstsub)
	{
		STACK_PUSH(stack, int, ctx->submatch_id);
		STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
		ctx->submatch_id++;
	}
	STACK_PUSH(stack, int, PARSE_RE);
	ctx->re_start = ctx->re;
	ctx->re_end = ctx->re + ctx->len;


	/* The following is basically just a recursive descent parser.  I use
	   an explicit stack instead of recursive functions mostly because of
	   two reasons: compatibility with systems which have an overflowable
	   call stack, and efficiency (both in lines of code and speed).  */
	while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
	{
		if (status != REG_OK) break;

		symbol = tre_stack_pop_int(stack);
		switch (symbol)
		{
		case PARSE_RE:
			/* Parse a full regexp.  A regexp is one or more branches,
			   separated by the union operator `|'. */
#ifdef REG_LITERAL
			if (!(ctx->cflags & REG_LITERAL)
			        && ctx->cflags & REG_EXTENDED)
#endif /* REG_LITERAL */
				STACK_PUSHX(stack, int, PARSE_UNION);
			STACK_PUSHX(stack, int, PARSE_BRANCH);
			break;

		case PARSE_BRANCH:
			/* Parse a branch.  A branch is one or more pieces, concatenated.
			   A piece is an atom possibly followed by a postfix operator. */
			STACK_PUSHX(stack, int, PARSE_CATENATION);
			STACK_PUSHX(stack, int, PARSE_PIECE);
			break;

		case PARSE_PIECE:
			/* Parse a piece.  A piece is an atom possibly followed by one
			   or more postfix operators. */
#ifdef REG_LITERAL
			if (!(ctx->cflags & REG_LITERAL))
#endif /* REG_LITERAL */
				STACK_PUSHX(stack, int, PARSE_POSTFIX);
			STACK_PUSHX(stack, int, PARSE_ATOM);
			break;

		case PARSE_CATENATION:
			/* If the expression has not ended, parse another piece. */
		{
			tre_char_t c;

			if (ctx->re >= ctx->re_end) break;
			c = *ctx->re;
#ifdef REG_LITERAL
			if (!(ctx->cflags & REG_LITERAL))
			{
#endif /* REG_LITERAL */
				if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE)
					break;
				if ((ctx->cflags & REG_EXTENDED
				        && c == CHAR_RPAREN && depth > 0)
				        || (!(ctx->cflags & REG_EXTENDED)
				            && (c == CHAR_BACKSLASH
				                && *(ctx->re + 1) == CHAR_RPAREN)))
				{
					if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
						status = REG_EPAREN;
					DPRINT(("tre_parse:	  group end: '%.*" STRF "'\n",
					        REST(ctx->re)));
					depth--;
					if (!(ctx->cflags & REG_EXTENDED))
						ctx->re += 2;
					break;
				}
#ifdef REG_LITERAL
			}
#endif /* REG_LITERAL */

#ifdef REG_RIGHT_ASSOC
			if (ctx->cflags & REG_RIGHT_ASSOC)
			{
				/* Right associative concatenation. */
				STACK_PUSHX(stack, voidptr, result);
				STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
				STACK_PUSHX(stack, int, PARSE_CATENATION);
				STACK_PUSHX(stack, int, PARSE_PIECE);
			}
			else
#endif
			{ /* REG_RIGHT_ASSOC */
				/* Default case, left associative concatenation. */
				STACK_PUSHX(stack, int, PARSE_CATENATION);
				STACK_PUSHX(stack, voidptr, result);
				STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
				STACK_PUSHX(stack, int, PARSE_PIECE);
			}
			break;
		}

		case PARSE_POST_CATENATION:
		{
			tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
			tre_ast_node_t *tmp_node;
			tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
			if (!tmp_node)
				return REG_ESPACE;
			result = tmp_node;
			break;
		}

		case PARSE_UNION:
			if (ctx->re >= ctx->re_end) break;
	#ifdef REG_LITERAL
			if (ctx->cflags & REG_LITERAL) break;
	#endif /* REG_LITERAL */
			switch (*ctx->re)
			{
			case CHAR_PIPE:
				DPRINT(("tre_parse:	union: '%.*" STRF "'\n",
				        REST(ctx->re)));
				STACK_PUSHX(stack, int, PARSE_UNION);
				STACK_PUSHX(stack, voidptr, result);
				STACK_PUSHX(stack, int, PARSE_POST_UNION);
				STACK_PUSHX(stack, int, PARSE_BRANCH);
				ctx->re++;
				break;

			case CHAR_RPAREN:
				ctx->re++;
				break;

			default:
				break;
			}
			break;

		case PARSE_POST_UNION:
		{
			tre_ast_node_t *tmp_node;
			tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
			tmp_node = tre_ast_new_union(ctx->mem, tree, result);
			if (!tmp_node)
				return REG_ESPACE;
			result = tmp_node;
			break;
		}

		case PARSE_POSTFIX:
			/* Parse postfix operators. */
			if (ctx->re >= ctx->re_end)
				break;
	#ifdef REG_LITERAL
			if (ctx->cflags & REG_LITERAL)
				break;
	#endif /* REG_LITERAL */
			switch (*ctx->re)
			{
			case CHAR_PLUS:
			case CHAR_QUESTIONMARK:
				if (!(ctx->cflags & REG_EXTENDED)) break;
				/*FALLTHROUGH*/
			case CHAR_STAR:
	/* HAWK - added this label */
	parse_star:
	/* END HAWK */
			{
				tre_ast_node_t *tmp_node;
				int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
				int rep_min = 0;
				int rep_max = -1;
	#ifdef TRE_DEBUG
				const tre_char_t *tmp_re;
	#endif

				if (*ctx->re == CHAR_PLUS) /* HAWK: case CHAR_PLUS fell through down here */
					rep_min = 1;
				if (*ctx->re == CHAR_QUESTIONMARK) /* HAWK: case CHAR_QUESTIONMARK fell though down here */
					rep_max = 1;
	#ifdef TRE_DEBUG
				tmp_re = ctx->re;
	#endif

				if (ctx->re + 1 < ctx->re_end)
				{
					if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* HAWK: +?, ??, *? */
					{
						minimal = !(ctx->cflags & REG_UNGREEDY);
						ctx->re++;
					}
	/* HAWK - TRE reserves "**" and "*+" for future repetition operators and
	 *       rejects them with REG_BADRPT. That seems to break backward
	 *       compatibility: with the following block enabled, the '+' in
	 *       'a*+' is not treated as a normal character. The block is
	 *       therefore disabled. */
	#if 0
					else if (*(ctx->re + 1) == CHAR_STAR
					         || *(ctx->re + 1) == CHAR_PLUS)
					{
						/* These are reserved for future extensions. */
						return REG_BADRPT;
					}
	#endif
				}

				DPRINT(("tre_parse: %s star: '%.*" STRF "'\n",
				        minimal ? "  minimal" : "greedy", REST(tmp_re)));
				ctx->re++;
				tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
				                            minimal);
				if (tmp_node == NULL)
					return REG_ESPACE;
				result = tmp_node;
				STACK_PUSHX(stack, int, PARSE_POSTFIX);

				break;
			}

			case CHAR_BACKSLASH:
				/* "\{" is special without REG_EXTENDED */
				/* HAWK  - also handle \+ and \? */
				/*
				if (!(ctx->cflags & REG_EXTENDED)
				        && ctx->re + 1 < ctx->re_end
				        && *(ctx->re + 1) == CHAR_LBRACE)
				{
					ctx->re++;
					goto parse_brace;
				}
				else
					break;
				*/
				if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end)
				{
					if (*(ctx->re + 1) == CHAR_LBRACE)
					{
						ctx->re++;
						goto parse_brace;
					}
					else if (*(ctx->re + 1) == CHAR_PLUS ||
					         *(ctx->re + 1) == CHAR_QUESTIONMARK)
					{
						ctx->re++;
						goto parse_star;
					}
				}
				break;
				/* END HAWK */


			case CHAR_LBRACE:
				/* "{" is literal without REG_EXTENDED */
				if (!(ctx->cflags & REG_EXTENDED)) break;
				/* HAWK */
				if (ctx->cflags & REG_NOBOUND) break;
				/* END HAWK */

	parse_brace:
				DPRINT(("tre_parse:	bound: '%.*" STRF "'\n",
				        REST(ctx->re)));
				ctx->re++;

				status = tre_parse_bound(ctx, &result);
				if (status != REG_OK)
					return status;
				STACK_PUSHX(stack, int, PARSE_POSTFIX);
				break;
			}

			break;

		case PARSE_ATOM:

			/* Parse an atom.  An atom is a regular expression enclosed in `()',
			   an empty set of `()', a bracket expression, `.', `^', `$',
			   a `\' followed by a character, or a single character. */

			/* End of regexp? (empty string). */
			if (ctx->re >= ctx->re_end) goto parse_literal;

	#ifdef REG_LITERAL
			if (ctx->cflags & REG_LITERAL) goto parse_literal;
	#endif /* REG_LITERAL */

			switch (*ctx->re)
			{
			case CHAR_LPAREN:  /* parenthesized subexpression */

				/* Handle "(?...)" extensions.  They work in a way similar
				 to Perl's corresponding extensions. */
				/* HAWK: added ctx->cflags & REG_NONSTDEXT */
				if ((ctx->cflags & REG_NONSTDEXT) &&
				    (ctx->cflags & REG_EXTENDED) &&
				    *(ctx->re + 1) == CHAR_QUESTIONMARK)
				{
					int new_cflags = ctx->cflags;
					int bit = 1;
					DPRINT(("tre_parse:	extension: '%.*" STRF "\n", REST(ctx->re)));
					ctx->re += 2;
					while (/*CONSTCOND*/1)
					{
						if (*ctx->re == HAWK_T('i'))
						{
							DPRINT(("tre_parse:	    icase: '%.*" STRF "\n", REST(ctx->re)));
							if (bit)
								new_cflags |= REG_ICASE;
							else
								new_cflags &= ~REG_ICASE;
							ctx->re++;
						}
						else if (*ctx->re == HAWK_T('n'))
						{
							DPRINT(("tre_parse:	  newline: '%.*" STRF "\n", REST(ctx->re)));
							if (bit)
								new_cflags |= REG_NEWLINE;
							else
								new_cflags &= ~REG_NEWLINE;
							ctx->re++;
						}
	#ifdef REG_RIGHT_ASSOC
						else if (*ctx->re == HAWK_T('r'))
						{
							DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", REST(ctx->re)));
							if (bit)
								new_cflags |= REG_RIGHT_ASSOC;
							else
								new_cflags &= ~REG_RIGHT_ASSOC;
							ctx->re++;
						}
	#endif /* REG_RIGHT_ASSOC */
	#ifdef REG_UNGREEDY
						else if (*ctx->re == HAWK_T('U'))
						{
							DPRINT(("tre_parse:    ungreedy: '%.*" STRF "\n", REST(ctx->re)));
							if (bit)
								new_cflags |= REG_UNGREEDY;
							else
								new_cflags &= ~REG_UNGREEDY;
							ctx->re++;
						}
	#endif /* REG_UNGREEDY */
						else if (*ctx->re == CHAR_MINUS)
						{
							DPRINT(("tre_parse:	 turn off: '%.*" STRF "\n",
							        REST(ctx->re)));
							ctx->re++;
							bit = 0;
						}
						else if (*ctx->re == CHAR_COLON)
						{
							DPRINT(("tre_parse:	 no group: '%.*" STRF "\n",
							        REST(ctx->re)));
							ctx->re++;
							depth++;
							break;
						}
						else if (*ctx->re == CHAR_HASH)
						{
							DPRINT(("tre_parse:    comment: '%.*" STRF "\n",
							        REST(ctx->re)));
							/* A comment can contain any character except a
							   right parenthesis */
							while (*ctx->re != CHAR_RPAREN
							        && ctx->re < ctx->re_end)
								ctx->re++;
							if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
							{
								ctx->re++;
								break;
							}
							else
								return REG_BADPAT;
						}
						else if (*ctx->re == CHAR_RPAREN)
						{
							ctx->re++;
							break;
						}
						else
							return REG_BADPAT;
					}

					/* Turn on the cflags changes for the rest of the
					   enclosing group. */
					STACK_PUSHX(stack, int, ctx->cflags);
					STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS);
					STACK_PUSHX(stack, int, PARSE_RE);
					ctx->cflags = new_cflags;
					break;
				}

				if (ctx->cflags & REG_EXTENDED
				        || (ctx->re > ctx->re_start
				            && *(ctx->re - 1) == CHAR_BACKSLASH))
				{
					depth++;
					/* HAWK: added ctx->cflags & REG_NONSTDEXT */
					if ((ctx->cflags & REG_NONSTDEXT) &&
					     ctx->re + 2 < ctx->re_end &&
					     *(ctx->re + 1) == CHAR_QUESTIONMARK &&
					     *(ctx->re + 2) == CHAR_COLON)
					{
						/* HAWK: \(?: or (?: depending on REG_EXTENDED */
						DPRINT(("tre_parse: group begin: '%.*" STRF
						        "', no submatch\n", REST(ctx->re)));
						/* Don't mark for submatching. */
						ctx->re += 3;
						STACK_PUSHX(stack, int, PARSE_RE);
					}
					else
					{
						DPRINT(("tre_parse: group begin: '%.*" STRF
						        "', submatch %d\n", REST(ctx->re),
						        ctx->submatch_id));
						ctx->re++;
						/* First parse a whole RE, then mark the resulting tree
						 for submatching. */
						STACK_PUSHX(stack, int, ctx->submatch_id);
						STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
						STACK_PUSHX(stack, int, PARSE_RE);
						ctx->submatch_id++;
					}
				}
				else
					goto parse_literal;
				break;

			case CHAR_RPAREN:  /* end of current subexpression */
				/* HAWK: fixed the condition */
				/* if ((ctx->cflags & REG_EXTENDED && depth > 0)
				        || (ctx->re > ctx->re_start
				            && *(ctx->re - 1) == CHAR_BACKSLASH)) */
				if (((ctx->cflags & REG_EXTENDED) && depth > 0) ||
				    (!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start && *(ctx->re - 1) == CHAR_BACKSLASH))
				/* END HAWK */
				{
					DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n", REST(ctx->re)));
					/* We were expecting an atom, but instead the current
					   subexpression was closed.	POSIX leaves the meaning of
					   this to be implementation-defined.	 We interpret this as
					   an empty expression (which matches an empty string).  */
					result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
					if (result == NULL) return REG_ESPACE;
					if (!(ctx->cflags & REG_EXTENDED)) ctx->re--;
				}
				else
					goto parse_literal;
				break;

			case CHAR_LBRACKET: /* bracket expression */
				DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n", REST(ctx->re)));
				ctx->re++;
				status = tre_parse_bracket(ctx, &result);
				if (status != REG_OK) return status;
				break;

			case CHAR_BACKSLASH:
				/* If this is "\(" or "\)" chew off the backslash and
				 try again. */
				if (!(ctx->cflags & REG_EXTENDED)
				        && ctx->re + 1 < ctx->re_end
				        && (*(ctx->re + 1) == CHAR_LPAREN
				            || *(ctx->re + 1) == CHAR_RPAREN))
				{
					ctx->re++;
					STACK_PUSHX(stack, int, PARSE_ATOM);
					break;
				}

				/* If a macro is used, parse the expanded macro recursively. */
				{
					tre_char_t buf[64];
					tre_expand_macro(ctx->re + 1, ctx->re_end, buf, HAWK_COUNTOF(buf));
					if (buf[0] != 0)
					{
						tre_parse_ctx_t subctx;
						HAWK_MEMCPY (&subctx, ctx, sizeof(subctx));
						subctx.re = buf;
						subctx.len = tre_strlen(buf);
						subctx.nofirstsub = 1;
						status = tre_parse(&subctx);
						if (status != REG_OK) return status;
						ctx->re += 2;
						ctx->position = subctx.position;
						result = subctx.result;
						break;
					}
				}

				if (ctx->re + 1 >= ctx->re_end)
				{
					/* Trailing backslash. */
					return REG_EESCAPE;
				}

	#ifdef REG_LITERAL
				if (*(ctx->re + 1) == HAWK_T('Q'))
				{
					DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
					        REST(ctx->re)));
					ctx->cflags |= REG_LITERAL;
					temporary_cflags |= REG_LITERAL;
					ctx->re += 2;
					STACK_PUSHX(stack, int, PARSE_ATOM);
					break;
				}
	#endif /* REG_LITERAL */

				DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re)));
				ctx->re++;
				switch (*ctx->re)
				{
				case HAWK_T('b'):
					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
					ctx->re++;
					break;
				case HAWK_T('B'):
					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
					ctx->re++;
					break;
				case HAWK_T('<'):
					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
					ctx->re++;
					break;
				case HAWK_T('>'):
					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
					ctx->re++;
					break;
				case HAWK_T('x'):
					ctx->re++;
					if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
					{
					/* HAWK */
					#if 0
						/* 8 bit hex char. */
						char tmp[3] = {0, 0, 0};
						long val;
						DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n",
						        REST(ctx->re - 2)));

						if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
						{
							tmp[0] = (char)ctx->re[0];
							ctx->re++;
						}
						if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end)
						{
							tmp[1] = (char)ctx->re[0];
							ctx->re++;
						}
						val = strtol(tmp, NULL, 16);
					#endif
						long val = 0;
						int tmp;
						if ((tmp = xdigit_to_num(ctx->re[0])) >= 0 && ctx->re < ctx->re_end)
						{
							val = val * 16 + tmp;
							ctx->re++;
						}
						if ((tmp = xdigit_to_num(ctx->re[1])) >= 0 && ctx->re < ctx->re_end)
						{
							val = val * 16 + tmp;
							ctx->re++;
						}

						result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position);
						ctx->position++;
						break;
					}
					else if (ctx->re < ctx->re_end)
					{
						/* Wide char. */
					/* HAWK */
					#if 0
						char tmp[32];
						long val;
						int i = 0;
						ctx->re++;
						while (ctx->re_end - ctx->re >= 0)
						{
							if (ctx->re[0] == CHAR_RBRACE)
								break;
							if (tre_isxdigit(ctx->re[0]))
							{
								tmp[i] = (char)ctx->re[0];
								i++;
								ctx->re++;
								continue;
							}
							return REG_EBRACE;
						}
						ctx->re++;
						tmp[i] = 0;
						val = strtol(tmp, NULL, 16);
					#endif
						long val = 0;
						int tmp;

						ctx->re++;
						while (ctx->re_end - ctx->re >= 0)
						{
							if (ctx->re[0] == CHAR_RBRACE)
								break;
							tmp = xdigit_to_num(ctx->re[0]);
							if (tmp >= 0)
							{
								val = val * 16 + tmp;
								ctx->re++;
								continue;
							}
							return REG_EBRACE;
						}

						result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position);
						ctx->position++;
						break;
					}
					/*FALLTHROUGH*/

				default:
					if (tre_isdigit(*ctx->re))
					{
						/* Back reference. */
						int val = *ctx->re - HAWK_T('0');
						DPRINT(("tre_parse:     backref: '%.*" STRF "'\n", REST(ctx->re - 1)));
						result = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position);
						if (result == NULL) return REG_ESPACE;
						ctx->position++;
						ctx->max_backref = MAX(val, ctx->max_backref);
						ctx->re++;
					}
					else
					{
						/* Escaped character. */
						DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n", REST(ctx->re - 1)));
						result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
						ctx->position++;
						ctx->re++;
					}
					break;
				}
				if (result == NULL)
					return REG_ESPACE;
				break;

			case CHAR_PERIOD:	 /* the any-symbol */
				DPRINT(("tre_parse:	  any: '%.*" STRF "'\n",
				        REST(ctx->re)));
				if (ctx->cflags & REG_NEWLINE)
				{
					tre_ast_node_t *tmp1;
					tre_ast_node_t *tmp2;
					/* exclude new line */
					tmp1 = tre_ast_new_literal(ctx->mem, 0, HAWK_T('\n') - 1, ctx->position);
					if (!tmp1) return REG_ESPACE;
					tmp2 = tre_ast_new_literal(ctx->mem, HAWK_T('\n') + 1, TRE_CHAR_MAX, ctx->position + 1);
					if (!tmp2) return REG_ESPACE;
					result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
					if (!result) return REG_ESPACE;
					ctx->position += 2;
				}
				else
				{
					/* all characters */
					result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
					if (!result) return REG_ESPACE;
					ctx->position++;
				}
				ctx->re++;
				break;

			case CHAR_CARET:	 /* beginning of line assertion */
				/* '^' has a special meaning everywhere in EREs, and at the
				 beginning of the RE and after \( in BREs. */
				if (ctx->cflags & REG_EXTENDED
				        || (ctx->re - 2 >= ctx->re_start
				            && *(ctx->re - 2) == CHAR_BACKSLASH
				            && *(ctx->re - 1) == CHAR_LPAREN)
				        || ctx->re == ctx->re_start)
				{
					DPRINT(("tre_parse:	      BOL: '%.*" STRF "'\n",
					        REST(ctx->re)));
					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
					if (result == NULL) return REG_ESPACE;
					ctx->re++;
				}
				else
					goto parse_literal;
				break;

			case CHAR_DOLLAR:	 /* end of line assertion. */
				/* '$' is special everywhere in EREs, and at the end of the
				 string and before \) in BREs. */
				if (ctx->cflags & REG_EXTENDED
				        || (ctx->re + 2 < ctx->re_end
				            && *(ctx->re + 1) == CHAR_BACKSLASH
				            && *(ctx->re + 2) == CHAR_RPAREN)
				        || ctx->re + 1 == ctx->re_end)
				{
					DPRINT(("tre_parse:	      EOL: '%.*" STRF "'\n",
					        REST(ctx->re)));
					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
					if (result == NULL)
						return REG_ESPACE;
					ctx->re++;
				}
				else
					goto parse_literal;
				break;

			default:
	parse_literal:

				if (temporary_cflags && ctx->re + 1 < ctx->re_end
				        && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == HAWK_T('E'))
				{
					DPRINT(("tre_parse:	 end tmps: '%.*" STRF "'\n", REST(ctx->re)));
					ctx->cflags &= ~temporary_cflags;
					temporary_cflags = 0;
					ctx->re += 2;
					STACK_PUSHX(stack, int, PARSE_PIECE);
					break;
				}


				/* We are expecting an atom.  If the subexpression (or the whole
				 regexp) ends here, we interpret it as an empty expression
				 (which matches an empty string).  */
				if (
	#ifdef REG_LITERAL
				    !(ctx->cflags & REG_LITERAL) &&
	#endif /* REG_LITERAL */
				    (ctx->re >= ctx->re_end
				     || *ctx->re == CHAR_STAR
				     || (ctx->cflags & REG_EXTENDED
				         && (*ctx->re == CHAR_PIPE
					/* HAWK */
				             /*|| *ctx->re == CHAR_LBRACE*/
				             || (*ctx->re == CHAR_LBRACE && !(ctx->cflags & REG_NOBOUND))
					/* END HAWK */
				             || *ctx->re == CHAR_PLUS
				             || *ctx->re == CHAR_QUESTIONMARK))
				     /* Test for "\{" in BRE mode. */
				     || (!(ctx->cflags & REG_EXTENDED)
				         && ctx->re + 1 < ctx->re_end
				         && *ctx->re == CHAR_BACKSLASH
				         && *(ctx->re + 1) == CHAR_LBRACE)))
				{
					DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n", REST(ctx->re)));
					result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
					if (!result) return REG_ESPACE;
					break;
				}

				DPRINT(("tre_parse:     literal: '%.*" STRF "'\n",
				        REST(ctx->re)));
				/* Note that we can't use a tre_isalpha() test here, since there
				 may be characters which are alphabetic but neither upper nor
				 lower case. */
				if (ctx->cflags & REG_ICASE && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
				{
					tre_ast_node_t *tmp1;
					tre_ast_node_t *tmp2;

					/* XXX - Can there be more than one opposite-case
					   counterpart for some character in some locale?  Or
					   more than two characters which all should be regarded
					   as the same character if case is ignored?  If yes, there
					   does not seem to be a portable way to detect it.  I guess
					   that at least for multi-character collating elements there
					   could be several opposite-case counterparts, but they
					   cannot be supported portably anyway. */
					tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), tre_toupper(*ctx->re), ctx->position);
					if (!tmp1) return REG_ESPACE;
					tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), tre_tolower(*ctx->re), ctx->position);
					if (!tmp2) return REG_ESPACE;
					result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
					if (!result) return REG_ESPACE;
				}
				else
				{
					result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
					if (!result) return REG_ESPACE;
				}
				ctx->position++;
				ctx->re++;
				break;
			}
			break;

		case PARSE_MARK_FOR_SUBMATCH:
		{
			int submatch_id = tre_stack_pop_int(stack);

			if (result->submatch_id >= 0)
			{
				tre_ast_node_t *n, *tmp_node;
				n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
				if (n == NULL) return REG_ESPACE;
				tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
				if (tmp_node == NULL) return REG_ESPACE;
				tmp_node->num_submatches = result->num_submatches;
				result = tmp_node;
			}
			result->submatch_id = submatch_id;
			result->num_submatches++;
			break;
		}

		case PARSE_RESTORE_CFLAGS:
			ctx->cflags = tre_stack_pop_int(stack);
			break;

		default:
			assert(0);
			break;
		}
	}

	/* Check for missing closing parentheses. */
	if (depth > 0)
		return REG_EPAREN;

	if (status == REG_OK)
		ctx->result = result;

	return status;
}

/* EOF */