265 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			265 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|     Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.
 | |
| 
 | |
|     Redistribution and use in source and binary forms, with or without
 | |
|     modification, are permitted provided that the following conditions
 | |
|     are met:
 | |
|     1. Redistributions of source code must retain the above copyright
 | |
|        notice, this list of conditions and the following disclaimer.
 | |
|     2. Redistributions in binary form must reproduce the above copyright
 | |
|        notice, this list of conditions and the following disclaimer in the
 | |
|        documentation and/or other materials provided with the distribution.
 | |
| 
 | |
|     THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
 | |
|     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | |
|     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | |
|     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
|     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | |
|     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
|     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
|     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
|     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | |
|     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|   tre-ast.c - Abstract syntax tree (AST) routines
 | |
| 
 | |
| This is the license, copyright notice, and disclaimer for TRE, a regex
 | |
| matching package (library and tools) with support for approximate
 | |
| matching.
 | |
| 
 | |
| Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
 | |
| All rights reserved.
 | |
| 
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions
 | |
| are met:
 | |
| 
 | |
|   1. Redistributions of source code must retain the above copyright
 | |
|      notice, this list of conditions and the following disclaimer.
 | |
| 
 | |
|   2. Redistributions in binary form must reproduce the above copyright
 | |
|      notice, this list of conditions and the following disclaimer in the
 | |
|      documentation and/or other materials provided with the distribution.
 | |
| 
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
 | |
| ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | |
| A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 | |
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | |
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | |
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| */
 | |
| 
 | |
| #include "tre-ast.h"
 | |
| 
 | |
| tre_ast_node_t *
 | |
| tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
 | |
| {
 | |
| 	tre_ast_node_t *node;
 | |
| 
 | |
| 	node = tre_mem_calloc(mem, sizeof(*node));
 | |
| 	if (!node) return NULL;
 | |
| 	node->obj = tre_mem_calloc(mem, size);
 | |
| 	if (!node->obj) return NULL;
 | |
| 	node->type = type;
 | |
| 	node->nullable = -1;
 | |
| 	node->submatch_id = -1;
 | |
| 
 | |
| 	return node;
 | |
| }
 | |
| 
 | |
| tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
 | |
| {
 | |
| 	tre_ast_node_t *node;
 | |
| 	tre_literal_t *lit;
 | |
| 
 | |
| 	node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
 | |
| 	if (!node) return NULL;
 | |
| 	lit = node->obj;
 | |
| 	lit->code_min = code_min;
 | |
| 	lit->code_max = code_max;
 | |
| 	lit->position = position;
 | |
| 
 | |
| 	return node;
 | |
| }
 | |
| 
 | |
| tre_ast_node_t *
 | |
| tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
 | |
|                  int minimal)
 | |
| {
 | |
| 	tre_ast_node_t *node;
 | |
| 	tre_iteration_t *iter;
 | |
| 
 | |
| 	node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
 | |
| 	if (!node) return NULL;
 | |
| 	iter = node->obj;
 | |
| 	iter->arg = arg;
 | |
| 	iter->min = min;
 | |
| 	iter->max = max;
 | |
| 	iter->minimal = minimal;
 | |
| 	node->num_submatches = arg->num_submatches;
 | |
| 
 | |
| 	return node;
 | |
| }
 | |
| 
 | |
| tre_ast_node_t *
 | |
| tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
 | |
| {
 | |
| 	tre_ast_node_t *node;
 | |
| 
 | |
| 	node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
 | |
| 	if (node == NULL) return NULL;
 | |
| 	((tre_union_t *)node->obj)->left = left;
 | |
| 	((tre_union_t *)node->obj)->right = right;
 | |
| 	node->num_submatches = left->num_submatches + right->num_submatches;
 | |
| 
 | |
| 	return node;
 | |
| }
 | |
| 
 | |
| tre_ast_node_t *
 | |
| tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
 | |
|                        tre_ast_node_t *right)
 | |
| {
 | |
| 	tre_ast_node_t *node;
 | |
| 
 | |
| 	node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
 | |
| 	if (node == NULL) return NULL;
 | |
| 	((tre_catenation_t *)node->obj)->left = left;
 | |
| 	((tre_catenation_t *)node->obj)->right = right;
 | |
| 	node->num_submatches = left->num_submatches + right->num_submatches;
 | |
| 
 | |
| 	return node;
 | |
| }
 | |
| 
 | |
| #ifdef TRE_DEBUG
 | |
| 
 | |
| static void
 | |
| tre_findent(FILE *stream, int i)
 | |
| {
 | |
| 	while (i-- > 0)
 | |
| 		fputc(' ', stream);
 | |
| }
 | |
| 
 | |
| void
 | |
| tre_print_params(int *params)
 | |
| {
 | |
| 	int i;
 | |
| 	if (params)
 | |
| 	{
 | |
| 		DPRINT(("params ["));
 | |
| 		for (i = 0; i < TRE_PARAM_LAST; i++)
 | |
| 		{
 | |
| 			if (params[i] == TRE_PARAM_UNSET)
 | |
| 				DPRINT(("unset"));
 | |
| 			else if (params[i] == TRE_PARAM_DEFAULT)
 | |
| 				DPRINT(("default"));
 | |
| 			else
 | |
| 				DPRINT(("%d", params[i]));
 | |
| 			if (i < TRE_PARAM_LAST - 1)
 | |
| 				DPRINT((", "));
 | |
| 		}
 | |
| 		DPRINT(("]"));
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void
 | |
| tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
 | |
| {
 | |
| 	int code_min, code_max, pos;
 | |
| 	int num_tags = ast->num_tags;
 | |
| 	tre_literal_t *lit;
 | |
| 	tre_iteration_t *iter;
 | |
| 
 | |
| 	tre_findent(stream, indent);
 | |
| 	switch (ast->type)
 | |
| 	{
 | |
| 	case LITERAL:
 | |
| 		lit = ast->obj;
 | |
| 		code_min = lit->code_min;
 | |
| 		code_max = lit->code_max;
 | |
| 		pos = lit->position;
 | |
| 		if (IS_EMPTY(lit))
 | |
| 		{
 | |
| 			fprintf(stream, "literal empty\n");
 | |
| 		}
 | |
| 		else if (IS_ASSERTION(lit))
 | |
| 		{
 | |
| 			int i;
 | |
| 			char *assertions[] = { "bol", "eol", "ctype", "!ctype",
 | |
| 			                       "bow", "eow", "wb", "!wb"
 | |
| 			                     };
 | |
| 			if (code_max >= ASSERT_LAST << 1)
 | |
| 				assert(0);
 | |
| 			fprintf(stream, "assertions: ");
 | |
| 			for (i = 0; (1 << i) <= ASSERT_LAST; i++)
 | |
| 				if (code_max & (1 << i))
 | |
| 					fprintf(stream, "%s ", assertions[i]);
 | |
| 			fprintf(stream, "\n");
 | |
| 		}
 | |
| 		else if (IS_TAG(lit))
 | |
| 		{
 | |
| 			fprintf(stream, "tag %d\n", code_max);
 | |
| 		}
 | |
| 		else if (IS_BACKREF(lit))
 | |
| 		{
 | |
| 			fprintf(stream, "backref %d, pos %d\n", code_max, pos);
 | |
| 		}
 | |
| 		else if (IS_PARAMETER(lit))
 | |
| 		{
 | |
| 			tre_print_params(lit->u.params);
 | |
| 			fprintf(stream, "\n");
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
 | |
| 			        "%d tags\n", (int)code_min, (int)code_max, (int)code_min, (int)code_max, pos,
 | |
| 			        ast->submatch_id, num_tags);
 | |
| 		}
 | |
| 		break;
 | |
| 	case ITERATION:
 | |
| 		iter = ast->obj;
 | |
| 		fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
 | |
| 		        iter->min, iter->max, ast->submatch_id, num_tags,
 | |
| 		        iter->minimal ? "minimal" : "greedy");
 | |
| 		tre_do_print(stream, iter->arg, indent + 2);
 | |
| 		break;
 | |
| 	case UNION:
 | |
| 		fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
 | |
| 		tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
 | |
| 		tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
 | |
| 		break;
 | |
| 	case CATENATION:
 | |
| 		fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
 | |
| 		        num_tags);
 | |
| 		tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
 | |
| 		tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
 | |
| 		break;
 | |
| 	default:
 | |
| 		assert(0);
 | |
| 		break;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| static void
 | |
| tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
 | |
| {
 | |
| 	tre_do_print(stream, ast, 0);
 | |
| }
 | |
| 
 | |
| void
 | |
| tre_ast_print(tre_ast_node_t *tree)
 | |
| {
 | |
| 	printf("AST:\n");
 | |
| 	tre_ast_fprint(stdout, tree);
 | |
| }
 | |
| 
 | |
| #endif /* TRE_DEBUG */
 | |
| 
 | |
| /* EOF */
 |