diff --git a/qse/cmd/sed/sed.c b/qse/cmd/sed/sed.c index 28249951..3135c750 100644 --- a/qse/cmd/sed/sed.c +++ b/qse/cmd/sed/sed.c @@ -115,6 +115,7 @@ static qse_ssize_t out ( return 1; case QSE_SED_IO_CLOSE: + qse_sio_flush (arg->handle); if (arg->handle != qse_sio_out) qse_sio_close (arg->handle); return 0; @@ -136,11 +137,12 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[]) qse_fprintf (out, QSE_T("options as follows:\n")); qse_fprintf (out, QSE_T(" -h show this message\n")); qse_fprintf (out, QSE_T(" -n disable auto-print\n")); - qse_fprintf (out, QSE_T(" -a perform strict address check\n")); - qse_fprintf (out, QSE_T(" -r use the extended regular expression\n")); - qse_fprintf (out, QSE_T(" -s allow text on the same line as c, a, i\n")); - qse_fprintf (out, QSE_T(" -l ensure a newline at text end\n")); qse_fprintf (out, QSE_T(" -f file specify a script file\n")); + qse_fprintf (out, QSE_T(" -r use the extended regular expression\n")); + qse_fprintf (out, QSE_T(" -a perform strict address check\n")); + qse_fprintf (out, QSE_T(" -w allow address format of start~step\n")); + qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n")); + qse_fprintf (out, QSE_T(" -y ensure a newline at text end\n")); qse_fprintf (out, QSE_T(" -m number specify the maximum amount of memory to use in bytes\n")); } @@ -148,7 +150,7 @@ static int handle_args (int argc, qse_char_t* argv[]) { static qse_opt_t opt = { - QSE_T("hnarslf:m:"), + QSE_T("hnf:rawxym:"), QSE_NULL }; qse_cint_t c; @@ -185,26 +187,30 @@ static int handle_args (int argc, qse_char_t* argv[]) g_option |= QSE_SED_QUIET; break; - case QSE_T('a'): - g_option |= QSE_SED_STRICT; + case QSE_T('f'): + g_script_file = opt.arg; break; case QSE_T('r'): g_option |= QSE_SED_EXTENDEDREX; break; - case QSE_T('s'): + case QSE_T('a'): + g_option |= QSE_SED_STRICT; + break; + + case QSE_T('w'): + g_option |= QSE_SED_STARTSTEP; + break; + + case QSE_T('x'): g_option |= QSE_SED_SAMELINE; break; - 
case QSE_T('l'): + case QSE_T('y'): g_option |= QSE_SED_ENSURENL; break; - case QSE_T('f'): - g_script_file = opt.arg; - break; - case QSE_T('m'): g_memlimit = qse_strtoulong (opt.arg); break; diff --git a/qse/lib/cmn/tio-put.c b/qse/lib/cmn/tio-put.c index c8cf98a8..65cdf8c3 100644 --- a/qse/lib/cmn/tio-put.c +++ b/qse/lib/cmn/tio-put.c @@ -1,5 +1,5 @@ /* - * $Id: tio-put.c 556 2011-08-31 15:43:46Z hyunghwan.chung $ + * $Id: tio-put.c 559 2011-09-04 16:21:54Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -21,7 +21,7 @@ #include #include -static qse_ssize_t tio_putc (qse_tio_t* tio, qse_char_t c) +static qse_ssize_t tio_putc (qse_tio_t* tio, qse_char_t c, int* flush_needed) { #ifdef QSE_CHAR_IS_WCHAR qse_size_t n, i; @@ -41,7 +41,10 @@ static qse_ssize_t tio_putc (qse_tio_t* tio, qse_char_t c) tio->outbuf[tio->outbuf_len++] = c; if (tio->outbuf_len >= QSE_COUNTOF(tio->outbuf)) + { + *flush_needed = 0; return qse_tio_flush (tio); + } #else /* QSE_CHAR_IS_WCHAR */ @@ -62,7 +65,8 @@ static qse_ssize_t tio_putc (qse_tio_t* tio, qse_char_t c) tio->outbuf[tio->outbuf_len++] = mc[i]; if (tio->outbuf_len >= QSE_COUNTOF(tio->outbuf)) { - if (qse_tio_flush (tio) == -1) return -1; + *flush_needed = 0; + if (qse_tio_flush (tio) <= -1) return -1; } } @@ -70,7 +74,8 @@ static qse_ssize_t tio_putc (qse_tio_t* tio, qse_char_t c) if (c == QSE_T('\n') && tio->outbuf_len > 0) { - if (qse_tio_flush (tio) == -1) return -1; + /*if (qse_tio_flush (tio) <= -1) return -1;*/ + *flush_needed = 1; } return 1; @@ -80,6 +85,7 @@ qse_ssize_t qse_tio_write (qse_tio_t* tio, const qse_char_t* str, qse_size_t siz { qse_ssize_t n; const qse_char_t* p; + int flush_needed = 0; if (size == 0) return 0; @@ -89,8 +95,8 @@ qse_ssize_t qse_tio_write (qse_tio_t* tio, const qse_char_t* str, qse_size_t siz { while (*p != QSE_T('\0')) { - n = tio_putc (tio, *p); - if (n == -1) return -1; + n = tio_putc (tio, *p, &flush_needed); + if (n <= -1) return -1; if (n == 0) 
break; p++; } @@ -100,13 +106,14 @@ qse_ssize_t qse_tio_write (qse_tio_t* tio, const qse_char_t* str, qse_size_t siz const qse_char_t* end = str + size; while (p < end) { - n = tio_putc (tio, *p); - if (n == -1) return -1; + n = tio_putc (tio, *p, &flush_needed); + if (n <= -1) return -1; if (n == 0) break; p++; } } + if (flush_needed && qse_tio_flush(tio) <= -1) return -1; return p - str; } diff --git a/qse/lib/cmn/tio.c b/qse/lib/cmn/tio.c index 14883c75..aeabc596 100644 --- a/qse/lib/cmn/tio.c +++ b/qse/lib/cmn/tio.c @@ -1,5 +1,5 @@ /* - * $Id: tio.c 556 2011-08-31 15:43:46Z hyunghwan.chung $ + * $Id: tio.c 559 2011-09-04 16:21:54Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -174,7 +174,7 @@ int qse_tio_attachout (qse_tio_t* tio, qse_tio_io_t output, void* arg) tio->output_func = output; tio->output_arg = arg; - tio->outbuf_len = 0; + tio->outbuf_len = 0; return 0; } @@ -236,9 +236,9 @@ qse_ssize_t qse_tio_flush (qse_tio_t* tio) void qse_tio_purge (qse_tio_t* tio) { - tio->input_status = 0; - tio->inbuf_curp = 0; - tio->inbuf_len = 0; - tio->outbuf_len = 0; + tio->input_status = 0; + tio->inbuf_curp = 0; + tio->inbuf_len = 0; + tio->outbuf_len = 0; tio->errnum = QSE_TIO_ENOERR; } diff --git a/qse/lib/cmn/tre-compile.c b/qse/lib/cmn/tre-compile.c index 1bfa9638..264a4766 100644 --- a/qse/lib/cmn/tre-compile.c +++ b/qse/lib/cmn/tre-compile.c @@ -252,406 +252,410 @@ tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, switch (symbol) { - case ADDTAGS_SET_SUBMATCH_END: - { - int id = tre_stack_pop_int(stack); - int i; - - /* Add end of this submatch to regset. */ - for (i = 0; regset[i] >= 0; i++); - regset[i] = id * 2 + 1; - regset[i + 1] = -1; - - /* Pop this submatch from the parents stack. 
*/ - for (i = 0; parents[i] >= 0; i++); - parents[i - 1] = -1; - break; - } - - case ADDTAGS_RECURSE: - node = tre_stack_pop_voidptr(stack); - - if (node->submatch_id >= 0) + case ADDTAGS_SET_SUBMATCH_END: { - int id = node->submatch_id; + int id = tre_stack_pop_int(stack); int i; - - /* Add start of this submatch to regset. */ + /* Add end of this submatch to regset. */ for (i = 0; regset[i] >= 0; i++); - regset[i] = id * 2; + regset[i] = id * 2 + 1; regset[i + 1] = -1; - if (!first_pass) + /* Pop this submatch from the parents stack. */ + for (i = 0; parents[i] >= 0; i++); + parents[i - 1] = -1; + break; + } + + case ADDTAGS_RECURSE: + node = tre_stack_pop_voidptr(stack); + + if (node->submatch_id >= 0) { - for (i = 0; parents[i] >= 0; i++); - tnfa->submatch_data[id].parents = NULL; - if (i > 0) + int id = node->submatch_id; + int i; + + + /* Add start of this submatch to regset. */ + for (i = 0; regset[i] >= 0; i++); + regset[i] = id * 2; + regset[i + 1] = -1; + + if (!first_pass) { - int *p = xmalloc(mem->mmgr, sizeof(*p) * (i + 1)); - if (p == NULL) + for (i = 0; parents[i] >= 0; i++); + tnfa->submatch_data[id].parents = NULL; + if (i > 0) { - status = REG_ESPACE; - break; + int *p = xmalloc(mem->mmgr, sizeof(*p) * (i + 1)); + if (p == NULL) + { + status = REG_ESPACE; + break; + } + assert(tnfa->submatch_data[id].parents == NULL); + tnfa->submatch_data[id].parents = p; + for (i = 0; parents[i] >= 0; i++) + p[i] = parents[i]; + p[i] = -1; } - assert(tnfa->submatch_data[id].parents == NULL); - tnfa->submatch_data[id].parents = p; - for (i = 0; parents[i] >= 0; i++) - p[i] = parents[i]; - p[i] = -1; + } + + /* Add end of this submatch to regset after processing this + node. 
*/ + STACK_PUSHX(stack, int, node->submatch_id); + STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); + } + + switch (node->type) + { + case LITERAL: + { + tre_literal_t *lit = node->obj; + + if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + { + int i; + DPRINT(("Literal %d-%d\n", + (int)lit->code_min, (int)lit->code_max)); + if (regset[0] >= 0) + { + /* Regset is not empty, so add a tag before the + literal or backref. */ + if (!first_pass) + { + status = tre_add_tag_left(mem, node, tag); + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + else + { + DPRINT((" num_tags = 1\n")); + node->num_tags = 1; + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + } + else + { + assert(!IS_TAG(lit)); + } + break; + } + + + case CATENATION: + { + tre_catenation_t *cat = node->obj; + tre_ast_node_t *left = cat->left; + tre_ast_node_t *right = cat->right; + int reserved_tag = -1; + DPRINT(("Catenation, next_tag = %d\n", next_tag)); + + + /* After processing right child. */ + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); + + /* Process right child. */ + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* After processing left child. */ + STACK_PUSHX(stack, int, next_tag + left->num_tags); + DPRINT((" Pushing %d for after left\n", + next_tag + left->num_tags)); + if (left->num_tags > 0 && right->num_tags > 0) + { + /* Reserve the next tag to the right child. 
*/ + DPRINT((" Reserving next_tag %d to right child\n", + next_tag)); + reserved_tag = next_tag; + next_tag++; + } + STACK_PUSHX(stack, int, reserved_tag); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); + + /* Process left child. */ + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + break; + } + + + case ITERATION: + { + tre_iteration_t *iter = node->obj; + DPRINT(("Iteration\n")); + + if (first_pass) + { + STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); + } + else + { + STACK_PUSHX(stack, int, tag); + STACK_PUSHX(stack, int, iter->minimal); + } + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); + + STACK_PUSHX(stack, voidptr, iter->arg); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* Regset is not empty, so add a tag here. */ + if (regset[0] >= 0 || iter->minimal) + { + if (!first_pass) + { + int i; + status = tre_add_tag_left(mem, node, tag); + if (iter->minimal) + tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; + else + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + direction = TRE_TAG_MINIMIZE; + break; + } + + case UNION: + { + tre_union_t *uni = node->obj; + tre_ast_node_t *left = uni->left; + tre_ast_node_t *right = uni->right; + int left_tag; + int right_tag; + + if (regset[0] >= 0) + { + left_tag = next_tag; + right_tag = next_tag + 1; + } + else + { + left_tag = tag; + right_tag = next_tag; + } + + DPRINT(("Union\n")); + + /* After processing right child. 
*/ + STACK_PUSHX(stack, int, right_tag); + STACK_PUSHX(stack, int, left_tag); + STACK_PUSHX(stack, voidptr, regset); + STACK_PUSHX(stack, int, regset[0] >= 0); + STACK_PUSHX(stack, voidptr, node); + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); + + /* Process right child. */ + STACK_PUSHX(stack, voidptr, right); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* After processing left child. */ + STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); + + /* Process left child. */ + STACK_PUSHX(stack, voidptr, left); + STACK_PUSHX(stack, int, ADDTAGS_RECURSE); + + /* Regset is not empty, so add a tag here. */ + if (regset[0] >= 0) + { + if (!first_pass) + { + int i; + status = tre_add_tag_left(mem, node, tag); + tnfa->tag_directions[tag] = direction; + if (minimal_tag >= 0) + { + DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); + for (i = 0; tnfa->minimal_tags[i] >= 0; i++); + tnfa->minimal_tags[i] = tag; + tnfa->minimal_tags[i + 1] = minimal_tag; + tnfa->minimal_tags[i + 2] = -1; + minimal_tag = -1; + num_minimals++; + } + tre_purge_regset(regset, tnfa, tag); + } + + DPRINT((" num_tags++\n")); + regset[0] = -1; + tag = next_tag; + num_tags++; + next_tag++; + } + + if (node->num_submatches > 0) + { + /* The next two tags are reserved for markers. */ + next_tag++; + tag = next_tag; + next_tag++; + } + + break; } } - /* Add end of this submatch to regset after processing this - node. */ - STACK_PUSHX(stack, int, node->submatch_id); - STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END); - } - - switch (node->type) - { - case LITERAL: - { - tre_literal_t *lit = node->obj; - - if (!IS_SPECIAL(lit) || IS_BACKREF(lit)) + if (node->submatch_id >= 0) { int i; - DPRINT(("Literal %d-%d\n", - (int)lit->code_min, (int)lit->code_max)); - if (regset[0] >= 0) - { - /* Regset is not empty, so add a tag before the - literal or backref. 
*/ - if (!first_pass) - { - status = tre_add_tag_left(mem, node, tag); - tnfa->tag_directions[tag] = direction; - if (minimal_tag >= 0) - { - DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); - for (i = 0; tnfa->minimal_tags[i] >= 0; i++); - tnfa->minimal_tags[i] = tag; - tnfa->minimal_tags[i + 1] = minimal_tag; - tnfa->minimal_tags[i + 2] = -1; - minimal_tag = -1; - num_minimals++; - } - tre_purge_regset(regset, tnfa, tag); - } - else - { - DPRINT((" num_tags = 1\n")); - node->num_tags = 1; - } + /* Push this submatch on the parents stack. */ + for (i = 0; parents[i] >= 0; i++); + parents[i] = node->submatch_id; + parents[i + 1] = -1; + } - DPRINT((" num_tags++\n")); - regset[0] = -1; - tag = next_tag; - num_tags++; - next_tag++; - } - } - else - { - assert(!IS_TAG(lit)); - } - break; - } - case CATENATION: + break; /* end case: ADDTAGS_RECURSE */ + + case ADDTAGS_AFTER_ITERATION: { - tre_catenation_t *cat = node->obj; - tre_ast_node_t *left = cat->left; - tre_ast_node_t *right = cat->right; - int reserved_tag = -1; - DPRINT(("Catenation, next_tag = %d\n", next_tag)); - - - /* After processing right child. */ - STACK_PUSHX(stack, voidptr, node); - STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT); - - /* Process right child. */ - STACK_PUSHX(stack, voidptr, right); - STACK_PUSHX(stack, int, ADDTAGS_RECURSE); - - /* After processing left child. */ - STACK_PUSHX(stack, int, next_tag + left->num_tags); - DPRINT((" Pushing %d for after left\n", - next_tag + left->num_tags)); - if (left->num_tags > 0 && right->num_tags > 0) - { - /* Reserve the next tag to the right child. */ - DPRINT((" Reserving next_tag %d to right child\n", - next_tag)); - reserved_tag = next_tag; - next_tag++; - } - STACK_PUSHX(stack, int, reserved_tag); - STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT); - - /* Process left child. 
*/ - STACK_PUSHX(stack, voidptr, left); - STACK_PUSHX(stack, int, ADDTAGS_RECURSE); - - } - break; - case ITERATION: - { - tre_iteration_t *iter = node->obj; - DPRINT(("Iteration\n")); - + int minimal = 0; + int enter_tag; + node = tre_stack_pop_voidptr(stack); if (first_pass) { - STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal); + node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags + + tre_stack_pop_int(stack); + minimal_tag = -1; } else { - STACK_PUSHX(stack, int, tag); - STACK_PUSHX(stack, int, iter->minimal); + minimal = tre_stack_pop_int(stack); + enter_tag = tre_stack_pop_int(stack); + if (minimal) + minimal_tag = enter_tag; } - STACK_PUSHX(stack, voidptr, node); - STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION); - - STACK_PUSHX(stack, voidptr, iter->arg); - STACK_PUSHX(stack, int, ADDTAGS_RECURSE); - - /* Regset is not empty, so add a tag here. */ - if (regset[0] >= 0 || iter->minimal) - { - if (!first_pass) - { - int i; - status = tre_add_tag_left(mem, node, tag); - if (iter->minimal) - tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE; - else - tnfa->tag_directions[tag] = direction; - if (minimal_tag >= 0) - { - DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); - for (i = 0; tnfa->minimal_tags[i] >= 0; i++); - tnfa->minimal_tags[i] = tag; - tnfa->minimal_tags[i + 1] = minimal_tag; - tnfa->minimal_tags[i + 2] = -1; - minimal_tag = -1; - num_minimals++; - } - tre_purge_regset(regset, tnfa, tag); - } - - DPRINT((" num_tags++\n")); - regset[0] = -1; - tag = next_tag; - num_tags++; - next_tag++; - } - direction = TRE_TAG_MINIMIZE; - } - break; - case UNION: - { - tre_union_t *uni = node->obj; - tre_ast_node_t *left = uni->left; - tre_ast_node_t *right = uni->right; - int left_tag; - int right_tag; - - if (regset[0] >= 0) - { - left_tag = next_tag; - right_tag = next_tag + 1; - } - else - { - left_tag = tag; - right_tag = next_tag; - } - - DPRINT(("Union\n")); - - /* After processing right child. 
*/ - STACK_PUSHX(stack, int, right_tag); - STACK_PUSHX(stack, int, left_tag); - STACK_PUSHX(stack, voidptr, regset); - STACK_PUSHX(stack, int, regset[0] >= 0); - STACK_PUSHX(stack, voidptr, node); - STACK_PUSHX(stack, voidptr, right); - STACK_PUSHX(stack, voidptr, left); - STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT); - - /* Process right child. */ - STACK_PUSHX(stack, voidptr, right); - STACK_PUSHX(stack, int, ADDTAGS_RECURSE); - - /* After processing left child. */ - STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT); - - /* Process left child. */ - STACK_PUSHX(stack, voidptr, left); - STACK_PUSHX(stack, int, ADDTAGS_RECURSE); - - /* Regset is not empty, so add a tag here. */ - if (regset[0] >= 0) - { - if (!first_pass) - { - int i; - status = tre_add_tag_left(mem, node, tag); - tnfa->tag_directions[tag] = direction; - if (minimal_tag >= 0) - { - DPRINT(("Minimal %d, %d\n", minimal_tag, tag)); - for (i = 0; tnfa->minimal_tags[i] >= 0; i++); - tnfa->minimal_tags[i] = tag; - tnfa->minimal_tags[i + 1] = minimal_tag; - tnfa->minimal_tags[i + 2] = -1; - minimal_tag = -1; - num_minimals++; - } - tre_purge_regset(regset, tnfa, tag); - } - - DPRINT((" num_tags++\n")); - regset[0] = -1; - tag = next_tag; - num_tags++; - next_tag++; - } - - if (node->num_submatches > 0) - { - /* The next two tags are reserved for markers. */ - next_tag++; - tag = next_tag; - next_tag++; - } - - break; - } - } - - if (node->submatch_id >= 0) - { - int i; - /* Push this submatch on the parents stack. 
*/ - for (i = 0; parents[i] >= 0; i++); - parents[i] = node->submatch_id; - parents[i + 1] = -1; - } - - break; /* end case: ADDTAGS_RECURSE */ - - case ADDTAGS_AFTER_ITERATION: - { - int minimal = 0; - int enter_tag; - node = tre_stack_pop_voidptr(stack); - if (first_pass) - { - node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags - + tre_stack_pop_int(stack); - minimal_tag = -1; - } - else - { - minimal = tre_stack_pop_int(stack); - enter_tag = tre_stack_pop_int(stack); - if (minimal) - minimal_tag = enter_tag; - } - - DPRINT(("After iteration\n")); - if (!first_pass) - { - DPRINT((" Setting direction to %s\n", - minimal ? "minimize" : "maximize")); - if (minimal) - direction = TRE_TAG_MINIMIZE; - else - direction = TRE_TAG_MAXIMIZE; - } - break; - } - - case ADDTAGS_AFTER_CAT_LEFT: - { - int new_tag = tre_stack_pop_int(stack); - next_tag = tre_stack_pop_int(stack); - DPRINT(("After cat left, tag = %d, next_tag = %d\n", - tag, next_tag)); - if (new_tag >= 0) - { - DPRINT((" Setting tag to %d\n", new_tag)); - tag = new_tag; - } - break; - } - - case ADDTAGS_AFTER_CAT_RIGHT: - DPRINT(("After cat right\n")); - node = tre_stack_pop_voidptr(stack); - if (first_pass) - node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags - + ((tre_catenation_t *)node->obj)->right->num_tags; - break; - - case ADDTAGS_AFTER_UNION_LEFT: - DPRINT(("After union left\n")); - /* Lift the bottom of the `regset' array so that when processing - the right operand the items currently in the array are - invisible. The original bottom was saved at ADDTAGS_UNION and - will be restored at ADDTAGS_AFTER_UNION_RIGHT below. 
*/ - while (*regset >= 0) - regset++; - break; - - case ADDTAGS_AFTER_UNION_RIGHT: - { - int added_tags, tag_left, tag_right; - tre_ast_node_t *left = tre_stack_pop_voidptr(stack); - tre_ast_node_t *right = tre_stack_pop_voidptr(stack); - DPRINT(("After union right\n")); - node = tre_stack_pop_voidptr(stack); - added_tags = tre_stack_pop_int(stack); - if (first_pass) - { - node->num_tags = ((tre_union_t *)node->obj)->left->num_tags - + ((tre_union_t *)node->obj)->right->num_tags + added_tags - + ((node->num_submatches > 0) ? 2 : 0); - } - regset = tre_stack_pop_voidptr(stack); - tag_left = tre_stack_pop_int(stack); - tag_right = tre_stack_pop_int(stack); - - /* Add tags after both children, the left child gets a smaller - tag than the right child. This guarantees that we prefer - the left child over the right child. */ - /* XXX - This is not always necessary (if the children have - tags which must be seen for every match of that child). */ - /* XXX - Check if this is the only place where tre_add_tag_right - is used. If so, use tre_add_tag_left (putting the tag before - the child as opposed after the child) and throw away - tre_add_tag_right. */ - if (node->num_submatches > 0) - { + + DPRINT(("After iteration\n")); if (!first_pass) { - status = tre_add_tag_right(mem, left, tag_left); - tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; - status = tre_add_tag_right(mem, right, tag_right); - tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; + DPRINT((" Setting direction to %s\n", + minimal ? 
"minimize" : "maximize")); + if (minimal) + direction = TRE_TAG_MINIMIZE; + else + direction = TRE_TAG_MAXIMIZE; } - DPRINT((" num_tags += 2\n")); - num_tags += 2; + break; } - direction = TRE_TAG_MAXIMIZE; - break; - } - - default: - assert(0); - break; + + case ADDTAGS_AFTER_CAT_LEFT: + { + int new_tag = tre_stack_pop_int(stack); + next_tag = tre_stack_pop_int(stack); + DPRINT(("After cat left, tag = %d, next_tag = %d\n", + tag, next_tag)); + if (new_tag >= 0) + { + DPRINT((" Setting tag to %d\n", new_tag)); + tag = new_tag; + } + break; + } + + case ADDTAGS_AFTER_CAT_RIGHT: + DPRINT(("After cat right\n")); + node = tre_stack_pop_voidptr(stack); + if (first_pass) + node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags + + ((tre_catenation_t *)node->obj)->right->num_tags; + break; + + case ADDTAGS_AFTER_UNION_LEFT: + DPRINT(("After union left\n")); + /* Lift the bottom of the `regset' array so that when processing + the right operand the items currently in the array are + invisible. The original bottom was saved at ADDTAGS_UNION and + will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */ + while (*regset >= 0) + regset++; + break; + + case ADDTAGS_AFTER_UNION_RIGHT: + { + int added_tags, tag_left, tag_right; + tre_ast_node_t *left = tre_stack_pop_voidptr(stack); + tre_ast_node_t *right = tre_stack_pop_voidptr(stack); + DPRINT(("After union right\n")); + node = tre_stack_pop_voidptr(stack); + added_tags = tre_stack_pop_int(stack); + if (first_pass) + { + node->num_tags = ((tre_union_t *)node->obj)->left->num_tags + + ((tre_union_t *)node->obj)->right->num_tags + added_tags + + ((node->num_submatches > 0) ? 2 : 0); + } + regset = tre_stack_pop_voidptr(stack); + tag_left = tre_stack_pop_int(stack); + tag_right = tre_stack_pop_int(stack); + + /* Add tags after both children, the left child gets a smaller + tag than the right child. This guarantees that we prefer + the left child over the right child. 
*/ + /* XXX - This is not always necessary (if the children have + tags which must be seen for every match of that child). */ + /* XXX - Check if this is the only place where tre_add_tag_right + is used. If so, use tre_add_tag_left (putting the tag before + the child as opposed after the child) and throw away + tre_add_tag_right. */ + if (node->num_submatches > 0) + { + if (!first_pass) + { + status = tre_add_tag_right(mem, left, tag_left); + tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE; + status = tre_add_tag_right(mem, right, tag_right); + tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE; + } + DPRINT((" num_tags += 2\n")); + num_tags += 2; + } + direction = TRE_TAG_MAXIMIZE; + break; + } + + default: + assert(0); + break; } /* end switch(symbol) */ } /* end while(tre_stack_num_objects(stack) > bottom) */ @@ -1918,7 +1922,9 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags) /* Allocate a stack used throughout the compilation process for various purposes. */ - stack = tre_stack_new(preg->mmgr, 512, 10240, 128); +/* QSE: deleted limit on the stack size + stack = tre_stack_new(preg->mmgr, 512, 10240, 128); */ + stack = tre_stack_new(preg->mmgr, 512, -1, 128); if (!stack) return REG_ESPACE; /* Allocate a fast memory allocator. */ diff --git a/qse/lib/cmn/tre-parse.c b/qse/lib/cmn/tre-parse.c index d46bb72c..30a95fb5 100644 --- a/qse/lib/cmn/tre-parse.c +++ b/qse/lib/cmn/tre-parse.c @@ -218,98 +218,6 @@ tre_compare_items(const void *a, const void *b, void* ctx) return 0; } -#if 0 -#ifndef TRE_USE_SYSTEM_WCTYPE -/* isalnum() and the rest may be macros, so wrap them to functions. 
*/ -int tre_isalnum_func(tre_cint_t c) -{ - return tre_isalnum(c); -} -int tre_isalpha_func(tre_cint_t c) -{ - return tre_isalpha(c); -} -int tre_isascii_func(tre_cint_t c) -{ - return !(c >> 7); -} -int tre_isblank_func(tre_cint_t c) -{ - return tre_isblank(c); -} -int tre_iscntrl_func(tre_cint_t c) -{ - return tre_iscntrl(c); -} -int tre_isdigit_func(tre_cint_t c) -{ - return tre_isdigit(c); -} -int tre_isgraph_func(tre_cint_t c) -{ - return tre_isgraph(c); -} -int tre_islower_func(tre_cint_t c) -{ - return tre_islower(c); -} -int tre_isprint_func(tre_cint_t c) -{ - return tre_isprint(c); -} -int tre_ispunct_func(tre_cint_t c) -{ - return tre_ispunct(c); -} -int tre_isspace_func(tre_cint_t c) -{ - return tre_isspace(c); -} -int tre_isupper_func(tre_cint_t c) -{ - return tre_isupper(c); -} -int tre_isxdigit_func(tre_cint_t c) -{ - return tre_isxdigit(c); -} - -struct -{ - char *name; - int (*func)(tre_cint_t); -} tre_ctype_map[] = -{ - { "alnum", &tre_isalnum_func }, - { "alpha", &tre_isalpha_func }, - { "ascii", &tre_isascii_func }, - { "blank", &tre_isblank_func }, - { "cntrl", &tre_iscntrl_func }, - { "digit", &tre_isdigit_func }, - { "graph", &tre_isgraph_func }, - { "lower", &tre_islower_func }, - { "print", &tre_isprint_func }, - { "punct", &tre_ispunct_func }, - { "space", &tre_isspace_func }, - { "upper", &tre_isupper_func }, - { "xdigit", &tre_isxdigit_func }, - { NULL, NULL} -}; - -tre_ctype_t tre_ctype(const char *name) -{ - int i; - for (i = 0; tre_ctype_map[i].name != NULL; i++) - { - if (qse_mbscmp(name, tre_ctype_map[i].name) == 0) - return tre_ctype_map[i].func; - } - return (tre_ctype_t)0; -} - -#endif /* !TRE_USE_SYSTEM_WCTYPE */ -#endif - /* Maximum number of character classes that can occur in a negated bracket expression. 
*/ #define MAX_NEG_CLASSES 64 @@ -882,11 +790,15 @@ tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result) minimal = !(ctx->cflags & REG_UNGREEDY); r++; } +/* QSE - commented out for minimal impact on backward compatibility. + * X{x,y}* X{x,y}+ */ +#if 0 else if (*r == CHAR_STAR || *r == CHAR_PLUS) { /* These are reserved for future extensions. */ return REG_BADRPT; } +#endif } /* Create the AST node(s). */ @@ -1196,12 +1108,18 @@ tre_parse(tre_parse_ctx_t *ctx) minimal = !(ctx->cflags & REG_UNGREEDY); ctx->re++; } +/* QSE - TRE has provisions for ** or *+ as a special repetition operator. + * however, that seems to break backward compatibility. + * '+' in 'a*+' is not treated as a normal character with the + * following block enabled. So let me comment it out */ +#if 0 else if (*(ctx->re + 1) == CHAR_STAR || *(ctx->re + 1) == CHAR_PLUS) { /* These are reserved for future extensions. */ return REG_BADRPT; } +#endif } DPRINT(("tre_parse: %s star: '%.*" STRF "'\n", @@ -1512,6 +1430,7 @@ parse_brace: ctx->re++; if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end) { + /* QSE */ #if 0 /* 8 bit hex char. */ char tmp[3] = {0, 0, 0}; @@ -1552,6 +1471,7 @@ parse_brace: else if (ctx->re < ctx->re_end) { /* Wide char. 
*/ + /* QSE */ #if 0 char tmp[32]; long val; diff --git a/qse/lib/cmn/tre-stack.c b/qse/lib/cmn/tre-stack.c index a7d61394..9f0503ee 100644 --- a/qse/lib/cmn/tre-stack.c +++ b/qse/lib/cmn/tre-stack.c @@ -117,7 +117,9 @@ tre_stack_push(tre_stack_t *s, union tre_stack_item value) } else { - if (s->size >= s->max_size) +/* QSE added check for s->max_size > 0 + if (s->size >= s->max_size)*/ + if (s->max_size > 0 && s->size >= s->max_size) { DPRINT(("tre_stack_push: stack full\n")); return REG_ESPACE; @@ -128,7 +130,9 @@ tre_stack_push(tre_stack_t *s, union tre_stack_item value) int new_size; DPRINT(("tre_stack_push: trying to realloc more space\n")); new_size = s->size + s->increment; - if (new_size > s->max_size) +/* QSE added check for s->max_size > 0 + if (new_size > s->max_size) */ + if (s->max_size > 0 && new_size > s->max_size) new_size = s->max_size; new_buffer = xrealloc(s->mmgr, s->stack, sizeof(*new_buffer) * new_size); if (new_buffer == NULL) diff --git a/qse/lib/sed/sed.c b/qse/lib/sed/sed.c index c55368d6..357e8e8e 100644 --- a/qse/lib/sed/sed.c +++ b/qse/lib/sed/sed.c @@ -1,5 +1,5 @@ /* - * $Id: sed.c 558 2011-09-02 15:27:44Z hyunghwan.chung $ + * $Id: sed.c 559 2011-09-04 16:21:54Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -177,7 +177,11 @@ static qse_tre_t* maketre ( if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, ((sed->option & QSE_SED_EXTENDEDREX)? QSE_TRE_EXTENDED: 0)) <= -1) { - SETERR1 (sed, QSE_SED_EREXBL, str->ptr, str->len, loc); + qse_sed_errnum_t errnum; + errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? 
+ QSE_SED_ENOMEM: QSE_SED_EREXBL; + SETERR1 (sed, errnum, str->ptr, str->len, loc); + qse_tre_close (tre); return QSE_NULL; } @@ -201,8 +205,13 @@ static int matchtre ( n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt); if (n <= -1) { + qse_sed_errnum_t errnum; + if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMATCH) return 0; - SETERR0 (sed, QSE_SED_EREXMA, loc); + + errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? + QSE_SED_ENOMEM: QSE_SED_EREXMA; + SETERR0 (sed, errnum, loc); return -1; } @@ -374,6 +383,41 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd) } } +/* translate the character following a backslash into the control character it denotes (a, f, n, r, t, v); any other character is returned unchanged so that the caller can tell it was not special. */ +static qse_cint_t trans_escaped (qse_cint_t c) +{ + switch (c) + { + case QSE_T('a'): + c = QSE_T('\a'); + break; +/* +Omitted for clash with regular expression \b. + case QSE_T('b'): + c = QSE_T('\b'); + break; +*/ + + case QSE_T('f'): + c = QSE_T('\f'); + break; + case QSE_T('n'): + c = QSE_T('\n'); + break; + case QSE_T('r'): + c = QSE_T('\r'); + break; + case QSE_T('t'): + c = QSE_T('\t'); + break; + case QSE_T('v'): + c = QSE_T('\v'); + break; + } + + return c; +} + static void* compile_rex (qse_sed_t* sed, qse_char_t rxend) { #ifdef USE_REX @@ -401,8 +443,10 @@ static void* compile_rex (qse_sed_t* sed, qse_char_t rxend) if (c == QSE_T('\\')) { - c = NXTSC (sed); - if (c == QSE_CHAR_EOF || c == QSE_T('\n')) + qse_cint_t nc; + + nc = NXTSC (sed); + if (nc == QSE_CHAR_EOF /*|| nc == QSE_T('\n')*/) { SETERR1 ( sed, QSE_SED_EREXIC, @@ -413,8 +457,25 @@ static void* compile_rex (qse_sed_t* sed, qse_char_t rxend) return QSE_NULL; } - if (c == QSE_T('n')) c = QSE_T('\n'); - /* TODO: support more escaped characters?? */ + if (nc == QSE_T('\n')) c = nc; + else + { + qse_cint_t ec; + + ec = trans_escaped (nc); + if (ec == nc) + { + /* if the character after a backslash is not special at + * this layer, add the backslash into the regular expression + * buffer as it is. 
*/ + if (qse_str_ccat (&sed->tmp.rex, QSE_T('\\')) == (qse_size_t)-1) + { + SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); + return QSE_NULL; + } + } + c = ec; + } } if (qse_str_ccat (&sed->tmp.rex, c) == (qse_size_t)-1) @@ -824,6 +885,16 @@ do { \ } \ } while (0) +#define CHECK_CMDIC_ESCAPED(sed,cmd,c,action) \ +do { \ + if (c == QSE_CHAR_EOF) \ + { \ + SETERR1 (sed, QSE_SED_ECMDIC, \ + &cmd->type, 1, &sed->src.loc); \ + action; \ + } \ +} while (0) + static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) { qse_cint_t c, delim; @@ -859,14 +930,39 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) { CHECK_CMDIC (sed, cmd, c, goto oops); -#if 0 if (c == QSE_T('\\')) { - c = NXTSC (sed); - CHECK_CMDIC (sed, cmd, c, goto oops); - if (c == QSE_T('n')) c = QSE_T('\n'); + qse_cint_t nc; + + nc = NXTSC (sed); + CHECK_CMDIC_ESCAPED (sed, cmd, nc, goto oops); + + if (nc == QSE_T('\n')) c = nc; + else + { + qse_cint_t ec; + + /* Escaping a known special character for the regular expression + * part is done here. However, escaping a special character for + * the replacement part is done in do_subst() except '\n' because + * it has more special characters like '&'. */ + + ec = trans_escaped (nc); + if (ec == nc) + { + /* if the character after a backslash is not special at + * this layer, add the backslash into the regular expression + * buffer as it is. 
*/ + if (qse_str_ccat (t[i], QSE_T('\\')) == (qse_size_t)-1) + { + SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); + goto oops; + } + } + + c = ec; + } } -#endif if (qse_str_ccat (t[i], c) == (qse_size_t)-1) { @@ -1017,8 +1113,8 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (c == QSE_T('\\')) { c = NXTSC (sed); - CHECK_CMDIC (sed, cmd, c, goto oops); - if (c == QSE_T('n')) c = QSE_T('\n'); + CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops); + c = trans_escaped (c); } b[0] = c; @@ -1039,8 +1135,8 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (c == QSE_T('\\')) { c = NXTSC (sed); - CHECK_CMDIC (sed, cmd, c, goto oops); - if (c == QSE_T('n')) c = QSE_T('\n'); + CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops); + c = trans_escaped (c); } if (pos >= QSE_STR_LEN(t)) @@ -1659,10 +1755,21 @@ static int write_char (qse_sed_t* sed, qse_char_t c) static int write_str (qse_sed_t* sed, const qse_char_t* str, qse_size_t len) { qse_size_t i; + int flush_needed = 0; + for (i = 0; i < len; i++) { - if (write_char (sed, str[i]) <= -1) return -1; + /*if (write_char (sed, str[i]) <= -1) return -1;*/ + sed->e.out.buf[sed->e.out.len++] = str[i]; + if (sed->e.out.len >= QSE_COUNTOF(sed->e.out.buf)) + { + if (flush (sed) <= -1) return -1; + flush_needed = 0; + } + else if (str[i] == QSE_T('\n')) flush_needed = 1; } + + if (flush_needed && flush(sed) <= -1) return -1; return 0; } @@ -1913,7 +2020,7 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) /* TODO: support different line end convension */ if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\n')) str.len--; - + str_end = str.ptr + str.len; cur = str; @@ -1984,27 +2091,33 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (max_count > 0 && sub_count + 1 != max_count) { - m = qse_str_ncat ( - &sed->e.txt.subst, - cur.ptr, mat.ptr-cur.ptr+mat.len - ); - - if (m == (qse_size_t)-1) + if (cur.ptr < str_end) { - SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); - return -1; + m = qse_str_ncat ( + 
&sed->e.txt.subst, + cur.ptr, mat.ptr-cur.ptr+mat.len + ); + if (m == (qse_size_t)-1) + { + SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); + return -1; + } } } else { repl = 1; - m = qse_str_ncat ( - &sed->e.txt.subst, cur.ptr, mat.ptr-cur.ptr); - if (m == (qse_size_t)-1) + if (cur.ptr < str_end) { - SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); - return -1; + m = qse_str_ncat ( + &sed->e.txt.subst, cur.ptr, mat.ptr-cur.ptr + ); + if (m == (qse_size_t)-1) + { + SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); + return -1; + } } for (i = 0; i < cmd->u.subst.rpl.len; i++) @@ -2018,40 +2131,20 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (nc >= QSE_T('1') && nc <= QSE_T('9')) { int smi = nc - QSE_T('1'); - m = qse_str_ncat (&sed->e.txt.subst, submat[smi].ptr, submat[smi].len); + m = qse_str_ncat ( + &sed->e.txt.subst, + submat[smi].ptr, submat[smi].len + ); } else { #endif - switch (nc) - { - case QSE_T('n'): - nc = QSE_T('\n'); - break; - case QSE_T('r'): - nc = QSE_T('\r'); - break; - case QSE_T('t'): - nc = QSE_T('\t'); - break; - case QSE_T('f'): - nc = QSE_T('\f'); - break; - case QSE_T('b'): - nc = QSE_T('\b'); - break; - case QSE_T('v'): - nc = QSE_T('\v'); - break; - case QSE_T('a'): - nc = QSE_T('\a'); - break; -#ifndef USE_REX - } -#endif - + /* the know speical characters have been escaped + * in get_subst(). 
so i don't call trans_escaped() here */ m = qse_str_ccat (&sed->e.txt.subst, nc); +#ifndef USE_REX } +#endif i++; } @@ -2085,13 +2178,15 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (mat.len == 0) { skip_one_char: - /* special treament is need if the match length is 0 */ - - m = qse_str_ncat (&sed->e.txt.subst, cur.ptr, 1); - if (m == (qse_size_t)-1) + if (cur.ptr < str_end) { - SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); - return -1; + /* special treament is needed if the match length is 0 */ + m = qse_str_ncat (&sed->e.txt.subst, cur.ptr, 1); + if (m == (qse_size_t)-1) + { + SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); + return -1; + } } cur.ptr++; cur.len--; diff --git a/qse/regress/sed/regress.sh.in b/qse/regress/sed/regress.sh.in index 6265e856..0661c881 100755 --- a/qse/regress/sed/regress.sh.in +++ b/qse/regress/sed/regress.sh.in @@ -56,7 +56,7 @@ OUTFILE_XMA="${OUTFILE}.xma" XMAOPTS="-m 500000" PROGS=" - s001.sed/s001.dat//-n + s001.sed/s001.dat//-n -r s002.sed/s002.dat// s003.sed/s003.dat// s004.sed/s004.dat//