From a4a82873f7a56e288da294204d145cae105cb298 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Wed, 7 Oct 2009 07:47:16 +0000 Subject: [PATCH] enhanced cut --- qse/include/qse/cmn/Makefile.am | 2 +- qse/include/qse/cmn/Makefile.in | 2 +- qse/include/qse/cmn/str.h | 46 +++-- qse/include/qse/cut/cut.h | 112 ++++++------ qse/lib/cmn/str_dyn.c | 23 ++- qse/lib/cmn/str_utl.c | 125 ++++++++++++- qse/lib/cut/cut.c | 303 ++++++++++++++++++++++++++------ qse/lib/cut/cut.h | 13 +- qse/lib/cut/err.c | 4 +- qse/samples/cmn/str.c | 6 +- 10 files changed, 496 insertions(+), 140 deletions(-) diff --git a/qse/include/qse/cmn/Makefile.am b/qse/include/qse/cmn/Makefile.am index 53c0b9e0..a64183a8 100644 --- a/qse/include/qse/cmn/Makefile.am +++ b/qse/include/qse/cmn/Makefile.am @@ -3,7 +3,7 @@ pkgincludedir = $(includedir)/qse/cmn pkginclude_HEADERS = \ mem.h chr.h str.h lda.h map.h \ rex.h sll.h dll.h opt.h tio.h \ - fio.h pio.h sio.h time.h main.h stdio.h + fio.h pio.h sio.h time.h misc.h main.h stdio.h #if ENABLE_CXX #pkginclude_HEADERS += diff --git a/qse/include/qse/cmn/Makefile.in b/qse/include/qse/cmn/Makefile.in index 220b6611..74bbe621 100644 --- a/qse/include/qse/cmn/Makefile.in +++ b/qse/include/qse/cmn/Makefile.in @@ -199,7 +199,7 @@ top_srcdir = @top_srcdir@ pkginclude_HEADERS = \ mem.h chr.h str.h lda.h map.h \ rex.h sll.h dll.h opt.h tio.h \ - fio.h pio.h sio.h time.h main.h stdio.h + fio.h pio.h sio.h time.h misc.h main.h stdio.h all: all-am diff --git a/qse/include/qse/cmn/str.h b/qse/include/qse/cmn/str.h index 307cfcbe..953552f1 100644 --- a/qse/include/qse/cmn/str.h +++ b/qse/include/qse/cmn/str.h @@ -1,5 +1,5 @@ /* - * $Id: str.h 290 2009-09-19 04:28:49Z hyunghwan.chung $ + * $Id: str.h 295 2009-10-06 13:47:16Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -104,12 +104,12 @@ struct qse_str_t } /** - * The qse_strtrm_op_t defines a string trimming operation. + * The qse_strtrmc_op_t defines a string trimming operation. 
*/ -enum qse_strtrm_op_t +enum qse_strtrmc_op_t { - QSE_STRTRM_LEFT = (1 << 0), /**< trim leading spaces */ - QSE_STRTRM_RIGHT = (1 << 1) /**< trim trailing spaces */ + QSE_STRTRMC_LEFT = (1 << 0), /**< trim leading spaces */ + QSE_STRTRMC_RIGHT = (1 << 1) /**< trim trailing spaces */ }; #ifdef __cplusplus @@ -483,13 +483,13 @@ int qse_strspltrn ( /******/ /** - * The qse_strtrm() function removes leading spaces and/or trailing + * The qse_strtrmc() function removes leading spaces and/or trailing * spaces from a string depending on the opt parameter. You can form * the op parameter by bitwise-OR'ing one or more of the following * values: * - * - QSE_STRTRM_LEFT - trim leading spaces - * - QSE_STRTRM_RIGHT - trim trailing spaces + * - QSE_STRTRMC_LEFT - trim leading spaces + * - QSE_STRTRMC_RIGHT - trim trailing spaces * * Should it remove leading spaces, it just returns the pointer to * the first non-space character in the string. Should it remove trailing @@ -498,16 +498,33 @@ int qse_strspltrn ( * * @code * qse_char_t a[] = QSE_T(" this is a test string "); - * qse_printf (QSE_T("[%s]\n"), qse_strtrm(a,QSE_STRTRM_LEFT|QSE_STRTRM_RIGHT)); + * qse_printf (QSE_T("[%s]\n"), qse_strtrmc(a,QSE_STRTRMC_LEFT|QSE_STRTRMC_RIGHT)); * @endcode * * @return the pointer to a trimmed string. 
*/ -qse_char_t* qse_strtrm ( +qse_char_t* qse_strtrmc ( qse_char_t* str, /**< a string */ - int op /**< operation code XOR'ed of qse_strtrm_op_t values */ + int op /**< operation code OR'ed of qse_strtrmc_op_t values */ ); +qse_size_t qse_strtrm ( + qse_char_t* str +); + +qse_size_t qse_strxtrm ( + qse_char_t* str, + qse_size_t len +); + +qse_size_t qse_strpac ( + qse_char_t* str +); + +qse_size_t qse_strxpac ( + qse_char_t* str, + qse_size_t len +); /****f* Common/qse_mbstowcs * NAME @@ -831,6 +848,13 @@ qse_size_t qse_str_del ( qse_size_t size ); +qse_size_t qse_str_trm ( + qse_str_t* str +); + +qse_size_t qse_str_pac ( + qse_str_t* str +); #ifdef __cplusplus } diff --git a/qse/include/qse/cut/cut.h b/qse/include/qse/cut/cut.h index ceb9806b..4d192f39 100644 --- a/qse/include/qse/cut/cut.h +++ b/qse/include/qse/cut/cut.h @@ -27,6 +27,12 @@ /** @file * cut utility + * + */ + +/** + * @example cut.c + * This example implements a simple cut utility. */ /** @struct qse_cut_t @@ -41,9 +47,7 @@ enum qse_cut_errnum_t QSE_CUT_ENOERR, /**< no error */ QSE_CUT_ENOMEM, /**< insufficient memory */ QSE_CUT_ESELNV, /**< selector not valid */ - QSE_CUT_EREXIC, /**< regular expression '${0}' incomplete */ - QSE_CUT_EREXBL, /**< failed to compile regular expression '${0}' */ - QSE_CUT_EREXMA, /**< failed to match regular expression */ + QSE_SED_EIOFIL, /**< io error with file '${0}'*/ QSE_CUT_EIOUSR /**< error returned by user io handler */ }; typedef enum qse_cut_errnum_t qse_cut_errnum_t; @@ -56,25 +60,30 @@ typedef enum qse_cut_errnum_t qse_cut_errnum_t; * editor with the qse_cut_seterrstr() function to customize an error string. */ typedef const qse_char_t* (*qse_cut_errstr_t) ( - qse_cut_t* sed, /**< stream editor */ + qse_cut_t* sed, /**< cut object */ qse_cut_errnum_t num /**< an error number */ ); /** - * The qse_cut_option_t type defines various option codes for a stream editor. 
- * Options can be OR'ed with each other and be passed to a stream editor with + * The qse_cut_option_t type defines various option codes for a cut object. + * Options can be OR'ed with each other and be passed to a cut object with * the qse_cut_setoption() function. */ enum qse_cut_option_t { - QSE_CUT_STRIPLS = (1 << 0), /**< strip leading spaces from text */ - QSE_CUT_KEEPTBS = (1 << 1), /**< keep an trailing backslash */ - QSE_CUT_ENSURENL = (1 << 2), /**< ensure NL at the text end */ - QSE_CUT_QUIET = (1 << 3), /**< do not print pattern space */ - QSE_CUT_STRICT = (1 << 4), /**< do strict address check */ - QSE_CUT_STARTSTEP = (1 << 5), /**< allow start~step */ - QSE_CUT_REXBOUND = (1 << 6), /**< allow {n,m} in regular expression */ - QSE_CUT_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */ + /** show delimited line only. if not set, undelimited lines are + * shown in their entirety */ + QSE_CUT_DELIMONLY = (1 << 0), + /** support mixing of c and f selectors */ + QSE_CUT_HYBRIDSEL = (1 << 1), + /** treat any whitespaces as an input delimiter */ + QSE_CUT_WHITESPACE = (1 << 2), + /** fold adjacent delimiters */ + QSE_CUT_FOLDDELIMS = (1 << 3), + /** trim leading and trailing whitespaces off the input line */ + QSE_CUT_TRIMSPACE = (1 << 4), + /** normalize whitespaces in the input line */ + QSE_CUT_NORMSPACE = (1 << 5) }; typedef enum qse_cut_option_t qse_cut_option_t; @@ -88,17 +97,6 @@ enum qse_cut_sel_id_t }; typedef enum qse_cut_sel_id_t qse_cut_sel_id_t; -/** - * The qse_cut_depth_t type defines IDs for qse_cut_getmaxdepth() and - * qse_cut_setmaxdepth(). - */ -enum qse_cut_depth_t -{ - QSE_CUT_DEPTH_REX_BUILD = (1 << 0), - QSE_CUT_DEPTH_REX_MATCH = (1 << 1) -}; -typedef enum qse_cut_depth_t qse_cut_depth_t; - /** * The qse_cut_io_cmd_t type defines IO command codes. The code indicates * the action to take in an IO handler. 
@@ -140,7 +138,7 @@ extern "C" { QSE_DEFINE_COMMON_FUNCTIONS (cut) /** - * The qse_cut_open() function creates a stream editor object. A memory + * The qse_cut_open() function creates a cut object. A memory * manager provided is used to allocate and destory the object and any dynamic * data through out its lifetime. An extension area is allocated if an * extension size greater than 0 is specified. You can access it with the @@ -148,7 +146,7 @@ QSE_DEFINE_COMMON_FUNCTIONS (cut) * with the object. See #QSE_DEFINE_COMMON_FUNCTIONS() for qse_cut_getxtn(). * When done, you should destroy the object with the qse_cut_close() function * to avoid any resource leaks including memory. - * @return A pointer to a stream editor on success, QSE_NULL on failure + * @return A pointer to a cut object on success, QSE_NULL on failure */ qse_cut_t* qse_cut_open ( qse_mmgr_t* mmgr, /**< a memory manager */ @@ -156,51 +154,34 @@ qse_cut_t* qse_cut_open ( ); /** - * The qse_cut_close() function destroys a stream editor. + * The qse_cut_close() function destroys a cut object. */ void qse_cut_close ( - qse_cut_t* cut /**< stream editor */ + qse_cut_t* cut /**< cut object */ ); /** * The qse_cut_getoption() function retrieves the current options set in - * a stream editor. + * a cut object. * @return 0 or a number OR'ed of #qse_cut_option_t values */ int qse_cut_getoption ( - qse_cut_t* cut /**< stream editor */ + qse_cut_t* cut /**< cut object */ ); /** * The qse_cut_setoption() function sets the option code. */ void qse_cut_setoption ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ int opt /**< 0 or a number OR'ed of #qse_cut_option_t values */ ); -/** - * The qse_cut_getmaxdepth() gets the maximum processing depth. - */ -qse_size_t qse_cut_getmaxdepth ( - qse_cut_t* cut, /**< stream editor */ - qse_cut_depth_t id /**< one of qse_cut_depth_t values */ -); - -/** - * The qse_cut_setmaxdepth() sets the maximum processing depth. 
- */ -void qse_cut_setmaxdepth ( - qse_cut_t* cut, /**< stream editor */ - int ids, /**< 0 or a number OR'ed of #qse_cut_depth_t values */ - qse_size_t depth /**< maximum depth level */ -); - /** * The qse_cut_geterrstr() gets an error string getter. */ qse_cut_errstr_t qse_cut_geterrstr ( - qse_cut_t* cut /**< stream editor */ + qse_cut_t* cut /**< cut object */ ); /** @@ -228,7 +209,7 @@ qse_cut_errstr_t qse_cut_geterrstr ( * @endcode */ void qse_cut_seterrstr ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_errstr_t errstr /**< an error string getter */ ); @@ -237,7 +218,7 @@ void qse_cut_seterrstr ( * @return the number of the last error */ qse_cut_errnum_t qse_cut_geterrnum ( - qse_cut_t* cut /**< stream editor */ + qse_cut_t* cut /**< cut object */ ); /** @@ -245,7 +226,7 @@ qse_cut_errnum_t qse_cut_geterrnum ( * @return a pointer to an error message */ const qse_char_t* qse_cut_geterrmsg ( - qse_cut_t* cut /**< stream editor */ + qse_cut_t* cut /**< cut object */ ); /** @@ -254,7 +235,7 @@ const qse_char_t* qse_cut_geterrmsg ( * to by each parameter. */ void qse_cut_geterror ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_errnum_t* errnum, /**< error number */ const qse_char_t** errmsg /**< error message */ ); @@ -264,7 +245,7 @@ void qse_cut_geterror ( * location. */ void qse_cut_seterrnum ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_errnum_t errnum, /**< error number */ const qse_cstr_t* errarg /**< argument for formatting error message */ ); @@ -274,7 +255,7 @@ void qse_cut_seterrnum ( * message for a given error number. */ void qse_cut_seterrmsg ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_errnum_t errnum, /**< error number */ const qse_char_t* errmsg /**< error message */ ); @@ -285,21 +266,30 @@ void qse_cut_seterrmsg ( * and an array of formatting parameters. 
*/ void qse_cut_seterror ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_errnum_t errnum, /**< error number */ const qse_cstr_t* errarg /**< array of arguments for formatting * an error message */ ); +/** + * The qse_cut_clear() function clears memory buffers internally allocated. + */ +void qse_cut_clear ( + qse_cut_t* cut /**< cut object */ +); + /** * The qse_cut_comp() function compiles a selector into an internal form. * @return 0 on success, -1 on error */ int qse_cut_comp ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_sel_id_t sel, /**< initial selector type */ - const qse_char_t* ptr, /**< pointer to a string containing commands */ - qse_size_t len /**< the number of characters in the string */ + const qse_char_t* str, /**< selector pointer */ + qse_size_t len, /**< selector length */ + qse_char_t din, /**< input field delimiter */ + qse_char_t dout /**< output field delimiter */ ); /** @@ -307,7 +297,7 @@ int qse_cut_comp ( * @return 0 on success, -1 on error */ int qse_cut_exec ( - qse_cut_t* cut, /**< stream editor */ + qse_cut_t* cut, /**< cut object */ qse_cut_io_fun_t inf, /**< stream reader */ qse_cut_io_fun_t outf /**< stream writer */ ); diff --git a/qse/lib/cmn/str_dyn.c b/qse/lib/cmn/str_dyn.c index f52144ff..4f5ea460 100644 --- a/qse/lib/cmn/str_dyn.c +++ b/qse/lib/cmn/str_dyn.c @@ -1,5 +1,5 @@ /* - * $Id: str_dyn.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: str_dyn.c 295 2009-10-06 13:47:16Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -334,3 +334,24 @@ qse_size_t qse_str_del (qse_str_t* str, qse_size_t index, qse_size_t size) return str->len; } + +qse_size_t qse_str_trm (qse_str_t* str) +{ + if (str->ptr != QSE_NULL) + { + str->len = qse_strxtrm (str->ptr, str->len); + } + + return str->len; +} + +qse_size_t qse_str_pac (qse_str_t* str) +{ + if (str->ptr != QSE_NULL) + { + str->len = qse_strxpac (str->ptr, str->len); + } + + return str->len; +} + diff --git a/qse/lib/cmn/str_utl.c b/qse/lib/cmn/str_utl.c index a33a0c5e..570bae98 100644 --- a/qse/lib/cmn/str_utl.c +++ b/qse/lib/cmn/str_utl.c @@ -1,5 +1,5 @@ /* - * $Id: str_utl.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: str_utl.c 295 2009-10-06 13:47:16Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -20,6 +20,7 @@ #include #include +#include "mem.h" #define ISSPACE(c) \ ((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\n') || \ @@ -311,7 +312,7 @@ int qse_strspl ( return qse_strspltrn (s, delim, lquote, rquote, escape, QSE_NULL); } -qse_char_t* qse_strtrm (qse_char_t* str, int opt) +qse_char_t* qse_strtrmc (qse_char_t* str, int opt) { qse_char_t* p = str; qse_char_t* s = QSE_NULL, * e = QSE_NULL; @@ -326,8 +327,124 @@ qse_char_t* qse_strtrm (qse_char_t* str, int opt) p++; } - if (opt & QSE_STRTRM_RIGHT) e[1] = QSE_T('\0'); - if (opt & QSE_STRTRM_LEFT) str = s; + if (opt & QSE_STRTRMC_RIGHT) e[1] = QSE_T('\0'); + if (opt & QSE_STRTRMC_LEFT) str = s; return str; } + +qse_size_t qse_strtrm (qse_char_t* str) +{ + qse_char_t* p = str; + qse_char_t* s = QSE_NULL, * e = QSE_NULL; + + while (*p != QSE_T('\0')) + { + if (!QSE_ISSPACE(*p)) + { + if (s == QSE_NULL) s = p; + e = p; + } + p++; + } + + if (e != QSE_NULL) + { + e[1] = QSE_T('\0'); + if (str != s) + QSE_MEMCPY (str, s, (e - s + 2) * QSE_SIZEOF(qse_char_t)); + return e - s + 1; + } + + str[0] = QSE_T('\0'); + return 0; +} + +qse_size_t qse_strxtrm (qse_char_t* str, qse_size_t len) +{ + qse_char_t* p = str, * end = str + len; + 
qse_char_t* s = QSE_NULL, * e = QSE_NULL; + + while (p < end) + { + if (!QSE_ISSPACE(*p)) + { + if (s == QSE_NULL) s = p; + e = p; + } + p++; + } + + if (e != QSE_NULL) + { + /* do not insert a terminating null */ + /*e[1] = QSE_T('\0');*/ + if (str != s) + QSE_MEMCPY (str, s, (e - s + 2) * QSE_SIZEOF(qse_char_t)); + return e - s + 1; + } + + /* do not insert a terminating null */ + /*str[0] = QSE_T('\0');*/ + return 0; +} + +qse_size_t qse_strpac (qse_char_t* str) +{ + qse_char_t* p = str, * q = str; + + while (QSE_ISSPACE(*p)) p++; + while (*p != QSE_T('\0')) + { + if (QSE_ISSPACE(*p)) + { + *q++ = *p++; + while (QSE_ISSPACE(*p)) p++; + } + else *q++ = *p++; + } + + if (q > str && QSE_ISSPACE(q[-1])) q--; + *q = QSE_T('\0'); + + return q - str; +} + +qse_size_t qse_strxpac (qse_char_t* str, qse_size_t len) +{ + qse_char_t* p = str, * q = str, * end = str + len; + int followed_by_space = 0; + int state = 0; + + while (p < end) + { + if (state == 0) + { + if (!QSE_ISSPACE(*p)) + { + *q++ = *p; + state = 1; + } + } + else if (state == 1) + { + if (QSE_ISSPACE(*p)) + { + if (!followed_by_space) + { + followed_by_space = 1; + *q++ = *p; + } + } + else + { + followed_by_space = 0; + *q++ = *p; + } + } + + p++; + } + + return (followed_by_space) ? 
(q - str -1): (q - str); +} diff --git a/qse/lib/cut/cut.c b/qse/lib/cut/cut.c index 6334dfd0..1f128d16 100644 --- a/qse/lib/cut/cut.c +++ b/qse/lib/cut/cut.c @@ -20,11 +20,8 @@ #include "cut.h" #include "../cmn/mem.h" -#include #include -#define MAX QSE_TYPE_MAX(qse_size_t) - QSE_IMPLEMENT_COMMON_FUNCTIONS (cut) static qse_cut_t* qse_cut_init (qse_cut_t* cut, qse_mmgr_t* mmgr); @@ -33,13 +30,6 @@ static void qse_cut_fini (qse_cut_t* cut); #define SETERR0(cut,num) \ do { qse_cut_seterror (cut, num, QSE_NULL); } while (0) -#define SETERR1(cut,num,argp,argl) \ -do { \ - qse_cstr_t __ea__; \ - __ea__.ptr = argp; __ea__.len = argl; \ - qse_cut_seterror (cut, num, &__ea__); \ -} while (0) - static int add_selector_block (qse_cut_t* cut) { qse_cut_sel_blk_t* b; @@ -59,6 +49,7 @@ static int add_selector_block (qse_cut_t* cut) cut->sel.lb = b; cut->sel.count = 0; cut->sel.fcount = 0; + cut->sel.ccount = 0; return 0; } @@ -79,6 +70,7 @@ static void free_all_selector_blocks (qse_cut_t* cut) cut->sel.lb->next = QSE_NULL; cut->sel.count = 0; cut->sel.fcount = 0; + cut->sel.ccount = 0; } qse_cut_t* qse_cut_open (qse_mmgr_t* mmgr, qse_size_t xtn) @@ -125,13 +117,17 @@ static qse_cut_t* qse_cut_init (qse_cut_t* cut, qse_mmgr_t* mmgr) /* the block has no data yet */ cut->sel.fb.len = 0; + cut->e.in.cflds = QSE_COUNTOF(cut->e.in.sflds); + cut->e.in.flds = cut->e.in.sflds; + return cut; } - static void qse_cut_fini (qse_cut_t* cut) { free_all_selector_blocks (cut); + if (cut->e.in.flds != cut->e.in.sflds) + QSE_MMGR_FREE (cut->mmgr, cut->e.in.flds); } void qse_cut_setoption (qse_cut_t* cut, int option) @@ -144,21 +140,19 @@ int qse_cut_getoption (qse_cut_t* cut) return cut->option; } -qse_size_t qse_cut_getmaxdepth (qse_cut_t* cut, qse_cut_depth_t id) +void qse_cut_clear (qse_cut_t* cut) { - return (id & QSE_CUT_DEPTH_REX_BUILD)? cut->depth.rex.build: - (id & QSE_CUT_DEPTH_REX_MATCH)? 
cut->depth.rex.match: 0; -} - -void qse_cut_setmaxdepth (qse_cut_t* cut, int ids, qse_size_t depth) -{ - if (ids & QSE_CUT_DEPTH_REX_BUILD) cut->depth.rex.build = depth; - if (ids & QSE_CUT_DEPTH_REX_MATCH) cut->depth.rex.match = depth; + free_all_selector_blocks (cut); + if (cut->e.in.flds != cut->e.in.sflds) + QSE_MMGR_FREE (cut->mmgr, cut->e.in.flds); + cut->e.in.cflds = QSE_COUNTOF(cut->e.in.sflds); + cut->e.in.flds = cut->e.in.sflds; } int qse_cut_comp ( qse_cut_t* cut, qse_cut_sel_id_t sel, - const qse_char_t* str, qse_size_t len) + const qse_char_t* str, qse_size_t len, + qse_char_t din, qse_char_t dout) { const qse_char_t* p = str; const qse_char_t* xnd = str + len; @@ -169,6 +163,7 @@ int qse_cut_comp ( #define EOF(x) ((x) == QSE_CHAR_EOF) #define MASK_START (1 << 1) #define MASK_END (1 << 2) +#define MAX QSE_TYPE_MAX(qse_size_t) free_all_selector_blocks (cut); @@ -192,17 +187,20 @@ int qse_cut_comp ( break; } - if (c == QSE_T('c')) + if (cut->option & QSE_CUT_HYBRIDSEL) { - sel = QSE_CUT_SEL_CHAR; - c = NC (p, xnd); - while (QSE_ISSPACE(c)) c = NC (p, xnd); - } - else if (c == QSE_T('f')) - { - sel = QSE_CUT_SEL_FIELD; - c = NC (p, xnd); - while (QSE_ISSPACE(c)) c = NC (p, xnd); + if (c == QSE_T('c')) + { + sel = QSE_CUT_SEL_CHAR; + c = NC (p, xnd); + while (QSE_ISSPACE(c)) c = NC (p, xnd); + } + else if (c == QSE_T('f')) + { + sel = QSE_CUT_SEL_FIELD; + c = NC (p, xnd); + while (QSE_ISSPACE(c)) c = NC (p, xnd); + } } if (QSE_ISDIGIT(c)) @@ -260,11 +258,14 @@ int qse_cut_comp ( cut->sel.lb->len++; cut->sel.count++; if (sel == QSE_CUT_SEL_FIELD) cut->sel.fcount++; + else cut->sel.ccount++; if (EOF(c)) break; if (c == QSE_T(',')) c = NC (p, xnd); } + cut->sel.din = din; + cut->sel.dout = dout; return 0; } @@ -332,6 +333,9 @@ static int read_line (qse_cut_t* cut) } cut->e.in.num++; + + if (cut->option & QSE_CUT_TRIMSPACE) qse_str_trm (&cut->e.in.line); + if (cut->option & QSE_CUT_NORMSPACE) qse_str_pac (&cut->e.in.line); return 1; } @@ -381,6 +385,12 @@ 
static int write_char (qse_cut_t* cut, qse_char_t c) return 0; } +static int write_linebreak (qse_cut_t* cut) +{ + /* TODO: different line termination convention */ + return write_char (cut, QSE_T('\n')); +} + static int write_str (qse_cut_t* cut, const qse_char_t* str, qse_size_t len) { qse_size_t i; @@ -391,17 +401,15 @@ static int write_str (qse_cut_t* cut, const qse_char_t* str, qse_size_t len) return 0; } -int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end) +static int cut_chars ( + qse_cut_t* cut, qse_size_t start, qse_size_t end, int delim) { const qse_char_t* ptr = QSE_STR_PTR(&cut->e.in.line); qse_size_t len = QSE_STR_LEN(&cut->e.in.line); - if (len <= 0) - { - /* TODO: delimited only */ - if (write_char (cut, QSE_T('\n')) <= -1) return -1; - } - else if (start <= end) + if (len <= 0) return 0; + + if (start <= end) { if (start <= len && end > 0) { @@ -409,13 +417,14 @@ int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end) if (end >= 1) end--; if (end >= len) end = len - 1; + + if (delim && write_char (cut, cut->sel.dout) <= -1) return -1; if (write_str (cut, &ptr[start], end-start+1) <= -1) return -1; - } - /* TODO: DELIMTIED ONLY */ - if (write_char (cut, QSE_T('\n')) <= -1) return -1; + return 1; + } } else { @@ -428,24 +437,142 @@ int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end) if (start >= len) start = len - 1; + if (delim && write_char (cut, cut->sel.dout) <= -1) return -1; + for (i = start; i >= end; i--) { if (write_char (cut, ptr[i]) <= -1) return -1; } - } - /* TODO: DELIMTIED ONLY */ - if (write_char (cut, QSE_T('\n')) <= -1) return -1; + return 1; + } } return 0; } -int cut_fields (qse_cut_t* cut, qse_size_t start, qse_size_t end) +static int isdelim (qse_cut_t* cut, qse_char_t c) { -/* TODO: field splitting... 
delimited only */ - return -1; + return ((cut->option & QSE_CUT_WHITESPACE) && QSE_ISSPACE(c)) || + (!(cut->option & QSE_CUT_WHITESPACE) && c == cut->sel.din); +} + +static int split_line (qse_cut_t* cut) +{ + const qse_char_t* ptr = QSE_STR_PTR(&cut->e.in.line); + qse_size_t len = QSE_STR_LEN(&cut->e.in.line); + qse_size_t i, x = 0, xl = 0; + + cut->e.in.delimited = 0; + cut->e.in.flds[x].ptr = ptr; + for (i = 0; i < len; ) + { + qse_char_t c = ptr[i++]; + if (isdelim(cut,c)) + { + if (cut->option & QSE_CUT_FOLDDELIMS) + { + while (i < len && isdelim(cut,ptr[i])) i++; + } + + cut->e.in.flds[x++].len = xl; + + if (x >= cut->e.in.cflds) + { + qse_cstr_t* tmp; + qse_size_t nsz; + + nsz = cut->e.in.cflds; + if (nsz > 100000) nsz += 100000; + else nsz *= 2; + + tmp = QSE_MMGR_ALLOC (cut->mmgr, + QSE_SIZEOF(*tmp) * nsz); + if (tmp == QSE_NULL) + { + SETERR0 (cut, QSE_CUT_ENOMEM); + return -1; + } + + QSE_MEMCPY (tmp, cut->e.in.flds, + QSE_SIZEOF(*tmp) * cut->e.in.cflds); + + if (cut->e.in.flds != cut->e.in.sflds) + QSE_MMGR_FREE (cut->mmgr, cut->e.in.flds); + cut->e.in.flds = tmp; + cut->e.in.cflds = nsz; + } + + xl = 0; + cut->e.in.flds[x].ptr = &ptr[i]; + cut->e.in.delimited = 1; + } + else xl++; + } + cut->e.in.flds[x].len = xl; + cut->e.in.nflds = ++x; + return 0; +} + +static int cut_fields ( + qse_cut_t* cut, qse_size_t start, qse_size_t end, int delim) +{ + qse_size_t len = cut->e.in.nflds; + + if (!cut->e.in.delimited /*|| len <= 0*/) return 0; + + QSE_ASSERT (len > 0); + if (start <= end) + { + if (start <= len && end > 0) + { + qse_size_t i; + + if (start >= 1) start--; + if (end >= 1) end--; + + if (end >= len) end = len - 1; + + if (delim && write_char (cut, cut->sel.dout) <= -1) return -1; + + for (i = start; i <= end; i++) + { + if (write_str (cut, cut->e.in.flds[i].ptr, cut->e.in.flds[i].len) <= -1) + return -1; + + if (i < end && write_char (cut, cut->sel.dout) <= -1) return -1; + } + + return 1; + } + } + else + { + if (start > 0 && end <= len) + { + 
qse_size_t i; + + if (start >= 1) start--; + if (end >= 1) end--; + + if (start >= len) start = len - 1; + + if (delim && write_char (cut, cut->sel.dout) <= -1) return -1; + + for (i = start; i >= end; i--) + { + if (write_str (cut, cut->e.in.flds[i].ptr, cut->e.in.flds[i].len) <= -1) + return -1; + + if (i > end && write_char (cut, cut->sel.dout) <= -1) return -1; + } + + return 1; + } + } + + return 0; } int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf) @@ -500,11 +627,12 @@ int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf) cut->e.out.eof = 1; } - while (1) { qse_cut_sel_blk_t* b; - qse_size_t i; + int id = 0; /* mark 'no output' so far */ + int delimited = 0; + int linebreak = 0; n = read_line (cut); if (n <= -1) { ret = -1; goto done; } @@ -512,19 +640,86 @@ int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf) if (cut->sel.fcount > 0) { -/* split the line into fields */ + if (split_line (cut) <= -1) { ret = -1; goto done; } + delimited = cut->e.in.delimited; } for (b = &cut->sel.fb; b != QSE_NULL; b = b->next) { + qse_size_t i; + for (i = 0; i < b->len; i++) { - ret = (b->range[i].id == QSE_CUT_SEL_CHAR)? 
- cut_chars (cut, b->range[i].start, b->range[i].end): - cut_fields (cut, b->range[i].start, b->range[i].end); - if (ret <= -1) goto done; + if (b->range[i].id == QSE_CUT_SEL_CHAR) + { + n = cut_chars ( + cut, + b->range[i].start, + b->range[i].end, + id == 2 + ); + if (n >= 1) + { + /* mark a char's been output */ + id = 1; + } + } + else + { + n = cut_fields ( + cut, + b->range[i].start, + b->range[i].end, + id > 0 + ); + if (n >= 1) + { + /* mark a field's been output */ + id = 2; + } + } + + if (n <= -1) { ret = -1; goto done; } } } + + if (cut->sel.ccount > 0) + { + /* so long as there is a character selector, + * a newline must be printed */ + linebreak = 1; + } + else if (cut->sel.fcount > 0) + { + /* if only field selectors are specified */ + + if (delimited) + { + /* and if the input line is delimited, + * write a line break */ + linebreak = 1; + } + else if (!(cut->option & QSE_CUT_DELIMONLY)) + { + /* if not delimited, write the + * entire undelimited input line depending + * on the option set. 
*/ + if (write_str (cut, + QSE_STR_PTR(&cut->e.in.line), + QSE_STR_LEN(&cut->e.in.line)) <= -1) + { + ret = -1; goto done; + } + + /* a line break is needed in this case */ + linebreak = 1; + } + } + + if (linebreak && write_linebreak(cut) <= -1) + { + ret = -1; goto done; + } } done: diff --git a/qse/lib/cut/cut.h b/qse/lib/cut/cut.h index 8824b4c1..58090aca 100644 --- a/qse/lib/cut/cut.h +++ b/qse/lib/cut/cut.h @@ -34,7 +34,7 @@ struct qse_cut_sel_blk_t qse_cut_sel_id_t id; qse_size_t start; qse_size_t end; - } range[256]; + } range[128]; qse_cut_sel_blk_t* next; }; @@ -61,8 +61,13 @@ struct qse_cut_t { qse_cut_sel_blk_t fb; /**< the first block is static */ qse_cut_sel_blk_t* lb; /**< points to the last block */ + + qse_char_t din; /**< input field delimiter */ + qse_char_t dout; /**< output field delimiter */ + qse_size_t count; qse_size_t fcount; + qse_size_t ccount; } sel; struct @@ -94,6 +99,12 @@ struct qse_cut_t qse_str_t line; /**< pattern space */ qse_size_t num; /**< current line number */ + + qse_size_t nflds; /**< the number of fields */ + qse_size_t cflds; /**< capacity of flds field */ + qse_cstr_t sflds[128]; /**< static field buffer */ + qse_cstr_t* flds; + int delimited; } in; } e; diff --git a/qse/lib/cut/err.c b/qse/lib/cut/err.c index fa323a2a..f22886b3 100644 --- a/qse/lib/cut/err.c +++ b/qse/lib/cut/err.c @@ -28,9 +28,7 @@ const qse_char_t* qse_cut_dflerrstr (qse_cut_t* cut, qse_cut_errnum_t errnum) QSE_T("no error"), QSE_T("insufficient memory"), QSE_T("selector not valid"), - QSE_T("regular expression '${0}' incomplete"), - QSE_T("failed to compile regular expression '${0}'"), - QSE_T("failed to match regular expression"), + QSE_T("io error with file '${0}'"), QSE_T("error returned by user io handler") }; diff --git a/qse/samples/cmn/str.c b/qse/samples/cmn/str.c index c8ef34c4..0143626c 100644 --- a/qse/samples/cmn/str.c +++ b/qse/samples/cmn/str.c @@ -515,14 +515,14 @@ static int test12 (void) qse_char_t a3[] = QSE_T(" this is a test 
string "); qse_printf (QSE_T("[%s] =>"), a1); - qse_printf (QSE_T("[%s]\n"), qse_strtrm (a1, QSE_STRTRM_LEFT)); + qse_printf (QSE_T("[%s]\n"), qse_strtrmc (a1, QSE_STRTRMC_LEFT)); qse_printf (QSE_T("[%s] =>"), a2); - qse_printf (QSE_T("[%s]\n"), qse_strtrm (a2, QSE_STRTRM_RIGHT)); + qse_printf (QSE_T("[%s]\n"), qse_strtrmc (a2, QSE_STRTRMC_RIGHT)); qse_printf (QSE_T("[%s] =>"), a3); qse_printf (QSE_T("[%s]\n"), - qse_strtrm (a3, QSE_STRTRM_LEFT|QSE_STRTRM_RIGHT)); + qse_strtrmc (a3, QSE_STRTRMC_LEFT|QSE_STRTRMC_RIGHT)); return 0; }