enhanced cut

This commit is contained in:
hyung-hwan 2009-10-07 07:47:16 +00:00
parent b3389e89d5
commit a4a82873f7
10 changed files with 496 additions and 140 deletions

View File

@ -3,7 +3,7 @@ pkgincludedir = $(includedir)/qse/cmn
pkginclude_HEADERS = \ pkginclude_HEADERS = \
mem.h chr.h str.h lda.h map.h \ mem.h chr.h str.h lda.h map.h \
rex.h sll.h dll.h opt.h tio.h \ rex.h sll.h dll.h opt.h tio.h \
fio.h pio.h sio.h time.h main.h stdio.h fio.h pio.h sio.h time.h misc.h main.h stdio.h
#if ENABLE_CXX #if ENABLE_CXX
#pkginclude_HEADERS += #pkginclude_HEADERS +=

View File

@ -199,7 +199,7 @@ top_srcdir = @top_srcdir@
pkginclude_HEADERS = \ pkginclude_HEADERS = \
mem.h chr.h str.h lda.h map.h \ mem.h chr.h str.h lda.h map.h \
rex.h sll.h dll.h opt.h tio.h \ rex.h sll.h dll.h opt.h tio.h \
fio.h pio.h sio.h time.h main.h stdio.h fio.h pio.h sio.h time.h misc.h main.h stdio.h
all: all-am all: all-am

View File

@ -1,5 +1,5 @@
/* /*
* $Id: str.h 290 2009-09-19 04:28:49Z hyunghwan.chung $ * $Id: str.h 295 2009-10-06 13:47:16Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -104,12 +104,12 @@ struct qse_str_t
} }
/** /**
* The qse_strtrm_op_t defines a string trimming operation. * The qse_strtrmc_op_t defines a string trimming operation.
*/ */
enum qse_strtrm_op_t enum qse_strtrmc_op_t
{ {
QSE_STRTRM_LEFT = (1 << 0), /**< trim leading spaces */ QSE_STRTRMC_LEFT = (1 << 0), /**< trim leading spaces */
QSE_STRTRM_RIGHT = (1 << 1) /**< trim trailing spaces */ QSE_STRTRMC_RIGHT = (1 << 1) /**< trim trailing spaces */
}; };
#ifdef __cplusplus #ifdef __cplusplus
@ -483,13 +483,13 @@ int qse_strspltrn (
/******/ /******/
/** /**
* The qse_strtrm() function removes leading spaces and/or trailing * The qse_strtrmc() function removes leading spaces and/or trailing
* spaces from a string depending on the opt parameter. You can form * spaces from a string depending on the opt parameter. You can form
* the op parameter by bitwise-OR'ing one or more of the following * the op parameter by bitwise-OR'ing one or more of the following
* values: * values:
* *
* - QSE_STRTRM_LEFT - trim leading spaces * - QSE_STRTRMC_LEFT - trim leading spaces
* - QSE_STRTRM_RIGHT - trim trailing spaces * - QSE_STRTRMC_RIGHT - trim trailing spaces
* *
* Should it remove leading spaces, it just returns the pointer to * Should it remove leading spaces, it just returns the pointer to
* the first non-space character in the string. Should it remove trailing * the first non-space character in the string. Should it remove trailing
@ -498,16 +498,33 @@ int qse_strspltrn (
* *
* @code * @code
* qse_char_t a[] = QSE_T(" this is a test string "); * qse_char_t a[] = QSE_T(" this is a test string ");
* qse_printf (QSE_T("[%s]\n"), qse_strtrm(a,QSE_STRTRM_LEFT|QSE_STRTRM_RIGHT)); * qse_printf (QSE_T("[%s]\n"), qse_strtrmc(a,QSE_STRTRMC_LEFT|QSE_STRTRMC_RIGHT));
* @endcode * @endcode
* *
* @return the pointer to a trimmed string. * @return the pointer to a trimmed string.
*/ */
qse_char_t* qse_strtrm ( qse_char_t* qse_strtrmc (
qse_char_t* str, /**< a string */ qse_char_t* str, /**< a string */
int op /**< operation code XOR'ed of qse_strtrm_op_t values */ int op /**< operation code OR'ed of qse_strtrmc_op_t values */
); );
/**
 * The qse_strtrm() function removes leading and trailing spaces from a
 * null-terminated string in place. The surviving characters are moved to
 * the beginning of the buffer and a terminating null is written after them.
 * @return the length of the trimmed string
 */
qse_size_t qse_strtrm (
qse_char_t* str /**< a null-terminated string, modified in place */
);
/**
 * The qse_strxtrm() function removes leading and trailing spaces from
 * a string of a given length in place. It does not write a terminating
 * null; use the returned length to delimit the result.
 * @return the length of the trimmed string
 */
qse_size_t qse_strxtrm (
qse_char_t* str, /**< a string, modified in place */
qse_size_t len /**< the number of characters in the string */
);
/**
 * The qse_strpac() function removes leading and trailing spaces from a
 * null-terminated string and reduces each run of spaces inside the string
 * to its first space character. The string is modified in place and
 * null-terminated.
 * @return the length of the resulting string
 */
qse_size_t qse_strpac (
qse_char_t* str /**< a null-terminated string, modified in place */
);
/**
 * The qse_strxpac() function is the length-bounded counterpart of
 * qse_strpac(). It does not write a terminating null; use the returned
 * length to delimit the result.
 * @return the length of the resulting string
 */
qse_size_t qse_strxpac (
qse_char_t* str, /**< a string, modified in place */
qse_size_t len /**< the number of characters in the string */
);
/****f* Common/qse_mbstowcs /****f* Common/qse_mbstowcs
* NAME * NAME
@ -831,6 +848,13 @@ qse_size_t qse_str_del (
qse_size_t size qse_size_t size
); );
/**
 * The qse_str_trm() function removes leading and trailing spaces from
 * a dynamic string and updates its length. It does nothing if the string
 * has no buffer.
 * @return the length of the string after trimming
 */
qse_size_t qse_str_trm (
qse_str_t* str /**< a dynamic string */
);
/**
 * The qse_str_pac() function removes leading and trailing spaces from
 * a dynamic string, reduces each inner run of spaces to a single space
 * character, and updates its length. It does nothing if the string has
 * no buffer.
 * @return the length of the string after packing
 */
qse_size_t qse_str_pac (
qse_str_t* str /**< a dynamic string */
);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -27,6 +27,12 @@
/** @file /** @file
* cut utility * cut utility
*
*/
/**
* @example cut.c
* This example implements a simple cut utility.
*/ */
/** @struct qse_cut_t /** @struct qse_cut_t
@ -41,9 +47,7 @@ enum qse_cut_errnum_t
QSE_CUT_ENOERR, /**< no error */ QSE_CUT_ENOERR, /**< no error */
QSE_CUT_ENOMEM, /**< insufficient memory */ QSE_CUT_ENOMEM, /**< insufficient memory */
QSE_CUT_ESELNV, /**< selector not valid */ QSE_CUT_ESELNV, /**< selector not valid */
QSE_CUT_EREXIC, /**< regular expression '${0}' incomplete */ QSE_SED_EIOFIL, /**< io error with file '${0}'*/
QSE_CUT_EREXBL, /**< failed to compile regular expression '${0}' */
QSE_CUT_EREXMA, /**< failed to match regular expression */
QSE_CUT_EIOUSR /**< error returned by user io handler */ QSE_CUT_EIOUSR /**< error returned by user io handler */
}; };
typedef enum qse_cut_errnum_t qse_cut_errnum_t; typedef enum qse_cut_errnum_t qse_cut_errnum_t;
@ -56,25 +60,30 @@ typedef enum qse_cut_errnum_t qse_cut_errnum_t;
* editor with the qse_cut_seterrstr() function to customize an error string. * editor with the qse_cut_seterrstr() function to customize an error string.
*/ */
typedef const qse_char_t* (*qse_cut_errstr_t) ( typedef const qse_char_t* (*qse_cut_errstr_t) (
qse_cut_t* sed, /**< stream editor */ qse_cut_t* sed, /**< cut object */
qse_cut_errnum_t num /**< an error number */ qse_cut_errnum_t num /**< an error number */
); );
/** /**
* The qse_cut_option_t type defines various option codes for a stream editor. * The qse_cut_option_t type defines various option codes for a cut object.
* Options can be OR'ed with each other and be passed to a stream editor with * Options can be OR'ed with each other and be passed to a cut object with
* the qse_cut_setoption() function. * the qse_cut_setoption() function.
*/ */
enum qse_cut_option_t enum qse_cut_option_t
{ {
QSE_CUT_STRIPLS = (1 << 0), /**< strip leading spaces from text */ /** show delimited line only. if not set, undelimited lines are
QSE_CUT_KEEPTBS = (1 << 1), /**< keep an trailing backslash */ * shown in its entirety */
QSE_CUT_ENSURENL = (1 << 2), /**< ensure NL at the text end */ QSE_CUT_DELIMONLY = (1 << 0),
QSE_CUT_QUIET = (1 << 3), /**< do not print pattern space */ /** support mixing of c and f selectors */
QSE_CUT_STRICT = (1 << 4), /**< do strict address check */ QSE_CUT_HYBRIDSEL = (1 << 1),
QSE_CUT_STARTSTEP = (1 << 5), /**< allow start~step */ /** treat any whitespaces as an input delimiter */
QSE_CUT_REXBOUND = (1 << 6), /**< allow {n,m} in regular expression */ QSE_CUT_WHITESPACE = (1 << 2),
QSE_CUT_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */ /** fold adjacent delimiters */
QSE_CUT_FOLDDELIMS = (1 << 3),
/** trim leading and trailing whitespaces off the input line */
QSE_CUT_TRIMSPACE = (1 << 4),
/** normalize whitespaces in the input line */
QSE_CUT_NORMSPACE = (1 << 5)
}; };
typedef enum qse_cut_option_t qse_cut_option_t; typedef enum qse_cut_option_t qse_cut_option_t;
@ -88,17 +97,6 @@ enum qse_cut_sel_id_t
}; };
typedef enum qse_cut_sel_id_t qse_cut_sel_id_t; typedef enum qse_cut_sel_id_t qse_cut_sel_id_t;
/**
* The qse_cut_depth_t type defines IDs for qse_cut_getmaxdepth() and
* qse_cut_setmaxdepth().
*/
enum qse_cut_depth_t
{
QSE_CUT_DEPTH_REX_BUILD = (1 << 0),
QSE_CUT_DEPTH_REX_MATCH = (1 << 1)
};
typedef enum qse_cut_depth_t qse_cut_depth_t;
/** /**
* The qse_cut_io_cmd_t type defines IO command codes. The code indicates * The qse_cut_io_cmd_t type defines IO command codes. The code indicates
* the action to take in an IO handler. * the action to take in an IO handler.
@ -140,7 +138,7 @@ extern "C" {
QSE_DEFINE_COMMON_FUNCTIONS (cut) QSE_DEFINE_COMMON_FUNCTIONS (cut)
/** /**
* The qse_cut_open() function creates a stream editor object. A memory * The qse_cut_open() function creates a cut object. A memory
* manager provided is used to allocate and destory the object and any dynamic * manager provided is used to allocate and destory the object and any dynamic
* data through out its lifetime. An extension area is allocated if an * data through out its lifetime. An extension area is allocated if an
* extension size greater than 0 is specified. You can access it with the * extension size greater than 0 is specified. You can access it with the
@ -148,7 +146,7 @@ QSE_DEFINE_COMMON_FUNCTIONS (cut)
* with the object. See #QSE_DEFINE_COMMON_FUNCTIONS() for qse_cut_getxtn(). * with the object. See #QSE_DEFINE_COMMON_FUNCTIONS() for qse_cut_getxtn().
* When done, you should destroy the object with the qse_cut_close() function * When done, you should destroy the object with the qse_cut_close() function
* to avoid any resource leaks including memory. * to avoid any resource leaks including memory.
* @return A pointer to a stream editor on success, QSE_NULL on failure * @return A pointer to a cut object on success, QSE_NULL on failure
*/ */
qse_cut_t* qse_cut_open ( qse_cut_t* qse_cut_open (
qse_mmgr_t* mmgr, /**< a memory manager */ qse_mmgr_t* mmgr, /**< a memory manager */
@ -156,51 +154,34 @@ qse_cut_t* qse_cut_open (
); );
/** /**
* The qse_cut_close() function destroys a stream editor. * The qse_cut_close() function destroys a cut object.
*/ */
void qse_cut_close ( void qse_cut_close (
qse_cut_t* cut /**< stream editor */ qse_cut_t* cut /**< cut object */
); );
/** /**
* The qse_cut_getoption() function retrieves the current options set in * The qse_cut_getoption() function retrieves the current options set in
* a stream editor. * a cut object.
* @return 0 or a number OR'ed of #qse_cut_option_t values * @return 0 or a number OR'ed of #qse_cut_option_t values
*/ */
int qse_cut_getoption ( int qse_cut_getoption (
qse_cut_t* cut /**< stream editor */ qse_cut_t* cut /**< cut object */
); );
/** /**
* The qse_cut_setoption() function sets the option code. * The qse_cut_setoption() function sets the option code.
*/ */
void qse_cut_setoption ( void qse_cut_setoption (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
int opt /**< 0 or a number OR'ed of #qse_cut_option_t values */ int opt /**< 0 or a number OR'ed of #qse_cut_option_t values */
); );
/**
* The qse_cut_getmaxdepth() gets the maximum processing depth.
*/
qse_size_t qse_cut_getmaxdepth (
qse_cut_t* cut, /**< stream editor */
qse_cut_depth_t id /**< one of qse_cut_depth_t values */
);
/**
* The qse_cut_setmaxdepth() sets the maximum processing depth.
*/
void qse_cut_setmaxdepth (
qse_cut_t* cut, /**< stream editor */
int ids, /**< 0 or a number OR'ed of #qse_cut_depth_t values */
qse_size_t depth /**< maximum depth level */
);
/** /**
* The qse_cut_geterrstr() gets an error string getter. * The qse_cut_geterrstr() gets an error string getter.
*/ */
qse_cut_errstr_t qse_cut_geterrstr ( qse_cut_errstr_t qse_cut_geterrstr (
qse_cut_t* cut /**< stream editor */ qse_cut_t* cut /**< cut object */
); );
/** /**
@ -228,7 +209,7 @@ qse_cut_errstr_t qse_cut_geterrstr (
* @endcode * @endcode
*/ */
void qse_cut_seterrstr ( void qse_cut_seterrstr (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_errstr_t errstr /**< an error string getter */ qse_cut_errstr_t errstr /**< an error string getter */
); );
@ -237,7 +218,7 @@ void qse_cut_seterrstr (
* @return the number of the last error * @return the number of the last error
*/ */
qse_cut_errnum_t qse_cut_geterrnum ( qse_cut_errnum_t qse_cut_geterrnum (
qse_cut_t* cut /**< stream editor */ qse_cut_t* cut /**< cut object */
); );
/** /**
@ -245,7 +226,7 @@ qse_cut_errnum_t qse_cut_geterrnum (
* @return a pointer to an error message * @return a pointer to an error message
*/ */
const qse_char_t* qse_cut_geterrmsg ( const qse_char_t* qse_cut_geterrmsg (
qse_cut_t* cut /**< stream editor */ qse_cut_t* cut /**< cut object */
); );
/** /**
@ -254,7 +235,7 @@ const qse_char_t* qse_cut_geterrmsg (
* to by each parameter. * to by each parameter.
*/ */
void qse_cut_geterror ( void qse_cut_geterror (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_errnum_t* errnum, /**< error number */ qse_cut_errnum_t* errnum, /**< error number */
const qse_char_t** errmsg /**< error message */ const qse_char_t** errmsg /**< error message */
); );
@ -264,7 +245,7 @@ void qse_cut_geterror (
* location. * location.
*/ */
void qse_cut_seterrnum ( void qse_cut_seterrnum (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_errnum_t errnum, /**< error number */ qse_cut_errnum_t errnum, /**< error number */
const qse_cstr_t* errarg /**< argument for formatting error message */ const qse_cstr_t* errarg /**< argument for formatting error message */
); );
@ -274,7 +255,7 @@ void qse_cut_seterrnum (
* message for a given error number. * message for a given error number.
*/ */
void qse_cut_seterrmsg ( void qse_cut_seterrmsg (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_errnum_t errnum, /**< error number */ qse_cut_errnum_t errnum, /**< error number */
const qse_char_t* errmsg /**< error message */ const qse_char_t* errmsg /**< error message */
); );
@ -285,21 +266,30 @@ void qse_cut_seterrmsg (
* and an array of formatting parameters. * and an array of formatting parameters.
*/ */
void qse_cut_seterror ( void qse_cut_seterror (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_errnum_t errnum, /**< error number */ qse_cut_errnum_t errnum, /**< error number */
const qse_cstr_t* errarg /**< array of arguments for formatting const qse_cstr_t* errarg /**< array of arguments for formatting
* an error message */ * an error message */
); );
/**
* The qse_cut_clear() function clears memory buffers internally allocated.
*/
void qse_cut_clear (
qse_cut_t* cut /**< cut object */
);
/** /**
* The qse_cut_comp() function compiles a selector into an internal form. * The qse_cut_comp() function compiles a selector into an internal form.
* @return 0 on success, -1 on error * @return 0 on success, -1 on error
*/ */
int qse_cut_comp ( int qse_cut_comp (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_sel_id_t sel, /**< initial selector type */ qse_cut_sel_id_t sel, /**< initial selector type */
const qse_char_t* ptr, /**< pointer to a string containing commands */ const qse_char_t* str, /**< selector pointer */
qse_size_t len /**< the number of characters in the string */ qse_size_t len, /**< selector length */
qse_char_t din, /**< input field delimiter */
qse_char_t dout /**< output field delimiter */
); );
/** /**
@ -307,7 +297,7 @@ int qse_cut_comp (
* @return 0 on success, -1 on error * @return 0 on success, -1 on error
*/ */
int qse_cut_exec ( int qse_cut_exec (
qse_cut_t* cut, /**< stream editor */ qse_cut_t* cut, /**< cut object */
qse_cut_io_fun_t inf, /**< stream reader */ qse_cut_io_fun_t inf, /**< stream reader */
qse_cut_io_fun_t outf /**< stream writer */ qse_cut_io_fun_t outf /**< stream writer */
); );

View File

@ -1,5 +1,5 @@
/* /*
* $Id: str_dyn.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ * $Id: str_dyn.c 295 2009-10-06 13:47:16Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -334,3 +334,24 @@ qse_size_t qse_str_del (qse_str_t* str, qse_size_t index, qse_size_t size)
return str->len; return str->len;
} }
/**
 * Trims leading and trailing spaces off a dynamic string in place.
 *
 * The work is delegated to qse_strxtrm(), which never writes a terminating
 * null. When the string gets shorter, a terminator is therefore stored at
 * the new end so that the buffer does not keep a stale character right
 * after the trimmed text. The index written is always smaller than the
 * previous length, hence inside the buffer.
 *
 * @return the length of the string after trimming. A string without a
 *         buffer is left untouched and its current length is returned.
 */
qse_size_t qse_str_trm (qse_str_t* str)
{
	if (str->ptr != QSE_NULL)
	{
		qse_size_t oldlen = str->len;

		str->len = qse_strxtrm (str->ptr, oldlen);
		if (str->len < oldlen) str->ptr[str->len] = QSE_T('\0');
	}

	return str->len;
}
/**
 * Packs a dynamic string in place: leading and trailing spaces are removed
 * and each inner run of spaces is reduced to one space character.
 *
 * The work is delegated to qse_strxpac(), which never writes a terminating
 * null. When the string gets shorter, a terminator is therefore stored at
 * the new end so that the buffer does not keep a stale character right
 * after the packed text. The index written is always smaller than the
 * previous length, hence inside the buffer.
 *
 * @return the length of the string after packing. A string without a
 *         buffer is left untouched and its current length is returned.
 */
qse_size_t qse_str_pac (qse_str_t* str)
{
	if (str->ptr != QSE_NULL)
	{
		qse_size_t oldlen = str->len;

		str->len = qse_strxpac (str->ptr, oldlen);
		if (str->len < oldlen) str->ptr[str->len] = QSE_T('\0');
	}

	return str->len;
}

View File

@ -1,5 +1,5 @@
/* /*
* $Id: str_utl.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ * $Id: str_utl.c 295 2009-10-06 13:47:16Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -20,6 +20,7 @@
#include <qse/cmn/str.h> #include <qse/cmn/str.h>
#include <qse/cmn/chr.h> #include <qse/cmn/chr.h>
#include "mem.h"
#define ISSPACE(c) \ #define ISSPACE(c) \
((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\n') || \ ((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\n') || \
@ -311,7 +312,7 @@ int qse_strspl (
return qse_strspltrn (s, delim, lquote, rquote, escape, QSE_NULL); return qse_strspltrn (s, delim, lquote, rquote, escape, QSE_NULL);
} }
qse_char_t* qse_strtrm (qse_char_t* str, int opt) qse_char_t* qse_strtrmc (qse_char_t* str, int opt)
{ {
qse_char_t* p = str; qse_char_t* p = str;
qse_char_t* s = QSE_NULL, * e = QSE_NULL; qse_char_t* s = QSE_NULL, * e = QSE_NULL;
@ -326,8 +327,124 @@ qse_char_t* qse_strtrm (qse_char_t* str, int opt)
p++; p++;
} }
if (opt & QSE_STRTRM_RIGHT) e[1] = QSE_T('\0'); if (opt & QSE_STRTRMC_RIGHT) e[1] = QSE_T('\0');
if (opt & QSE_STRTRM_LEFT) str = s; if (opt & QSE_STRTRMC_LEFT) str = s;
return str; return str;
} }
/**
 * Removes leading and trailing spaces from a null-terminated string in
 * place. The surviving characters are shifted to the beginning of the
 * buffer and a terminating null is placed right after them.
 *
 * @param str a null-terminated string to trim
 * @return the length of the trimmed string, 0 if the string is empty or
 *         consists of spaces only
 */
qse_size_t qse_strtrm (qse_char_t* str)
{
	qse_char_t* p = str;
	qse_char_t* s = QSE_NULL, * e = QSE_NULL;

	/* locate the first(s) and the last(e) non-space characters */
	while (*p != QSE_T('\0'))
	{
		if (!QSE_ISSPACE(*p))
		{
			if (s == QSE_NULL) s = p;
			e = p;
		}
		p++;
	}

	if (e != QSE_NULL)
	{
		qse_size_t len = e - s + 1;

		e[1] = QSE_T('\0');
		if (str != s)
		{
			/* the source and the destination lie in the same buffer
			 * and may overlap, which a memcpy-like copy does not
			 * permit. the destination always precedes the source,
			 * so copying in ascending order is safe. the terminating
			 * null at s[len] is copied as well. */
			qse_size_t i;
			for (i = 0; i <= len; i++) str[i] = s[i];
		}
		return len;
	}

	/* nothing but spaces */
	str[0] = QSE_T('\0');
	return 0;
}
/**
 * Removes leading and trailing spaces from the first len characters of
 * a string in place. The surviving characters are shifted to the beginning
 * of the buffer. No terminating null is written and no character at or
 * beyond str + len is read or written.
 *
 * @param str a string to trim
 * @param len the number of characters in the string
 * @return the length of the trimmed string, 0 if the input is empty or
 *         consists of spaces only
 */
qse_size_t qse_strxtrm (qse_char_t* str, qse_size_t len)
{
	qse_char_t* p = str, * end = str + len;
	qse_char_t* s = QSE_NULL, * e = QSE_NULL;

	/* locate the first(s) and the last(e) non-space characters */
	while (p < end)
	{
		if (!QSE_ISSPACE(*p))
		{
			if (s == QSE_NULL) s = p;
			e = p;
		}
		p++;
	}

	if (e != QSE_NULL)
	{
		qse_size_t n = e - s + 1;

		/* do not insert a terminating null. only the n significant
		 * characters are moved; e[1] may lie outside the buffer. */
		if (str != s)
		{
			/* the regions may overlap and the destination precedes
			 * the source. copy in ascending order. */
			qse_size_t i;
			for (i = 0; i < n; i++) str[i] = s[i];
		}
		return n;
	}

	/* do not insert a terminating null */
	return 0;
}
/**
 * Packs a null-terminated string in place. Leading and trailing spaces
 * are dropped and every run of spaces inside the string is reduced to
 * the first space character of the run.
 *
 * @return the length of the packed string
 */
qse_size_t qse_strpac (qse_char_t* str)
{
	const qse_char_t* src = str; /* read position */
	qse_char_t* dst = str;       /* write position */

	/* skip leading spaces */
	for (; QSE_ISSPACE(*src); src++) /* nothing */;

	while (*src != QSE_T('\0'))
	{
		qse_char_t ch = *src++;

		*dst++ = ch;
		if (QSE_ISSPACE(ch))
		{
			/* the first space of a run has been kept. drop the rest */
			for (; QSE_ISSPACE(*src); src++) /* nothing */;
		}
	}

	/* a trailing run leaves exactly one space behind. remove it */
	if (dst > str && QSE_ISSPACE(dst[-1])) dst--;

	*dst = QSE_T('\0');
	return dst - str;
}
/**
 * Packs the first len characters of a string in place. Leading and
 * trailing spaces are dropped and every run of spaces inside the string
 * is reduced to the first space character of the run. No terminating
 * null is written.
 *
 * @return the length of the packed string
 */
qse_size_t qse_strxpac (qse_char_t* str, qse_size_t len)
{
	const qse_char_t* src = str;
	const qse_char_t* limit = str + len;
	qse_char_t* dst = str;
	int space_kept = 0; /* non-zero if the last character stored is a space */

	/* nothing is stored until the first non-space character is met */
	while (src < limit && QSE_ISSPACE(*src)) src++;

	for (; src < limit; src++)
	{
		if (QSE_ISSPACE(*src))
		{
			/* only the first space of a run is stored */
			if (space_kept) continue;
			space_kept = 1;
		}
		else space_kept = 0;

		*dst++ = *src;
	}

	/* exclude the single space left by a trailing run */
	return (space_kept) ? (dst - str -1): (dst - str);
}

View File

@ -20,11 +20,8 @@
#include "cut.h" #include "cut.h"
#include "../cmn/mem.h" #include "../cmn/mem.h"
#include <qse/cmn/rex.h>
#include <qse/cmn/chr.h> #include <qse/cmn/chr.h>
#define MAX QSE_TYPE_MAX(qse_size_t)
QSE_IMPLEMENT_COMMON_FUNCTIONS (cut) QSE_IMPLEMENT_COMMON_FUNCTIONS (cut)
static qse_cut_t* qse_cut_init (qse_cut_t* cut, qse_mmgr_t* mmgr); static qse_cut_t* qse_cut_init (qse_cut_t* cut, qse_mmgr_t* mmgr);
@ -33,13 +30,6 @@ static void qse_cut_fini (qse_cut_t* cut);
#define SETERR0(cut,num) \ #define SETERR0(cut,num) \
do { qse_cut_seterror (cut, num, QSE_NULL); } while (0) do { qse_cut_seterror (cut, num, QSE_NULL); } while (0)
#define SETERR1(cut,num,argp,argl) \
do { \
qse_cstr_t __ea__; \
__ea__.ptr = argp; __ea__.len = argl; \
qse_cut_seterror (cut, num, &__ea__); \
} while (0)
static int add_selector_block (qse_cut_t* cut) static int add_selector_block (qse_cut_t* cut)
{ {
qse_cut_sel_blk_t* b; qse_cut_sel_blk_t* b;
@ -59,6 +49,7 @@ static int add_selector_block (qse_cut_t* cut)
cut->sel.lb = b; cut->sel.lb = b;
cut->sel.count = 0; cut->sel.count = 0;
cut->sel.fcount = 0; cut->sel.fcount = 0;
cut->sel.ccount = 0;
return 0; return 0;
} }
@ -79,6 +70,7 @@ static void free_all_selector_blocks (qse_cut_t* cut)
cut->sel.lb->next = QSE_NULL; cut->sel.lb->next = QSE_NULL;
cut->sel.count = 0; cut->sel.count = 0;
cut->sel.fcount = 0; cut->sel.fcount = 0;
cut->sel.ccount = 0;
} }
qse_cut_t* qse_cut_open (qse_mmgr_t* mmgr, qse_size_t xtn) qse_cut_t* qse_cut_open (qse_mmgr_t* mmgr, qse_size_t xtn)
@ -125,13 +117,17 @@ static qse_cut_t* qse_cut_init (qse_cut_t* cut, qse_mmgr_t* mmgr)
/* the block has no data yet */ /* the block has no data yet */
cut->sel.fb.len = 0; cut->sel.fb.len = 0;
cut->e.in.cflds = QSE_COUNTOF(cut->e.in.sflds);
cut->e.in.flds = cut->e.in.sflds;
return cut; return cut;
} }
static void qse_cut_fini (qse_cut_t* cut) static void qse_cut_fini (qse_cut_t* cut)
{ {
free_all_selector_blocks (cut); free_all_selector_blocks (cut);
if (cut->e.in.flds != cut->e.in.sflds)
QSE_MMGR_FREE (cut->mmgr, cut->e.in.flds);
} }
void qse_cut_setoption (qse_cut_t* cut, int option) void qse_cut_setoption (qse_cut_t* cut, int option)
@ -144,21 +140,19 @@ int qse_cut_getoption (qse_cut_t* cut)
return cut->option; return cut->option;
} }
qse_size_t qse_cut_getmaxdepth (qse_cut_t* cut, qse_cut_depth_t id) void qse_cut_clear (qse_cut_t* cut)
{ {
return (id & QSE_CUT_DEPTH_REX_BUILD)? cut->depth.rex.build: free_all_selector_blocks (cut);
(id & QSE_CUT_DEPTH_REX_MATCH)? cut->depth.rex.match: 0; if (cut->e.in.flds != cut->e.in.sflds)
} QSE_MMGR_FREE (cut->mmgr, cut->e.in.flds);
cut->e.in.cflds = QSE_COUNTOF(cut->e.in.sflds);
void qse_cut_setmaxdepth (qse_cut_t* cut, int ids, qse_size_t depth) cut->e.in.flds = cut->e.in.sflds;
{
if (ids & QSE_CUT_DEPTH_REX_BUILD) cut->depth.rex.build = depth;
if (ids & QSE_CUT_DEPTH_REX_MATCH) cut->depth.rex.match = depth;
} }
int qse_cut_comp ( int qse_cut_comp (
qse_cut_t* cut, qse_cut_sel_id_t sel, qse_cut_t* cut, qse_cut_sel_id_t sel,
const qse_char_t* str, qse_size_t len) const qse_char_t* str, qse_size_t len,
qse_char_t din, qse_char_t dout)
{ {
const qse_char_t* p = str; const qse_char_t* p = str;
const qse_char_t* xnd = str + len; const qse_char_t* xnd = str + len;
@ -169,6 +163,7 @@ int qse_cut_comp (
#define EOF(x) ((x) == QSE_CHAR_EOF) #define EOF(x) ((x) == QSE_CHAR_EOF)
#define MASK_START (1 << 1) #define MASK_START (1 << 1)
#define MASK_END (1 << 2) #define MASK_END (1 << 2)
#define MAX QSE_TYPE_MAX(qse_size_t)
free_all_selector_blocks (cut); free_all_selector_blocks (cut);
@ -192,17 +187,20 @@ int qse_cut_comp (
break; break;
} }
if (c == QSE_T('c')) if (cut->option & QSE_CUT_HYBRIDSEL)
{ {
sel = QSE_CUT_SEL_CHAR; if (c == QSE_T('c'))
c = NC (p, xnd); {
while (QSE_ISSPACE(c)) c = NC (p, xnd); sel = QSE_CUT_SEL_CHAR;
} c = NC (p, xnd);
else if (c == QSE_T('f')) while (QSE_ISSPACE(c)) c = NC (p, xnd);
{ }
sel = QSE_CUT_SEL_FIELD; else if (c == QSE_T('f'))
c = NC (p, xnd); {
while (QSE_ISSPACE(c)) c = NC (p, xnd); sel = QSE_CUT_SEL_FIELD;
c = NC (p, xnd);
while (QSE_ISSPACE(c)) c = NC (p, xnd);
}
} }
if (QSE_ISDIGIT(c)) if (QSE_ISDIGIT(c))
@ -260,11 +258,14 @@ int qse_cut_comp (
cut->sel.lb->len++; cut->sel.lb->len++;
cut->sel.count++; cut->sel.count++;
if (sel == QSE_CUT_SEL_FIELD) cut->sel.fcount++; if (sel == QSE_CUT_SEL_FIELD) cut->sel.fcount++;
else cut->sel.ccount++;
if (EOF(c)) break; if (EOF(c)) break;
if (c == QSE_T(',')) c = NC (p, xnd); if (c == QSE_T(',')) c = NC (p, xnd);
} }
cut->sel.din = din;
cut->sel.dout = dout;
return 0; return 0;
} }
@ -332,6 +333,9 @@ static int read_line (qse_cut_t* cut)
} }
cut->e.in.num++; cut->e.in.num++;
if (cut->option & QSE_CUT_TRIMSPACE) qse_str_trm (&cut->e.in.line);
if (cut->option & QSE_CUT_NORMSPACE) qse_str_pac (&cut->e.in.line);
return 1; return 1;
} }
@ -381,6 +385,12 @@ static int write_char (qse_cut_t* cut, qse_char_t c)
return 0; return 0;
} }
/* emit a line terminator by passing a single newline character to
 * write_char() and returning its result unchanged. */
static int write_linebreak (qse_cut_t* cut)
{
	/* TODO: different line termination convention */
	const qse_char_t nl = QSE_T('\n');
	return write_char (cut, nl);
}
static int write_str (qse_cut_t* cut, const qse_char_t* str, qse_size_t len) static int write_str (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
{ {
qse_size_t i; qse_size_t i;
@ -391,17 +401,15 @@ static int write_str (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
return 0; return 0;
} }
int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end) static int cut_chars (
qse_cut_t* cut, qse_size_t start, qse_size_t end, int delim)
{ {
const qse_char_t* ptr = QSE_STR_PTR(&cut->e.in.line); const qse_char_t* ptr = QSE_STR_PTR(&cut->e.in.line);
qse_size_t len = QSE_STR_LEN(&cut->e.in.line); qse_size_t len = QSE_STR_LEN(&cut->e.in.line);
if (len <= 0) if (len <= 0) return 0;
{
/* TODO: delimited only */ if (start <= end)
if (write_char (cut, QSE_T('\n')) <= -1) return -1;
}
else if (start <= end)
{ {
if (start <= len && end > 0) if (start <= len && end > 0)
{ {
@ -409,13 +417,14 @@ int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end)
if (end >= 1) end--; if (end >= 1) end--;
if (end >= len) end = len - 1; if (end >= len) end = len - 1;
if (delim && write_char (cut, cut->sel.dout) <= -1) return -1;
if (write_str (cut, &ptr[start], end-start+1) <= -1) if (write_str (cut, &ptr[start], end-start+1) <= -1)
return -1; return -1;
}
/* TODO: DELIMTIED ONLY */ return 1;
if (write_char (cut, QSE_T('\n')) <= -1) return -1; }
} }
else else
{ {
@ -428,24 +437,142 @@ int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end)
if (start >= len) start = len - 1; if (start >= len) start = len - 1;
if (delim && write_char (cut, cut->sel.dout) <= -1) return -1;
for (i = start; i >= end; i--) for (i = start; i >= end; i--)
{ {
if (write_char (cut, ptr[i]) <= -1) if (write_char (cut, ptr[i]) <= -1)
return -1; return -1;
} }
}
/* TODO: DELIMTIED ONLY */ return 1;
if (write_char (cut, QSE_T('\n')) <= -1) return -1; }
} }
return 0; return 0;
} }
int cut_fields (qse_cut_t* cut, qse_size_t start, qse_size_t end) static int isdelim (qse_cut_t* cut, qse_char_t c)
{ {
/* TODO: field splitting... delimited only */ return ((cut->option & QSE_CUT_WHITESPACE) && QSE_ISSPACE(c)) ||
return -1; (!(cut->option & QSE_CUT_WHITESPACE) && c == cut->sel.din);
}
/*
 * split the current input line into fields at each delimiter recognized
 * by isdelim(). each field is recorded as a pointer/length pair in
 * cut->e.in.flds, which refers to the line buffer without copying it.
 * with QSE_CUT_FOLDDELIMS set, adjacent delimiters count as one.
 * cut->e.in.delimited tells if at least one delimiter has been seen and
 * cut->e.in.nflds receives the number of fields found.
 *
 * returns 0 on success, -1 if the field table can't be enlarged.
 */
static int split_line (qse_cut_t* cut)
{
	const qse_char_t* line = QSE_STR_PTR(&cut->e.in.line);
	qse_size_t linelen = QSE_STR_LEN(&cut->e.in.line);
	qse_size_t pos = 0;  /* index of the next character to examine */
	qse_size_t fno = 0;  /* index of the field being collected */
	qse_size_t flen = 0; /* length of the field being collected */

	cut->e.in.delimited = 0;
	cut->e.in.flds[fno].ptr = line;

	while (pos < linelen)
	{
		qse_char_t ch = line[pos];
		pos++;

		if (!isdelim(cut,ch))
		{
			/* an ordinary character extends the current field */
			flen++;
			continue;
		}

		if (cut->option & QSE_CUT_FOLDDELIMS)
		{
			/* swallow the delimiters that follow immediately */
			while (pos < linelen && isdelim(cut,line[pos])) pos++;
		}

		/* close the current field */
		cut->e.in.flds[fno].len = flen;
		fno++;

		if (fno >= cut->e.in.cflds)
		{
			/* the table is full. double it while it is small and
			 * grow it by a fixed amount once it is large */
			qse_cstr_t* grown;
			qse_size_t ncap = cut->e.in.cflds;

			if (ncap > 100000) ncap += 100000;
			else ncap *= 2;

			grown = QSE_MMGR_ALLOC (cut->mmgr,
				QSE_SIZEOF(*grown) * ncap);
			if (grown == QSE_NULL)
			{
				SETERR0 (cut, QSE_CUT_ENOMEM);
				return -1;
			}

			QSE_MEMCPY (grown, cut->e.in.flds,
				QSE_SIZEOF(*grown) * cut->e.in.cflds);

			/* the built-in static table must not be freed */
			if (cut->e.in.flds != cut->e.in.sflds)
				QSE_MMGR_FREE (cut->mmgr, cut->e.in.flds);

			cut->e.in.flds = grown;
			cut->e.in.cflds = ncap;
		}

		/* open the next field right after the delimiter(s) */
		flen = 0;
		cut->e.in.flds[fno].ptr = &line[pos];
		cut->e.in.delimited = 1;
	}

	/* close the last field */
	cut->e.in.flds[fno].len = flen;
	cut->e.in.nflds = fno + 1;
	return 0;
}
/*
 * write the fields selected by the range start..end of the line split by
 * split_line(). field numbers are 1-based and are clamped to the number
 * of fields available. if start is greater than end, the fields are
 * written in the reverse order. the output delimiter is written between
 * fields, and also before the first field if delim is non-zero.
 *
 * returns 1 if the range selected something, 0 if nothing has been written
 * (the line is not delimited or the range falls outside the line), and
 * -1 on an output error.
 */
static int cut_fields (
	qse_cut_t* cut, qse_size_t start, qse_size_t end, int delim)
{
	qse_size_t len = cut->e.in.nflds;
	qse_size_t i;

	if (!cut->e.in.delimited) return 0;

	QSE_ASSERT (len > 0);

	if (start <= end)
	{
		if (start <= len && end > 0)
		{
			/* convert to 0-based indices and clamp the upper bound */
			if (start >= 1) start--;
			if (end >= 1) end--;

			if (end >= len) end = len - 1;

			if (delim && write_char (cut, cut->sel.dout) <= -1) return -1;

			for (i = start; i <= end; i++)
			{
				if (write_str (cut, cut->e.in.flds[i].ptr, cut->e.in.flds[i].len) <= -1)
					return -1;

				if (i < end && write_char (cut, cut->sel.dout) <= -1) return -1;
			}

			return 1;
		}
	}
	else
	{
		if (start > 0 && end <= len)
		{
			/* convert to 0-based indices and clamp the upper bound */
			if (start >= 1) start--;
			if (end >= 1) end--;

			if (start >= len) start = len - 1;

			if (delim && write_char (cut, cut->sel.dout) <= -1) return -1;

			/* walk down from start to end. start >= end holds here.
			 * the loop must stop on reaching end instead of testing
			 * 'i >= end' after a decrement: when end is 0, i would
			 * wrap around and index far beyond the field table. */
			i = start;
			while (1)
			{
				if (write_str (cut, cut->e.in.flds[i].ptr, cut->e.in.flds[i].len) <= -1)
					return -1;

				if (i == end) break;

				if (write_char (cut, cut->sel.dout) <= -1) return -1;
				i--;
			}

			return 1;
		}
	}

	return 0;
}
int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf) int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf)
@ -500,11 +627,12 @@ int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf)
cut->e.out.eof = 1; cut->e.out.eof = 1;
} }
while (1) while (1)
{ {
qse_cut_sel_blk_t* b; qse_cut_sel_blk_t* b;
qse_size_t i; int id = 0; /* mark 'no output' so far */
int delimited = 0;
int linebreak = 0;
n = read_line (cut); n = read_line (cut);
if (n <= -1) { ret = -1; goto done; } if (n <= -1) { ret = -1; goto done; }
@ -512,19 +640,86 @@ int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf)
if (cut->sel.fcount > 0) if (cut->sel.fcount > 0)
{ {
/* split the line into fields */ if (split_line (cut) <= -1) { ret = -1; goto done; }
delimited = cut->e.in.delimited;
} }
for (b = &cut->sel.fb; b != QSE_NULL; b = b->next) for (b = &cut->sel.fb; b != QSE_NULL; b = b->next)
{ {
qse_size_t i;
for (i = 0; i < b->len; i++) for (i = 0; i < b->len; i++)
{ {
ret = (b->range[i].id == QSE_CUT_SEL_CHAR)? if (b->range[i].id == QSE_CUT_SEL_CHAR)
cut_chars (cut, b->range[i].start, b->range[i].end): {
cut_fields (cut, b->range[i].start, b->range[i].end); n = cut_chars (
if (ret <= -1) goto done; cut,
b->range[i].start,
b->range[i].end,
id == 2
);
if (n >= 1)
{
/* mark a char's been output */
id = 1;
}
}
else
{
n = cut_fields (
cut,
b->range[i].start,
b->range[i].end,
id > 0
);
if (n >= 1)
{
/* mark a field's been output */
id = 2;
}
}
if (n <= -1) { ret = -1; goto done; }
} }
} }
if (cut->sel.ccount > 0)
{
/* so long as there is a character selector,
* a newline must be printed */
linebreak = 1;
}
else if (cut->sel.fcount > 0)
{
/* if only field selectors are specified */
if (delimited)
{
/* and if the input line is delimited,
* write a line break */
linebreak = 1;
}
else if (!(cut->option & QSE_CUT_DELIMONLY))
{
/* if not delimited, write the
* entire undelimited input line depending
* on the option set. */
if (write_str (cut,
QSE_STR_PTR(&cut->e.in.line),
QSE_STR_LEN(&cut->e.in.line)) <= -1)
{
ret = -1; goto done;
}
/* a line break is needed in this case */
linebreak = 1;
}
}
if (linebreak && write_linebreak(cut) <= -1)
{
ret = -1; goto done;
}
} }
done: done:

View File

@ -34,7 +34,7 @@ struct qse_cut_sel_blk_t
qse_cut_sel_id_t id; qse_cut_sel_id_t id;
qse_size_t start; qse_size_t start;
qse_size_t end; qse_size_t end;
} range[256]; } range[128];
qse_cut_sel_blk_t* next; qse_cut_sel_blk_t* next;
}; };
@ -61,8 +61,13 @@ struct qse_cut_t
{ {
qse_cut_sel_blk_t fb; /**< the first block is static */ qse_cut_sel_blk_t fb; /**< the first block is static */
qse_cut_sel_blk_t* lb; /**< points to the last block */ qse_cut_sel_blk_t* lb; /**< points to the last block */
qse_char_t din; /**< input field delimiter */
qse_char_t dout; /**< output field delimiter */
qse_size_t count; qse_size_t count;
qse_size_t fcount; qse_size_t fcount;
qse_size_t ccount;
} sel; } sel;
struct struct
@ -94,6 +99,12 @@ struct qse_cut_t
qse_str_t line; /**< pattern space */ qse_str_t line; /**< pattern space */
qse_size_t num; /**< current line number */ qse_size_t num; /**< current line number */
qse_size_t nflds; /**< the number of fields */
qse_size_t cflds; /**< capacity of flds field */
qse_cstr_t sflds[128]; /**< static field buffer */
qse_cstr_t* flds;
int delimited;
} in; } in;
} e; } e;

View File

@ -28,9 +28,7 @@ const qse_char_t* qse_cut_dflerrstr (qse_cut_t* cut, qse_cut_errnum_t errnum)
QSE_T("no error"), QSE_T("no error"),
QSE_T("insufficient memory"), QSE_T("insufficient memory"),
QSE_T("selector not valid"), QSE_T("selector not valid"),
QSE_T("regular expression '${0}' incomplete"), QSE_T("io error with file '${0}'"),
QSE_T("failed to compile regular expression '${0}'"),
QSE_T("failed to match regular expression"),
QSE_T("error returned by user io handler") QSE_T("error returned by user io handler")
}; };

View File

@ -515,14 +515,14 @@ static int test12 (void)
qse_char_t a3[] = QSE_T(" this is a test string "); qse_char_t a3[] = QSE_T(" this is a test string ");
qse_printf (QSE_T("[%s] =>"), a1); qse_printf (QSE_T("[%s] =>"), a1);
qse_printf (QSE_T("[%s]\n"), qse_strtrm (a1, QSE_STRTRM_LEFT)); qse_printf (QSE_T("[%s]\n"), qse_strtrmc (a1, QSE_STRTRMC_LEFT));
qse_printf (QSE_T("[%s] =>"), a2); qse_printf (QSE_T("[%s] =>"), a2);
qse_printf (QSE_T("[%s]\n"), qse_strtrm (a2, QSE_STRTRM_RIGHT)); qse_printf (QSE_T("[%s]\n"), qse_strtrmc (a2, QSE_STRTRMC_RIGHT));
qse_printf (QSE_T("[%s] =>"), a3); qse_printf (QSE_T("[%s] =>"), a3);
qse_printf (QSE_T("[%s]\n"), qse_printf (QSE_T("[%s]\n"),
qse_strtrm (a3, QSE_STRTRM_LEFT|QSE_STRTRM_RIGHT)); qse_strtrmc (a3, QSE_STRTRMC_LEFT|QSE_STRTRMC_RIGHT));
return 0; return 0;
} }