added initial code for qse_cut_exec()

This commit is contained in:
hyung-hwan 2009-10-06 07:33:26 +00:00
parent 6de11d0391
commit b3389e89d5
4 changed files with 316 additions and 67 deletions

View File

@ -44,26 +44,6 @@ enum qse_cut_errnum_t
QSE_CUT_EREXIC, /**< regular expression '${0}' incomplete */
QSE_CUT_EREXBL, /**< failed to compile regular expression '${0}' */
QSE_CUT_EREXMA, /**< failed to match regular expression */
QSE_CUT_EA1PHB, /**< address 1 prohibited for '${0}' */
QSE_CUT_EA2PHB, /**< address 2 prohibited */
QSE_CUT_EA2MOI, /**< address 2 missing or invalid */
QSE_CUT_ENEWLN, /**< newline expected */
QSE_CUT_EBSEXP, /**< backslash expected */
QSE_CUT_EBSDEL, /**< backslash used as delimiter */
QSE_CUT_EGBABS, /**< garbage after backslash */
QSE_CUT_ESCEXP, /**< semicolon expected */
QSE_CUT_ELABEM, /**< empty label name */
QSE_CUT_ELABDU, /**< duplicate label name '${0}' */
QSE_CUT_ELABNF, /**< label '${0}' not found */
QSE_CUT_EFILEM, /**< empty file name */
QSE_CUT_EFILIL, /**< illegal file name */
QSE_CUT_ETSNSL, /**< strings in translation set not the same length*/
QSE_CUT_EGRNBA, /**< group brackets not balanced */
QSE_CUT_EGRNTD, /**< group nesting too deep */
QSE_CUT_EOCSDU, /**< multiple occurrence specifiers */
QSE_CUT_EOCSZE, /**< occurrence specifier zero */
QSE_CUT_EOCSTL, /**< occurrence specifier too large */
QSE_CUT_EIOFIL, /**< io error with file '${0}'*/
QSE_CUT_EIOUSR /**< error returned by user io handler */
};
typedef enum qse_cut_errnum_t qse_cut_errnum_t;
@ -98,6 +78,15 @@ enum qse_cut_option_t
};
typedef enum qse_cut_option_t qse_cut_option_t;
/**
 * The qse_cut_sel_id_t type defines selector types.
 * Each range compiled by qse_cut_comp() is tagged with one of these values,
 * and qse_cut_exec() uses the tag to choose between cutting by character
 * and cutting by field.
 */
enum qse_cut_sel_id_t
{
QSE_CUT_SEL_CHAR, /**< character; chosen by a 'c' prefix in a selector string */
QSE_CUT_SEL_FIELD /**< field; chosen by an 'f' prefix in a selector string */
};
typedef enum qse_cut_sel_id_t qse_cut_sel_id_t;
/**
* The qse_cut_depth_t type defines IDs for qse_cut_getmaxdepth() and
@ -129,7 +118,6 @@ typedef enum qse_cut_io_cmd_t qse_cut_io_cmd_t;
struct qse_cut_io_arg_t
{
void* handle; /**< IO handle */
const qse_char_t* path; /**< file path. QSE_NULL for a console */
};
typedef struct qse_cut_io_arg_t qse_cut_io_arg_t;
@ -309,6 +297,7 @@ void qse_cut_seterror (
*/
int qse_cut_comp (
qse_cut_t* cut, /**< text cutter */
qse_cut_sel_id_t sel, /**< initial selector type, used until a 'c' or 'f' prefix in the string overrides it */
const qse_char_t* ptr, /**< pointer to a string containing selectors */
qse_size_t len /**< the number of characters in the string */
);

View File

@ -58,6 +58,7 @@ static int add_selector_block (qse_cut_t* cut)
cut->sel.lb->next = b;
cut->sel.lb = b;
cut->sel.count = 0;
cut->sel.fcount = 0;
return 0;
}
@ -77,6 +78,7 @@ static void free_all_selector_blocks (qse_cut_t* cut)
cut->sel.lb->len = 0;
cut->sel.lb->next = QSE_NULL;
cut->sel.count = 0;
cut->sel.fcount = 0;
}
qse_cut_t* qse_cut_open (qse_mmgr_t* mmgr, qse_size_t xtn)
@ -154,12 +156,13 @@ void qse_cut_setmaxdepth (qse_cut_t* cut, int ids, qse_size_t depth)
if (ids & QSE_CUT_DEPTH_REX_MATCH) cut->depth.rex.match = depth;
}
int qse_cut_comp (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
int qse_cut_comp (
qse_cut_t* cut, qse_cut_sel_id_t sel,
const qse_char_t* str, qse_size_t len)
{
const qse_char_t* p = str;
const qse_char_t* xnd = str + len;
qse_cint_t c;
int type = CHAR;
#define CC(x,y) (((x) <= (y))? ((qse_cint_t)*(x)): QSE_CHAR_EOF)
#define NC(x,y) (((x) < (y))? ((qse_cint_t)*(++(x))): QSE_CHAR_EOF)
@ -191,13 +194,13 @@ int qse_cut_comp (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
if (c == QSE_T('c'))
{
type = CHAR;
sel = QSE_CUT_SEL_CHAR;
c = NC (p, xnd);
while (QSE_ISSPACE(c)) c = NC (p, xnd);
}
else if (c == QSE_T('f'))
{
type = FIELD;
sel = QSE_CUT_SEL_FIELD;
c = NC (p, xnd);
while (QSE_ISSPACE(c)) c = NC (p, xnd);
}
@ -251,11 +254,12 @@ int qse_cut_comp (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
}
}
cut->sel.lb->range[cut->sel.lb->len].type = type;
cut->sel.lb->range[cut->sel.lb->len].id = sel;
cut->sel.lb->range[cut->sel.lb->len].start = start;
cut->sel.lb->range[cut->sel.lb->len].end = end;
cut->sel.lb->len++;
cut->sel.count++;
if (sel == QSE_CUT_SEL_FIELD) cut->sel.fcount++;
if (EOF(c)) break;
if (c == QSE_T(',')) c = NC (p, xnd);
@ -264,24 +268,270 @@ int qse_cut_comp (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
return 0;
}
int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf)
/*
 * Fetches the next input character into *c. When every buffered character
 * has been consumed (pos >= len), the input handler is asked to refill
 * cut->e.in.buf first.
 * Returns 1 when a character has been stored in *c, 0 when the handler
 * reports end of input, and -1 on failure.
 */
static int read_char (qse_cut_t* cut, qse_char_t* c)
{
qse_ssize_t n;
/* NOTE(review): the brace and the selector comment that follow look like
 * residue of the removed qse_cut_exec() body interleaved by the diff view;
 * confirm against the committed file before relying on this text. */
{
/* selector: c12-30, b30-40, f1-3,6-7,10
 * default input field delimiter: TAB
 * default output field delimiter: same as input delimiter
 * option: QSE_CUT_ONLYDELIMITED
 */
if (cut->e.in.pos >= cut->e.in.len)
{
/* clear the error number so that one set by the handler can be told apart */
cut->errnum = QSE_CUT_ENOERR;
n = cut->e.in.fun (
cut, QSE_CUT_IO_READ, &cut->e.in.arg,
cut->e.in.buf, QSE_COUNTOF(cut->e.in.buf)
);
if (n <= -1)
{
/* the handler failed without setting an error number of its own */
if (cut->errnum == QSE_CUT_ENOERR)
SETERR0 (cut, QSE_CUT_EIOUSR);
return -1;
}
/* NOTE(review): the debug loop below (through the qse_printf call) also
 * appears to be removed code from the old qse_cut_exec(); it is unbalanced
 * as shown - confirm against the committed file. */
{
int i;
for (i = 0; i < cut->sel.count; i++)
{
qse_printf (QSE_T("%d start = %llu, end = %llu\n"),
cut->sel.fb.range[i].type,
(unsigned long long)cut->sel.fb.range[i].start,
(unsigned long long)cut->sel.fb.range[i].end);
if (n == 0) return 0; /* end of file */
/* restart consumption from the beginning of the refilled buffer */
cut->e.in.len = n;
cut->e.in.pos = 0;
}
*c = cut->e.in.buf[cut->e.in.pos++];
return 1;
}
/*
 * Collects the next input line into cut->e.in.line, one character at a
 * time via read_char(). The terminating newline is not stored.
 * Returns 1 when a line is available, 0 when the input is exhausted,
 * and -1 on an I/O or memory failure.
 */
static int read_line (qse_cut_t* cut)
{
	qse_size_t nchars = 0;
	qse_char_t ch;
	int rc;

	qse_str_clear (&cut->e.in.line);

	/* a previous call already hit the end of the input */
	if (cut->e.in.eof) return 0;

	for (;;)
	{
		rc = read_char (cut, &ch);
		if (rc <= -1) return -1;

		if (rc == 0)
		{
			/* remember the end of input; a non-empty unterminated
			 * last line is still handed over as a line */
			cut->e.in.eof = 1;
			if (nchars == 0) return 0;
			break;
		}

		/* the line terminator is not part of the line.
		 * TODO: support different line end conventions */
		if (ch == QSE_T('\n')) break;

		if (qse_str_ccat (&cut->e.in.line, ch) == (qse_size_t)-1)
		{
			SETERR0 (cut, QSE_CUT_ENOMEM);
			return -1;
		}

		nchars++;
	}

	cut->e.in.num++;
	return 1;
}
/*
 * Hands everything held in cut->e.out.buf to the output handler, calling it
 * again with the unwritten tail whenever it accepts only part of the data.
 * Returns 0 once the buffer is empty, or -1 when the handler fails or
 * reports end of file.
 */
static int flush (qse_cut_t* cut)
{
	qse_size_t offset;
	qse_ssize_t written;

	for (offset = 0; cut->e.out.len > 0; offset += written, cut->e.out.len -= written)
	{
		/* clear the error number so that one set by the handler is detectable */
		cut->errnum = QSE_CUT_ENOERR;

		written = cut->e.out.fun (
			cut, QSE_CUT_IO_WRITE, &cut->e.out.arg,
			&cut->e.out.buf[offset], cut->e.out.len);

		if (written <= 0)
		{
			/* a handler failure (<= -1) and the end of file (0) are
			 * both errors while data is still pending */
			if (cut->errnum == QSE_CUT_ENOERR)
				SETERR0 (cut, QSE_CUT_EIOUSR);
			return -1;
		}
	}

	return 0;
}
/*
 * Appends one character to the output buffer. The buffer is flushed when
 * the character is a newline or when the buffer has become full.
 * Returns 0 on success and -1 when flushing fails.
 */
static int write_char (qse_cut_t* cut, qse_char_t c)
{
	cut->e.out.buf[cut->e.out.len] = c;
	cut->e.out.len += 1;

	/* keep buffering while the line is incomplete and room remains */
	if (c != QSE_T('\n') &&
	    cut->e.out.len < QSE_COUNTOF(cut->e.out.buf)) return 0;

	return flush (cut);
}
/*
 * Writes the first len characters of str through write_char().
 * Returns 0 on success, or -1 as soon as a character cannot be written.
 */
static int write_str (qse_cut_t* cut, const qse_char_t* str, qse_size_t len)
{
	qse_size_t remaining = len;
	const qse_char_t* cur = str;

	while (remaining > 0)
	{
		if (write_char (cut, *cur) <= -1) return -1;
		cur++;
		remaining--;
	}

	return 0;
}
/*
 * Writes the characters of the current input line (cut->e.in.line) selected
 * by the 1-based, inclusive range start..end, followed by a newline.
 *
 * - start <= end: the characters are written in line order; end is clamped
 *   to the last character of the line.
 * - start > end: the characters are written in reverse order, from start
 *   down to end; start is clamped to the last character of the line.
 * - A position of 0 is treated like position 1.
 * - A range lying completely outside the line, or an empty line, produces
 *   only the newline.
 *
 * Returns 0 on success and -1 when writing fails.
 */
int cut_chars (qse_cut_t* cut, qse_size_t start, qse_size_t end)
{
	const qse_char_t* ptr = QSE_STR_PTR(&cut->e.in.line);
	qse_size_t len = QSE_STR_LEN(&cut->e.in.line);

	if (len > 0)
	{
		if (start <= end)
		{
			if (start <= len && end > 0)
			{
				/* convert 1-based positions to 0-based indices */
				if (start >= 1) start--;
				if (end >= 1) end--;
				if (end >= len) end = len - 1;

				if (write_str (cut, &ptr[start], end-start+1) <= -1)
					return -1;
			}
		}
		else
		{
			if (start > 0 && end <= len)
			{
				qse_size_t i;

				/* convert 1-based positions to 0-based indices */
				if (start >= 1) start--;
				if (end >= 1) end--;
				if (start >= len) start = len - 1;

				/* qse_size_t is unsigned: a loop of the form
				 * 'for (i = start; i >= end; i--)' never ends when
				 * end is 0 because i wraps around instead of
				 * becoming negative. counting from start + 1 and
				 * decrementing before use stops exactly at end. */
				for (i = start + 1; i > end; )
				{
					i--;
					if (write_char (cut, ptr[i]) <= -1)
						return -1;
				}
			}
		}
	}

	/* TODO: DELIMITED ONLY */
	if (write_char (cut, QSE_T('\n')) <= -1) return -1;

	return 0;
}
/*
 * Placeholder for field-based cutting. Nothing is written and no error
 * number is set; the function reports failure unconditionally.
 * Returns -1 always.
 */
int cut_fields (qse_cut_t* cut, qse_size_t start, qse_size_t end)
{
	/* TODO: field splitting... delimited only */

	/* the arguments are not used until field splitting is implemented */
	(void)cut;
	(void)start;
	(void)end;

	return -1;
}
/*
 * Runs the compiled selectors over the input stream.
 *
 * inf and outf are the handlers used to open, read, write and close the
 * input and output streams. Every line obtained with read_line() is passed,
 * once per compiled selector range, to cut_chars() or cut_fields() depending
 * on the selector ID of the range.
 *
 * Returns 0 on success and -1 on failure. When a handler fails without
 * setting an error number, QSE_CUT_EIOUSR is set.
 */
int qse_cut_exec (qse_cut_t* cut, qse_cut_io_fun_t inf, qse_cut_io_fun_t outf)
{
int ret = 0;
qse_ssize_t n;
/* reset the per-execution output and input state */
cut->e.out.fun = outf;
cut->e.out.eof = 0;
cut->e.out.len = 0;
cut->e.in.fun = inf;
cut->e.in.eof = 0;
cut->e.in.len = 0;
cut->e.in.pos = 0;
cut->e.in.num = 0;
/* the line buffer is released at done3 on every path past this point */
if (qse_str_init (&cut->e.in.line, QSE_MMGR(cut), 256) == QSE_NULL)
{
SETERR0 (cut, QSE_CUT_ENOMEM);
return -1;
}
/* clear the error number so that one set by the handler can be detected */
cut->errnum = QSE_CUT_ENOERR;
n = cut->e.in.fun (cut, QSE_CUT_IO_OPEN, &cut->e.in.arg, QSE_NULL, 0);
if (n <= -1)
{
ret = -1;
if (cut->errnum == QSE_CUT_ENOERR)
SETERR0 (cut, QSE_CUT_EIOUSR);
/* the input stream was not opened, so it must not be closed */
goto done3;
}
if (n == 0)
{
/* EOF reached upon opening an input stream.
 * no data to process. this is success */
goto done2;
}
cut->errnum = QSE_CUT_ENOERR;
n = cut->e.out.fun (cut, QSE_CUT_IO_OPEN, &cut->e.out.arg, QSE_NULL, 0);
if (n <= -1)
{
ret = -1;
if (cut->errnum == QSE_CUT_ENOERR)
SETERR0 (cut, QSE_CUT_EIOUSR);
/* only the input stream is open at this point */
goto done2;
}
if (n == 0)
{
/* still don't know if we will write something.
 * just mark EOF on the output stream and continue */
cut->e.out.eof = 1;
}
while (1)
{
qse_cut_sel_blk_t* b;
qse_size_t i;
n = read_line (cut);
if (n <= -1) { ret = -1; goto done; }
if (n == 0) goto done;
if (cut->sel.fcount > 0)
{
/* split the line into fields */
/* NOTE(review): nothing is done here yet; field splitting is
 * not implemented in this function */
}
/* walk all selector blocks, starting with the static first block */
for (b = &cut->sel.fb; b != QSE_NULL; b = b->next)
{
for (i = 0; i < b->len; i++)
{
ret = (b->range[i].id == QSE_CUT_SEL_CHAR)?
cut_chars (cut, b->range[i].start, b->range[i].end):
cut_fields (cut, b->range[i].start, b->range[i].end);
if (ret <= -1) goto done;
}
}
}
/* cleanup runs in reverse order of acquisition; the values returned by
 * the close requests are not checked */
done:
cut->e.out.fun (cut, QSE_CUT_IO_CLOSE, &cut->e.out.arg, QSE_NULL, 0);
done2:
cut->e.in.fun (cut, QSE_CUT_IO_CLOSE, &cut->e.in.arg, QSE_NULL, 0);
done3:
qse_str_fini (&cut->e.in.line);
return ret;
}

View File

@ -28,14 +28,10 @@ typedef struct qse_cut_sel_blk_t qse_cut_sel_blk_t;
struct qse_cut_sel_blk_t
{
qse_size_t len;
qse_size_t len;
struct
{
enum
{
CHAR,
FIELD
} type;
qse_cut_sel_id_t id;
qse_size_t start;
qse_size_t end;
} range[256];
@ -66,7 +62,41 @@ struct qse_cut_t
qse_cut_sel_blk_t fb; /**< the first block is static */
qse_cut_sel_blk_t* lb; /**< points to the last block */
qse_size_t count;
qse_size_t fcount;
} sel;
struct
{
/** data needed for output streams */
struct
{
qse_cut_io_fun_t fun; /**< an output handler */
qse_cut_io_arg_t arg; /**< output handling data */
qse_char_t buf[2048];
qse_size_t len;
int eof;
} out;
/** data needed for input streams */
struct
{
qse_cut_io_fun_t fun; /**< an input handler */
qse_cut_io_arg_t arg; /**< input handling data */
qse_char_t xbuf[1]; /**< a read-ahead buffer */
int xbuf_len; /**< data length in the buffer */
qse_char_t buf[2048]; /**< input buffer */
qse_size_t len; /**< data length in the buffer */
qse_size_t pos; /**< current position in the buffer */
int eof; /**< EOF indicator */
qse_str_t line; /**< pattern space */
qse_size_t num; /**< current line number */
} in;
} e;
};
#ifdef __cplusplus

View File

@ -31,26 +31,6 @@ const qse_char_t* qse_cut_dflerrstr (qse_cut_t* cut, qse_cut_errnum_t errnum)
QSE_T("regular expression '${0}' incomplete"),
QSE_T("failed to compile regular expression '${0}'"),
QSE_T("failed to match regular expression"),
QSE_T("address 1 prohibited for '${0}'"),
QSE_T("address 2 prohibited for '${0}'"),
QSE_T("address 2 missing or invalid"),
QSE_T("newline expected"),
QSE_T("backslash expected"),
QSE_T("backslash ucut as delimiter"),
QSE_T("garbage after backslash"),
QSE_T("semicolon expected"),
QSE_T("empty label name"),
QSE_T("duplicate label name '${0}'"),
QSE_T("label '${0}' not found"),
QSE_T("empty file name"),
QSE_T("illegal file name"),
QSE_T("strings in translation set not the same length"),
QSE_T("group brackets not balanced"),
QSE_T("group nesting too deep"),
QSE_T("multiple occurrence specifiers"),
QSE_T("occurrence specifier zero"),
QSE_T("occurrence specifier too large"),
QSE_T("io error with file '${0}'"),
QSE_T("error returned by user io handler")
};