added some routines to qse_sed_c

This commit is contained in:
hyung-hwan 2009-02-09 08:40:19 +00:00
parent 41b324b7a7
commit b1211d6e58
3 changed files with 279 additions and 48 deletions

View File

@ -21,12 +21,30 @@
#include <qse/types.h> #include <qse/types.h>
#include <qse/macros.h> #include <qse/macros.h>
#include <qse/cmn/str.h>
/*
 * Error codes kept in the errnum field of qse_sed_t.
 *
 * QSE_SED_ENOERR is the first enumerator and therefore has the value 0,
 * so a zero-filled qse_sed_t reports "no error".
 *
 * The last enumerator carries no trailing comma: a trailing comma in an
 * enumerator list is rejected by strict C90 compilers.
 */
enum qse_sed_errnum_t
{
	QSE_SED_ENOERR, /* no error */
	QSE_SED_ENOMEM, /* no memory */
	QSE_SED_ETMTXT, /* too much text */
	QSE_SED_ECMDNR, /* command not recognized */
	QSE_SED_ECMDGB, /* command garbled */
	QSE_SED_ELBLTL, /* label too long */
	QSE_SED_EREXBL, /* regular expression build error */
	QSE_SED_EA2NNC  /* address 2 not necessary */
};
typedef struct qse_sed_t qse_sed_t; typedef struct qse_sed_t qse_sed_t;
typedef enum qse_sed_errnum_t qse_sed_errnum_t;
struct qse_sed_t struct qse_sed_t
{ {
QSE_DEFINE_COMMON_FIELDS (sed) QSE_DEFINE_COMMON_FIELDS (sed)
qse_sed_errnum_t errnum;
void* lastrex;
qse_str_t rexbuf; /* temporary regular expression buffer */
}; };

View File

@ -18,6 +18,7 @@
#include "sed.h" #include "sed.h"
#include "../cmn/mem.h" #include "../cmn/mem.h"
#include <qse/cmn/rex.h>
QSE_IMPLEMENT_COMMON_FUNCTIONS (sed) QSE_IMPLEMENT_COMMON_FUNCTIONS (sed)
@ -58,69 +59,101 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
QSE_MEMSET (sed, 0, sizeof(*sed)); QSE_MEMSET (sed, 0, sizeof(*sed));
sed->mmgr = mmgr; sed->mmgr = mmgr;
if (qse_str_init (&sed->rexbuf, mmgr, 0) == QSE_NULL)
{
sed->errnum = QSE_SED_ENOMEM;
return QSE_NULL;
}
return sed; return sed;
} }
void qse_sed_fini (qse_sed_t* sed) void qse_sed_fini (qse_sed_t* sed)
{ {
qse_str_fini (&sed->rexbuf);
} }
static void compile (qse_sed_t* sed, const qse_char_t* cp, qse_char_t seof) /* get the current character without advancing the pointer */
/* CC(ptr,end): yield the character at ptr if ptr is before end, or
 * QSE_CHAR_EOF otherwise. ptr is not modified. both arguments are
 * parenthesized so that expressions such as CC(p + 1, end) expand
 * correctly. note that ptr is evaluated twice. */
#define CC(ptr,end) (((ptr) < (end))? *(ptr): QSE_CHAR_EOF)
/* get the current character and advance the pointer past it.
 * if ptr has already reached end, QSE_CHAR_EOF is yielded and ptr is
 * left unchanged. ptr must be a modifiable lvalue; it is parenthesized
 * so that an argument like *pp advances the pointed-to pointer rather
 * than pp itself. */
#define NC(ptr,end) (((ptr) < (end))? *(ptr)++: QSE_CHAR_EOF)
static const void* compile (
qse_sed_t* sed, const qse_char_t* ptr,
const qse_char_t* end, qse_char_t seof)
{ {
qse_char_t c; void* code;
qse_cint_t c;
if ((c = *cp++) == seof) return QSE_NULL; /* // */ qse_str_clear (&sed->rexbuf);
do for (;;)
{ {
if (c == QSE_T('\0') || c == QSE_T('\n')) c = NC (ptr, end);
if (c == QSE_CHAR_EOF || c == QSE_T('\n'))
{ {
/* premature end of text */ sed->errnum = QSE_SED_ETMTXT;
return QSE_NULL; /* TODO: return an error..*/ return QSE_NULL;
} }
if (c == QSE_T('\\') if (c == seof) break;
if (c == QSE_T('\\'))
{ {
if (ep >= end) c = NC (ptr, end);
if (c == QSE_CHAR_EOF || c == QSE_T('\n'))
{ {
/* too many characters */ sed->errnum = QSE_SED_ETMTXT;
return QSE_NULL; /* TODO: return an error..*/ return QSE_NULL;
} }
*ep++ = c; if (c == QSE_T('n')) c = QSE_T('\n');
// TODO: support more escaped characters??
/* TODO: more escaped characters */
if ((c = *cp++) == QSE_T('n') c = QSE_T('n');
} }
if (ep >= end) if (qse_str_ccat (&sed->rexbuf, c) == (qse_size_t)-1)
{ {
/* too many characters */ sed->errnum = QSE_SED_ENOMEM;
return QSE_NULL; /* TODO: return an error..*/ return QSE_NULL;
} }
}
*ep++ = c; /* TODO: maximum depth - optionize the second parameter */
code = qse_buildrex (
sed->mmgr, 0,
QSE_STR_PTR(&sed->rexbuf),
QSE_STR_LEN(&sed->rexbuf),
QSE_NULL);
if (code == QSE_NULL)
{
sed->errnum = QSE_SED_EREXBL;
return QSE_NULL;
} }
while ((c = *cp++) != seof);
*ep = QSE_T('\0'); sed->lastrex = code;
regcomp (expbuf); return ptr;
} }
static const qse_char_t* address ( static const qse_char_t* address (
qse_sed_t* sed, const qse_char_t* cp, qse_sed_a_t* a) qse_sed_t* sed, const qse_char_t* ptr,
const qse_char_t* end, qse_sed_a_t* a)
{ {
qse_char_t c; qse_cint_t c;
if ((c = *cp) == QSE_T('$')) c = NC (ptr, end);
if ((c = *ptr) == QSE_T('$'))
{ {
a->type = QSE_SED_A_DOL; a->type = QSE_SED_A_DOL;
cp++; ptr++;
} }
else if (c == QSE_T('/')) else if (c == QSE_T('/'))
{ {
cp++; ptr++;
a->type = (a->u.rex = compile(sed, c))? A_RE: A_LAST; if (compile (sed, ptr, end, c) == QSE_NULL)
return QSE_NULL;
a->u.rex = sed->lastrex;
a->type = QSE_SED_A_REX;
} }
else if (c >= QSE_T('0') && c <= QSE_T('9')) else if (c >= QSE_T('0') && c <= QSE_T('9'))
{ {
@ -128,9 +161,9 @@ static const qse_char_t* address (
do do
{ {
lno = lno * 10 + c - QSE_T('0'); lno = lno * 10 + c - QSE_T('0');
cp++; ptr++;
} }
while ((c = *cp) >= QSE_T('0') && c <= QSE_T('9')) while ((c = *ptr) >= QSE_T('0') && c <= QSE_T('9'));
/* line number 0 is illegal */ /* line number 0 is illegal */
if (lno == 0) return QSE_NULL; if (lno == 0) return QSE_NULL;
@ -138,35 +171,186 @@ static const qse_char_t* address (
a->type = QSE_SED_A_LINE; a->type = QSE_SED_A_LINE;
a->u.line = lno; a->u.line = lno;
} }
else if (c == QSE_T('\\'))
{
/* TODO */
}
else else
{ {
a->type = QSE_SED_A_NONE; a->type = QSE_SED_A_NONE;
} }
return cp; return ptr;
} }
static void fcomp (const qse_char_t* str) static const qse_char_t* command (
qse_sed_t* sed, const qse_char_t* ptr, const qse_char_t* end)
{ {
const qse_char_t* cp = str; qse_cint_t c;
c = CC (ptr, end);
switch (c)
{
default:
sed->errnum = QSE_SED_ECMDNR;
return QSE_NULL;
#if 0
case QSE_T('{'):
cmd = QSE_SED_C_B;
break;
case QSE_T('}'):
break;
case QSE_T('='):
cmd = QSE_SED_C_EQ;
if (ad2.type != QSE_SED_A_NONE)
{
sed->errnum = QSE_SED_EA2NNC;
return QSE_NULL;
}
break;
#endif
case QSE_T(':'):
break;
case QSE_T('a'):
break;
case QSE_T('c'):
break;
case QSE_T('i'):
break;
case QSE_T('g'):
break;
case QSE_T('G'):
break;
case QSE_T('h'):
break;
case QSE_T('H'):
break;
case QSE_T('t'):
break;
case QSE_T('b'):
break;
case QSE_T('n'):
break;
case QSE_T('N'):
break;
case QSE_T('p'):
break;
case QSE_T('P'):
break;
case QSE_T('r'):
break;
case QSE_T('d'):
break;
case QSE_T('D'):
break;
case QSE_T('q'):
break;
case QSE_T('l'):
break;
case QSE_T('s'):
break;
case QSE_T('w'):
break;
case QSE_T('x'):
break;
case QSE_T('y'):
break;
}
return ptr;
}
static const qse_char_t* fcomp (
qse_sed_t* sed, const qse_char_t* ptr, qse_size_t len)
{
qse_cint_t c;
const qse_char_t* end = ptr + len;
qse_sed_a_t a1, a2;
while (1) while (1)
{ {
/* TODO: should use ISSPACE()? or is it enough to c = CC (ptr, end);
* check for a ' ' and '\t' because the input 'str'
* is just a line
*/
while (*cp == QSE_T(' ') || *cp == QSE_T('\t')) cp++;
if (*cp == QSE_T('\0') || *cp == QSE_T('#')) break; /* skip white spaces */
while (c == QSE_T(' ') || c == QSE_T('\t'))
if (*cp == QSE_T(';'))
{ {
cp++; ptr++;
c = CC (ptr, end);
}
/* check if it has reached the end or is commented */
if (c == QSE_CHAR_EOF || c == QSE_T('#')) break;
if (c == QSE_T(';'))
{
ptr++;
continue; continue;
} }
cp = address (sed, cp/*, &rep->ad1*/); /* process address */
ptr = address (sed, ptr, end, &a1);
if (ptr == QSE_NULL) return QSE_NULL;
c = CC (ptr, end);
if (a1.type != QSE_SED_A_NONE)
{
/* if (a1.type == QSE_SED_A_LAST)
{
// TODO: ????
} */
if (c == QSE_T(',') || c == QSE_T(';'))
{
ptr++;
ptr = address (sed, ptr, end, &a2);
if (ptr == QSE_NULL) return QSE_NULL;
c = CC (ptr, end);
}
else a2.type = QSE_SED_A_NONE;
}
/* skip white spaces */
while (c == QSE_T(' ') || c == QSE_T('\t'))
{
ptr++;
c = CC (ptr, end);
}
if (c == QSE_T('!'))
{
/* negate */
}
ptr = command (sed, ptr, end);
if (ptr == QSE_NULL) return QSE_NULL;
} }
return ptr;
} }

View File

@ -32,10 +32,9 @@ struct qse_sed_a_t
enum enum
{ {
QSE_SED_A_NONE, /* no address */ QSE_SED_A_NONE, /* no address */
QSE_SED_A_DOL, /* $ */ QSE_SED_A_DOL, /* $ - last line */
QSE_SED_A_LINE, /* line # */ QSE_SED_A_LINE, /* specified line */
QSE_SED_A_REX, /* regular expression */ QSE_SED_A_REX /* lines matching regular expression */
QSE_SED_A_LAST /* the last regular expression */
} type; } type;
union union
@ -59,6 +58,37 @@ struct qse_sed_c_t
} u; } u;
qse_char_t* rhs; /* right-hand side of substitution */ qse_char_t* rhs; /* right-hand side of substitution */
/* command code of this command object.
 * the disabled (#if 0) part of command() in sed.c assigns
 * QSE_SED_C_B for '{' and QSE_SED_C_EQ for '='.
 * NOTE(review): the mapping of the remaining codes to sed command
 * characters is not visible here - confirm against the parser
 * before relying on it. */
enum
{
QSE_SED_C_A,
QSE_SED_C_B,
QSE_SED_C_C,
QSE_SED_C_CD,
QSE_SED_C_CN,
QSE_SED_C_CO,
QSE_SED_C_CP,
QSE_SED_C_D,
QSE_SED_C_E,
QSE_SED_C_EQ,
QSE_SED_C_F,
QSE_SED_C_G,
QSE_SED_C_CG,
QSE_SED_C_H,
QSE_SED_C_CH,
QSE_SED_C_I,
QSE_SED_C_L,
QSE_SED_C_N,
QSE_SED_C_P,
QSE_SED_C_Q,
QSE_SED_C_R,
QSE_SED_C_S,
QSE_SED_C_T,
QSE_SED_C_W,
QSE_SED_C_CW,
QSE_SED_C_Y,
QSE_SED_C_X
} cmd;
}; };
struct qse_sed_l_t struct qse_sed_l_t
@ -69,5 +99,4 @@ struct qse_sed_l_t
}; };
#endif #endif