added some routines to qse_sed_c

This commit is contained in:
hyung-hwan 2009-02-09 08:40:19 +00:00
parent 41b324b7a7
commit b1211d6e58
3 changed files with 279 additions and 48 deletions

View File

@ -21,12 +21,30 @@
#include <qse/types.h>
#include <qse/macros.h>
#include <qse/cmn/str.h>
/*
 * Error numbers recorded in the errnum field of a sed object.
 * Values are assigned sequentially starting from zero.
 */
enum qse_sed_errnum_t
{
	/* no error */
	QSE_SED_ENOERR = 0,
	/* no memory */
	QSE_SED_ENOMEM,
	/* too much text */
	QSE_SED_ETMTXT,
	/* command not recognized */
	QSE_SED_ECMDNR,
	/* command garbled */
	QSE_SED_ECMDGB,
	/* label too long */
	QSE_SED_ELBLTL,
	/* regular expression build error */
	QSE_SED_EREXBL,
	/* address 2 not necessary */
	QSE_SED_EA2NNC
};
/* short aliases for the sed object and its error-number enumeration */
typedef struct qse_sed_t qse_sed_t;
typedef enum qse_sed_errnum_t qse_sed_errnum_t;
/*
 * State of a sed object.
 */
struct qse_sed_t
{
/* NOTE(review): presumably supplies the mmgr member that qse_sed_init()
 * assigns - confirm against the macro definition */
QSE_DEFINE_COMMON_FIELDS (sed)
/* error number set by the routine that failed */
qse_sed_errnum_t errnum;
/* code returned by the most recent successful qse_buildrex() call
 * made in compile() */
void* lastrex;
qse_str_t rexbuf; /* temporary regular expression buffer */
};

View File

@ -18,6 +18,7 @@
#include "sed.h"
#include "../cmn/mem.h"
#include <qse/cmn/rex.h>
QSE_IMPLEMENT_COMMON_FUNCTIONS (sed)
@ -58,69 +59,101 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
QSE_MEMSET (sed, 0, sizeof(*sed));
sed->mmgr = mmgr;
if (qse_str_init (&sed->rexbuf, mmgr, 0) == QSE_NULL)
{
sed->errnum = QSE_SED_ENOMEM;
return QSE_NULL;
}
return sed;
}
/*
 * Finalizes a sed object by finalizing the regular expression buffer
 * that qse_sed_init() initialized.
 * NOTE(review): sed->lastrex is not released here; confirm who owns the
 * code returned by qse_buildrex().
 */
void qse_sed_fini (qse_sed_t* sed)
{
qse_str_fini (&sed->rexbuf);
}
static void compile (qse_sed_t* sed, const qse_char_t* cp, qse_char_t seof)
/*
 * Bounded character access over the half-open range [ptr, end).
 * Both macros yield QSE_CHAR_EOF once ptr has reached end.
 * The arguments are parenthesized so that expression arguments such as
 * CC(p + 1, end) dereference the intended address. Each macro still
 * evaluates ptr more than once, so pass expressions without side effects;
 * NC additionally requires ptr to be a modifiable lvalue.
 */
/* get the current character without advancing the pointer */
#define CC(ptr,end) (((ptr) < (end))? *(ptr): QSE_CHAR_EOF)
/* get the current character advancing the pointer */
#define NC(ptr,end) (((ptr) < (end))? *(ptr)++: QSE_CHAR_EOF)
/*
 * Collects the text of a regular expression from [ptr, end) up to the
 * delimiter 'seof' into sed->rexbuf and builds it with qse_buildrex().
 *
 * A backslash takes the following character literally, except that
 * "\n" is stored as a newline character.
 *
 * On success the built code is stored in sed->lastrex and the advanced
 * ptr (just past the closing delimiter) is returned. On failure QSE_NULL
 * is returned and sed->errnum is set to QSE_SED_ETMTXT (end of input or
 * a newline reached before the delimiter), QSE_SED_ENOMEM (appending to
 * rexbuf failed) or QSE_SED_EREXBL (qse_buildrex() failed).
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/-
 * markers were lost; the lines that use cp, ep, expbuf and regcomp look
 * like the superseded implementation interleaved with its replacement.
 */
static const void* compile (
qse_sed_t* sed, const qse_char_t* ptr,
const qse_char_t* end, qse_char_t seof)
{
qse_char_t c;
void* code;
qse_cint_t c;
if ((c = *cp++) == seof) return QSE_NULL; /* // */
/* start with an empty buffer for the expression text */
qse_str_clear (&sed->rexbuf);
do
for (;;)
{
if (c == QSE_T('\0') || c == QSE_T('\n'))
c = NC (ptr, end);
if (c == QSE_CHAR_EOF || c == QSE_T('\n'))
{
/* premature end of text */
return QSE_NULL; /* TODO: return an error..*/
sed->errnum = QSE_SED_ETMTXT;
return QSE_NULL;
}
if (c == QSE_T('\\')
/* the closing delimiter ends the expression text */
if (c == seof) break;
if (c == QSE_T('\\'))
{
if (ep >= end)
c = NC (ptr, end);
if (c == QSE_CHAR_EOF || c == QSE_T('\n'))
{
/* too many characters */
return QSE_NULL; /* TODO: return an error..*/
sed->errnum = QSE_SED_ETMTXT;
return QSE_NULL;
}
*ep++ = c;
/* TODO: more escaped characters */
if ((c = *cp++) == QSE_T('n') c = QSE_T('n');
if (c == QSE_T('n')) c = QSE_T('\n');
// TODO: support more escaped characters??
}
if (ep >= end)
if (qse_str_ccat (&sed->rexbuf, c) == (qse_size_t)-1)
{
/* too many characters */
return QSE_NULL; /* TODO: return an error..*/
sed->errnum = QSE_SED_ENOMEM;
return QSE_NULL;
}
}
*ep++ = c;
/* TODO: maximum depth - optionize the second parameter */
code = qse_buildrex (
sed->mmgr, 0,
QSE_STR_PTR(&sed->rexbuf),
QSE_STR_LEN(&sed->rexbuf),
QSE_NULL);
if (code == QSE_NULL)
{
sed->errnum = QSE_SED_EREXBL;
return QSE_NULL;
}
while ((c = *cp++) != seof);
*ep = QSE_T('\0');
regcomp (expbuf);
/* remember the built code; address() reads it back from here */
sed->lastrex = code;
return ptr;
}
/*
 * Parses one address at ptr and describes it in *a:
 *   '$'     - a->type = QSE_SED_A_DOL
 *   '/'     - compile() is called with '/' as the delimiter; a->u.rex is
 *             taken from sed->lastrex and a->type = QSE_SED_A_REX
 *   digits  - a->type = QSE_SED_A_LINE and a->u.line is the decimal value
 *   other   - a->type = QSE_SED_A_NONE
 * The backslash branch is still a TODO and leaves *a untouched.
 *
 * Returns the pointer following the consumed text, or QSE_NULL when
 * compile() fails or the line number is 0 (errnum is not set for the
 * latter).
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/-
 * markers were lost; the lines using cp look like superseded code, and
 * the "@ ... @" lines are hunk headers hiding part of the body (the
 * declaration of lno is presumably among the hidden lines).
 * NOTE(review): NC() advances ptr and the next line reads *ptr again
 * without a bounds check, so the first character looks skipped - confirm.
 * NOTE(review): the pointer returned by compile() is only compared with
 * QSE_NULL, so ptr does not appear to move past the expression - confirm.
 */
static const qse_char_t* address (
qse_sed_t* sed, const qse_char_t* cp, qse_sed_a_t* a)
qse_sed_t* sed, const qse_char_t* ptr,
const qse_char_t* end, qse_sed_a_t* a)
{
qse_char_t c;
qse_cint_t c;
if ((c = *cp) == QSE_T('$'))
c = NC (ptr, end);
if ((c = *ptr) == QSE_T('$'))
{
a->type = QSE_SED_A_DOL;
cp++;
ptr++;
}
else if (c == QSE_T('/'))
{
cp++;
a->type = (a->u.rex = compile(sed, c))? A_RE: A_LAST;
ptr++;
if (compile (sed, ptr, end, c) == QSE_NULL)
return QSE_NULL;
a->u.rex = sed->lastrex;
a->type = QSE_SED_A_REX;
}
else if (c >= QSE_T('0') && c <= QSE_T('9'))
{
@ -128,9 +161,9 @@ static const qse_char_t* address (
do
{
lno = lno * 10 + c - QSE_T('0');
cp++;
ptr++;
}
while ((c = *cp) >= QSE_T('0') && c <= QSE_T('9'))
while ((c = *ptr) >= QSE_T('0') && c <= QSE_T('9'));
/* line number 0 is illegal */
if (lno == 0) return QSE_NULL;
@ -138,35 +171,186 @@ static const qse_char_t* address (
a->type = QSE_SED_A_LINE;
a->u.line = lno;
}
else if (c == QSE_T('\\'))
{
/* TODO */
}
else
{
a->type = QSE_SED_A_NONE;
}
return cp;
return ptr;
}
static void fcomp (const qse_char_t* str)
/*
 * Inspects the command character at ptr (without consuming it) and
 * dispatches on it. Every recognized command is currently an empty
 * placeholder; an unrecognized character sets sed->errnum to
 * QSE_SED_ECMDNR and returns QSE_NULL. The '{', '}' and '=' cases are
 * compiled out with #if 0.
 *
 * Returns ptr unchanged for a recognized command.
 *
 * NOTE(review): ptr is not advanced past the command character, so a
 * caller that loops on the returned pointer sees the same character
 * again - confirm this is intended for the skeleton.
 * NOTE(review): the declaration of cp below looks like a leftover line
 * from the superseded fcomp() in this rendered diff.
 */
static const qse_char_t* command (
qse_sed_t* sed, const qse_char_t* ptr, const qse_char_t* end)
{
const qse_char_t* cp = str;
qse_cint_t c;
/* peek at the command character */
c = CC (ptr, end);
switch (c)
{
default:
sed->errnum = QSE_SED_ECMDNR;
return QSE_NULL;
#if 0
case QSE_T('{'):
cmd = QSE_SED_C_B;
break;
case QSE_T('}'):
break;
case QSE_T('='):
cmd = QSE_SED_C_EQ;
if (ad2.type != QSE_SED_A_NONE)
{
sed->errnum = QSE_SED_EA2NNC;
return QSE_NULL;
}
break;
#endif
case QSE_T(':'):
break;
case QSE_T('a'):
break;
case QSE_T('c'):
break;
case QSE_T('i'):
break;
case QSE_T('g'):
break;
case QSE_T('G'):
break;
case QSE_T('h'):
break;
case QSE_T('H'):
break;
case QSE_T('t'):
break;
case QSE_T('b'):
break;
case QSE_T('n'):
break;
case QSE_T('N'):
break;
case QSE_T('p'):
break;
case QSE_T('P'):
break;
case QSE_T('r'):
break;
case QSE_T('d'):
break;
case QSE_T('D'):
break;
case QSE_T('q'):
break;
case QSE_T('l'):
break;
case QSE_T('s'):
break;
case QSE_T('w'):
break;
case QSE_T('x'):
break;
case QSE_T('y'):
break;
}
return ptr;
}
/*
 * Walks the script text of 'len' characters starting at ptr.
 *
 * Each iteration skips blanks and tabs, stops at the end of the text or
 * at '#', skips a lone ';', parses a first address with address() and,
 * when that address is present and followed by ',' or ';', a second
 * one, skips blanks and tabs again, and finally hands the position to
 * command().
 *
 * Returns the position where scanning stopped, or QSE_NULL as soon as
 * address() or command() returns QSE_NULL.
 *
 * NOTE(review): this listing appears to be a rendered diff whose +/-
 * markers were lost; the lines that use cp look like the superseded
 * implementation interleaved with its replacement.
 * NOTE(review): a2 is assigned only when the first address is present,
 * and the '!' branch is an empty placeholder that does not consume the
 * character before command() is called - confirm.
 */
static const qse_char_t* fcomp (
qse_sed_t* sed, const qse_char_t* ptr, qse_size_t len)
{
qse_cint_t c;
/* one past the last character of the script text */
const qse_char_t* end = ptr + len;
qse_sed_a_t a1, a2;
while (1)
{
/* TODO: should use ISSPACE()? or is it enough to
* check for a ' ' and '\t' because the input 'str'
* is just a line
*/
while (*cp == QSE_T(' ') || *cp == QSE_T('\t')) cp++;
c = CC (ptr, end);
if (*cp == QSE_T('\0') || *cp == QSE_T('#')) break;
if (*cp == QSE_T(';'))
/* skip white spaces */
while (c == QSE_T(' ') || c == QSE_T('\t'))
{
cp++;
ptr++;
c = CC (ptr, end);
}
/* check if it has reached the end or is commented */
if (c == QSE_CHAR_EOF || c == QSE_T('#')) break;
if (c == QSE_T(';'))
{
ptr++;
continue;
}
cp = address (sed, cp/*, &rep->ad1*/);
/* process address */
ptr = address (sed, ptr, end, &a1);
if (ptr == QSE_NULL) return QSE_NULL;
c = CC (ptr, end);
if (a1.type != QSE_SED_A_NONE)
{
/* if (a1.type == QSE_SED_A_LAST)
{
// TODO: ????
} */
/* a separator after the first address introduces the second one */
if (c == QSE_T(',') || c == QSE_T(';'))
{
ptr++;
ptr = address (sed, ptr, end, &a2);
if (ptr == QSE_NULL) return QSE_NULL;
c = CC (ptr, end);
}
else a2.type = QSE_SED_A_NONE;
}
/* skip white spaces */
while (c == QSE_T(' ') || c == QSE_T('\t'))
{
ptr++;
c = CC (ptr, end);
}
if (c == QSE_T('!'))
{
/* negate */
}
ptr = command (sed, ptr, end);
if (ptr == QSE_NULL) return QSE_NULL;
}
return ptr;
}

View File

@ -32,10 +32,9 @@ struct qse_sed_a_t
enum
{
QSE_SED_A_NONE, /* no address */
QSE_SED_A_DOL, /* $ */
QSE_SED_A_LINE, /* line # */
QSE_SED_A_REX, /* regular expression */
QSE_SED_A_LAST /* the last regular expression */
QSE_SED_A_DOL, /* $ - last line */
QSE_SED_A_LINE, /* specified line */
QSE_SED_A_REX /* lines matching regular expression */
} type;
union
@ -59,6 +58,37 @@ struct qse_sed_c_t
} u;
qse_char_t* rhs; /* right-hand side of sustitution */
enum
{
QSE_SED_C_A,
QSE_SED_C_B,
QSE_SED_C_C,
QSE_SED_C_CD,
QSE_SED_C_CN,
QSE_SED_C_CO,
QSE_SED_C_CP,
QSE_SED_C_D,
QSE_SED_C_E,
QSE_SED_C_EQ,
QSE_SED_C_F,
QSE_SED_C_G,
QSE_SED_C_CG,
QSE_SED_C_H,
QSE_SED_C_CH,
QSE_SED_C_I,
QSE_SED_C_L,
QSE_SED_C_N,
QSE_SED_C_P,
QSE_SED_C_Q,
QSE_SED_C_R,
QSE_SED_C_S,
QSE_SED_C_T,
QSE_SED_C_W,
QSE_SED_C_CW,
QSE_SED_C_Y,
QSE_SED_C_X
} cmd;
};
struct qse_sed_l_t
@ -69,5 +99,4 @@ struct qse_sed_l_t
};
#endif