added QSE_SED_EXTENDEDADR

deleted QSE_SED_ZEROA1 and QSE_SED_STARTSTEP
added actual code for more extended address formats (start~step, addr1,+line, addr1,~line, 0,/regex/)
fixed the 'divide-by-zero' bug caused by the command 'start~0'
added more test scripts for sed
This commit is contained in:
hyung-hwan 2011-09-29 00:31:17 +00:00
parent 84cb2b81d8
commit 7a246a02ef
13 changed files with 250 additions and 33 deletions

View File

@ -114,10 +114,10 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[])
qse_fprintf (out, QSE_T(" -R enable non-standard extensions to the regular expression\n"));
qse_fprintf (out, QSE_T(" -s processes input files separately\n"));
qse_fprintf (out, QSE_T(" -a perform strict address and label check\n"));
qse_fprintf (out, QSE_T(" -w allow address format of start~step\n"));
qse_fprintf (out, QSE_T(" -w allow extended address formats\n"));
qse_fprintf (out, QSE_T(" <start~step>,<start,+line>,<start,~line>,<0,/regex/>\n"));
qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n"));
qse_fprintf (out, QSE_T(" -y ensure a newline at text end\n"));
qse_fprintf (out, QSE_T(" -z allow 0,/regex/ address\n"));
qse_fprintf (out, QSE_T(" -m number specify the maximum amount of memory to use in bytes\n"));
#if defined(QSE_BUILD_DEBUG)
qse_fprintf (out, QSE_T(" -X number fail the number'th memory allocation\n"));
@ -129,9 +129,9 @@ static int handle_args (int argc, qse_char_t* argv[])
static qse_opt_t opt =
{
#if defined(QSE_BUILD_DEBUG)
QSE_T("hnf:o:rRsawxyzm:X:"),
QSE_T("hnf:o:rRsawxym:X:"),
#else
QSE_T("hnf:o:rRsawxyzm:"),
QSE_T("hnf:o:rRsawxym:"),
#endif
QSE_NULL
};
@ -194,7 +194,7 @@ static int handle_args (int argc, qse_char_t* argv[])
break;
case QSE_T('w'):
g_option |= QSE_SED_STARTSTEP;
g_option |= QSE_SED_EXTENDEDADR;
break;
case QSE_T('x'):
@ -205,10 +205,6 @@ static int handle_args (int argc, qse_char_t* argv[])
g_option |= QSE_SED_ENSURENL;
break;
case QSE_T('z'):
g_option |= QSE_SED_ZEROA1;
break;
case QSE_T('m'):
g_memlimit = qse_strtoulong (opt.arg);
break;

View File

@ -142,8 +142,7 @@ enum qse_sed_option_t
QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */
QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */
QSE_SED_STRICT = (1 << 4), /**< do strict address and label check */
QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */
QSE_SED_ZEROA1 = (1 << 6), /**< allow 0,/regex/ */
QSE_SED_EXTENDEDADR = (1 << 5), /**< allow extended address formats: start~step, addr1,+line, addr1,~line, 0,/regex/ */
QSE_SED_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */
QSE_SED_EXTENDEDREX = (1 << 8), /**< use extended regex */
QSE_SED_NONSTDEXTREX = (1 << 9) /**< enable non-standard extensions to regex */

View File

@ -747,7 +747,7 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc);
}
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a, int extended)
{
qse_cint_t c;
@ -794,6 +794,28 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
a->type = QSE_SED_ADR_REX;
NXTSC (sed);
}
else if (extended && (c == QSE_T('+') || c == QSE_T('~')))
{
qse_size_t lno = 0;
a->type = (c == QSE_T('+'))? QSE_SED_ADR_RELLINE: QSE_SED_ADR_RELLINEM;
NXTSC (sed);
if (!((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9')))
{
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
return QSE_NULL;
}
do
{
lno = lno * 10 + c - QSE_T('0');
NXTSC (sed);
}
while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'));
a->u.lno = lno;
}
else
{
a->type = QSE_SED_ADR_NONE;
@ -1633,7 +1655,7 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
QSE_MEMSET (cmd, 0, QSE_SIZEOF(*cmd));
/* process the first address */
if (get_address (sed, &cmd->a1) == QSE_NULL)
if (get_address (sed, &cmd->a1, 0) == QSE_NULL)
{
cmd = QSE_NULL;
SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc);
@ -1646,14 +1668,14 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
while (IS_SPACE(c)) c = NXTSC (sed);
if (c == QSE_T(',') ||
((sed->option & QSE_SED_STARTSTEP) && c == QSE_T('~')))
((sed->option & QSE_SED_EXTENDEDADR) && c == QSE_T('~')))
{
qse_char_t delim = c;
/* maybe an address range */
do { c = NXTSC (sed); } while (IS_SPACE(c));
if (get_address (sed, &cmd->a2) == QSE_NULL)
if (get_address (sed, &cmd->a2, (sed->option & QSE_SED_EXTENDEDADR)) == QSE_NULL)
{
QSE_ASSERT (cmd->a2.type == QSE_SED_ADR_NONE);
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
@ -1667,8 +1689,17 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
goto oops;
}
if (cmd->a2.type == QSE_SED_ADR_RELLINE ||
cmd->a2.type == QSE_SED_ADR_RELLINEM)
{
if (cmd->a2.u.lno <= 0)
{
/* transform 'addr1,+0' and 'addr1,~0' to 'addr1' */
cmd->a2.type = QSE_SED_ADR_NONE;
}
}
}
else if ((sed->option & QSE_SED_STARTSTEP) &&
else if ((sed->option & QSE_SED_EXTENDEDADR) &&
(delim == QSE_T('~')))
{
if (cmd->a1.type != QSE_SED_ADR_LINE ||
@ -1678,7 +1709,15 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
goto oops;
}
cmd->a2.type = QSE_SED_ADR_STEP;
if (cmd->a2.u.lno > 0)
{
cmd->a2.type = QSE_SED_ADR_STEP;
}
else
{
/* transform 'X,~0' to 'X' */
cmd->a2.type = QSE_SED_ADR_NONE;
}
}
c = CURSC (sed);
@ -1686,15 +1725,24 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
else cmd->a2.type = QSE_SED_ADR_NONE;
}
if (cmd->a2.type != QSE_SED_ADR_STEP &&
cmd->a1.type == QSE_SED_ADR_LINE &&
cmd->a1.u.lno <= 0)
if (cmd->a1.type == QSE_SED_ADR_LINE && cmd->a1.u.lno <= 0)
{
if (!(sed->option & QSE_SED_ZEROA1) ||
cmd->a2.type != QSE_SED_ADR_REX)
if (cmd->a2.type == QSE_SED_ADR_STEP ||
((sed->option & QSE_SED_EXTENDEDADR) && cmd->a2.type == QSE_SED_ADR_REX))
{
/* 0 as the first address is allowed in these two contexts:
* 0~step
* 0,/regex/
* '0~0' is not allowed, but by this point it has already been
* transformed to a plain '0', so it is rejected by the else
* branch below without any extra check.
*/
/* nothing to do - negating the condition hurt code readability,
* so the allowed cases are spelled out in this empty branch instead.
*/
}
else
{
/* 0 is not allowed as a normal line number.
* 0,/regex/ is allowed */
SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc);
goto oops;
}
@ -2636,6 +2684,29 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
}
}
case QSE_SED_ADR_RELLINE:
/* this address type should be seen only when matching
* the second address */
QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1);
return (sed->e.in.num >= cmd->state.a1_match_line + a->u.lno)? 1: 0;
case QSE_SED_ADR_RELLINEM:
{
/* this address type should be seen only when matching
* the second address */
qse_size_t tmp;
QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1);
QSE_ASSERT (a->u.lno > 0);
/* TODO: is it better to store this value somewhere in the state
* so that it is not recalculated every time? */
tmp = (cmd->state.a1_match_line + a->u.lno) -
(cmd->state.a1_match_line % a->u.lno);
return (sed->e.in.num >= tmp)? 1: 0;
}
default:
QSE_ASSERT (a->type == QSE_SED_ADR_NONE);
return 1; /* match */
@ -2662,6 +2733,7 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
/* stepping address */
cmd->state.c_ready = 1;
if (sed->e.in.num < cmd->a1.u.lno) return 0;
QSE_ASSERT (cmd->a2.u.lno > 0);
if ((sed->e.in.num - cmd->a1.u.lno) % cmd->a2.u.lno == 0) return 1;
return 0;
}
@ -2677,7 +2749,18 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (cmd->a2.type == QSE_SED_ADR_LINE &&
sed->e.in.num > cmd->a2.u.lno)
{
/* exit the range */
/* This check is needed because matching of the second
* address could be skipped while it could match.
*
* Consider commands like '1,3p;2N'.
* '3' in '1,3p' is skipped because 'N' in '2N' triggers
* reading of the third line.
*
* Unfortunately, I can't handle a non-line-number
* second address like this. If 'abcxyz' is given as the third
* line for command '1,/abc/p;2N', 'abcxyz' is not matched
* against '/abc/'. so it doesn't exit the range.
*/
cmd->state.a1_matched = 0;
return 0;
}
@ -2714,6 +2797,7 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
/* mark that the first is matched so as to
* move on to the range test */
cmd->state.a1_matched = 1;
cmd->state.a1_match_line = sed->e.in.num;
}
return 1;
@ -3070,7 +3154,7 @@ static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b)
/* clear states */
c->state.a1_matched = 0;
if (sed->option & QSE_SED_ZEROA1)
if (sed->option & QSE_SED_EXTENDEDADR)
{
if (c->a2.type == QSE_SED_ADR_REX &&
c->a1.type == QSE_SED_ADR_LINE &&
@ -3078,6 +3162,7 @@ static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b)
{
/* special handling for 0,/regex/ */
c->state.a1_matched = 1;
c->state.a1_match_line = 0;
}
}

View File

@ -50,11 +50,13 @@ struct qse_sed_adr_t
{
enum
{
QSE_SED_ADR_NONE, /* no address */
QSE_SED_ADR_DOL, /* $ - last line */
QSE_SED_ADR_LINE, /* specified line */
QSE_SED_ADR_REX, /* lines matching regular expression */
QSE_SED_ADR_STEP /* line steps - only in the second address */
QSE_SED_ADR_NONE, /* no address */
QSE_SED_ADR_DOL, /* $ - last line */
QSE_SED_ADR_LINE, /* specified line */
QSE_SED_ADR_REX, /* lines matching regular expression */
QSE_SED_ADR_STEP, /* line steps - only in the second address */
QSE_SED_ADR_RELLINE, /* relative line - only in second address */
QSE_SED_ADR_RELLINEM /* relative line in the multiples - only in second address */
} type;
union
@ -144,6 +146,8 @@ struct qse_sed_cmd_t
struct
{
int a1_matched;
qse_size_t a1_match_line;
int c_ready;
/* points to the next command for fast traversal and

View File

@ -9,4 +9,6 @@ EXTRA_DIST = \
s003.sed s003.dat \
s004.sed s004.dat \
s005.sed s005.dat \
s006.sed s006.dat
s006.sed s006.dat \
s007.sed s007.dat \
s008.sed s006.dat

View File

@ -205,7 +205,9 @@ EXTRA_DIST = \
s003.sed s003.dat \
s004.sed s004.dat \
s005.sed s005.dat \
s006.sed s006.dat
s006.sed s006.dat \
s007.sed s007.dat \
s008.sed s006.dat
all: all-am

View File

@ -81,3 +81,43 @@ ADDRESS: 45.34.34.33
4
-------------------
5
--------------------------------------------------------------------------------
[CMD] qsesed -f s007.sed s007.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------
# copy binaries to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi
# copy binaries again to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi
--------------------------------------------------------------------------------
[CMD] qsesed -w -f s008.sed s008.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------
# copy binaries to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi
# copy binaries again to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi

View File

@ -81,3 +81,43 @@ ADDRESS: 45.34.34.33
4
-------------------
5
--------------------------------------------------------------------------------
[CMD] qsesed -m 500000 -f s007.sed s007.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------
# copy binaries to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi
# copy binaries again to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi
--------------------------------------------------------------------------------
[CMD] qsesed -m 500000 -w -f s008.sed s008.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------
# copy binaries to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi
# copy binaries again to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
#COPY DISK TO /ramdisk/cdrom
if [ -n "$FROMINIT" ];then
cp -af /image/* /ramdisk/cdrom/
fi

View File

@ -62,6 +62,8 @@ PROGS="
s004.sed/s004.dat//
s005.sed/s005.dat//
s006.sed/s006.dat//
s007.sed/s007.dat//
s008.sed/s008.dat//-w
"
[ -x "${QSESED}" ] ||

9
qse/regress/sed/s007.dat Normal file
View File

@ -0,0 +1,9 @@
# copy binaries to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
# copy binaries again to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"

18
qse/regress/sed/s007.sed Normal file
View File

@ -0,0 +1,18 @@
#
# Taken from message #8918 by Davide Brini in the sed-users mailing list
#
# Purpose: after each occurrence of the two consecutive lines
#     cp -a /bin/* /ramdisk/busybin/
#     echo -n "done${CRE}"
# emit four additional lines (a '#COPY DISK' comment followed by an
# if/cp/fi block).
#
# How it works (standard sed semantics):
#   - '$!N' appends the next input line to the pattern space on every
#     line but the last, so the pattern space normally holds two lines.
#   - If the pattern space is exactly the cp line, a newline and the echo
#     line (the regex is anchored with ^ and $), the 's' command appends
#     the four new lines at the end of the pattern space, 'p' prints the
#     result and 'd' discards it and starts the next cycle.
#   - Otherwise 'P' prints only the first line of the pattern space and
#     'D' removes that first line and restarts the cycle with the
#     remaining line, which slides the two-line window down by one line.
#
# No comments are placed inside the 's' command below because its
# replacement spans several lines and any text there would become output.
#
$!N
\|^cp -a /bin/\* /ramdisk/busybin/\necho -n "done\${CRE}"$| {
s|$|\
#COPY DISK TO /ramdisk/cdrom\
if [ -n "$FROMINIT" ];then\
cp -af /image/* /ramdisk/cdrom/\
fi|
p
d
}
P
D

9
qse/regress/sed/s008.dat Normal file
View File

@ -0,0 +1,9 @@
# copy binaries to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"
# copy binaries again to ramdisk
cp -a /bin/* /ramdisk/busybin/
echo -n "done${CRE}"

11
qse/regress/sed/s008.sed Normal file
View File

@ -0,0 +1,11 @@
#
# Taken from message #8926 by nburns1980 in the sed-users mailing list
#
# Purpose: after an 'echo -n "done${CRE}"' line that directly follows a
# 'cp -a /bin/* /ramdisk/busybin/' line, append four additional lines
# (a '#COPY DISK' comment followed by an if/cp/fi block).
#
# How it works:
#   - The address range starts at a line matching the cp command and
#     extends one more line (',+1'). The 'addr1,+line' form is an
#     extended address format, so this script has to be run with the
#     option that enables extended addresses (-w in qsesed).
#   - '//n' uses an empty regular expression; presumably it reuses the
#     previous regular expression (the cp line) - verify against the
#     implementation. On a match, 'n' prints the pattern space and
#     replaces it with the next input line.
#   - If the line now in the pattern space matches the echo command,
#     'a\' queues the text lines that follow it for output after that line.
#
# No comments are placed after 'a\' below because those lines are the
# literal text to append.
#
\|cp -a /bin/\* /ramdisk/busybin/|,+1 {
//n
\|echo -n "done\${CRE}"| a\
#COPY DISK TO /ramdisk/cdrom\
if [ -n "$FROMINIT" ];then\
cp -af /image/* /ramdisk/cdrom/\
fi
}