From 7a246a02ef0ed82694032cb7438104a8b03f3e4c Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Thu, 29 Sep 2011 00:31:17 +0000 Subject: [PATCH] added QSE_SED_EXTENDEDADR deleted QSE_SED_ZEROA1 and QSE_SED_STARTSTEP added actual code for more ~extended address formats fixed the 'divide-by-zero' bug caused by the command 'start~0' added more test scripts for sed --- qse/cmd/sed/sed.c | 14 ++-- qse/include/qse/sed/sed.h | 3 +- qse/lib/sed/sed.c | 115 +++++++++++++++++++++++++++----- qse/lib/sed/sed.h | 14 ++-- qse/regress/sed/Makefile.am | 4 +- qse/regress/sed/Makefile.in | 4 +- qse/regress/sed/regress.out | 40 +++++++++++ qse/regress/sed/regress.out.xma | 40 +++++++++++ qse/regress/sed/regress.sh.in | 2 + qse/regress/sed/s007.dat | 9 +++ qse/regress/sed/s007.sed | 18 +++++ qse/regress/sed/s008.dat | 9 +++ qse/regress/sed/s008.sed | 11 +++ 13 files changed, 250 insertions(+), 33 deletions(-) create mode 100644 qse/regress/sed/s007.dat create mode 100644 qse/regress/sed/s007.sed create mode 100644 qse/regress/sed/s008.dat create mode 100644 qse/regress/sed/s008.sed diff --git a/qse/cmd/sed/sed.c b/qse/cmd/sed/sed.c index 1ac2be24..bd6b7813 100644 --- a/qse/cmd/sed/sed.c +++ b/qse/cmd/sed/sed.c @@ -114,10 +114,10 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[]) qse_fprintf (out, QSE_T(" -R enable non-standard extensions to the regular expression\n")); qse_fprintf (out, QSE_T(" -s processes input files separately\n")); qse_fprintf (out, QSE_T(" -a perform strict address and label check\n")); - qse_fprintf (out, QSE_T(" -w allow address format of start~step\n")); + qse_fprintf (out, QSE_T(" -w allow extended address formats\n")); + qse_fprintf (out, QSE_T(" ,,,<0,/regex/>\n")); qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n")); qse_fprintf (out, QSE_T(" -y ensure a newline at text end\n")); - qse_fprintf (out, QSE_T(" -z allow 0,/regex/ address\n")); qse_fprintf (out, QSE_T(" -m number specify the maximum amount of memory to 
use in bytes\n")); #if defined(QSE_BUILD_DEBUG) qse_fprintf (out, QSE_T(" -X number fail the number'th memory allocation\n")); @@ -129,9 +129,9 @@ static int handle_args (int argc, qse_char_t* argv[]) static qse_opt_t opt = { #if defined(QSE_BUILD_DEBUG) - QSE_T("hnf:o:rRsawxyzm:X:"), + QSE_T("hnf:o:rRsawxym:X:"), #else - QSE_T("hnf:o:rRsawxyzm:"), + QSE_T("hnf:o:rRsawxym:"), #endif QSE_NULL }; @@ -194,7 +194,7 @@ static int handle_args (int argc, qse_char_t* argv[]) break; case QSE_T('w'): - g_option |= QSE_SED_STARTSTEP; + g_option |= QSE_SED_EXTENDEDADR; break; case QSE_T('x'): @@ -205,10 +205,6 @@ static int handle_args (int argc, qse_char_t* argv[]) g_option |= QSE_SED_ENSURENL; break; - case QSE_T('z'): - g_option |= QSE_SED_ZEROA1; - break; - case QSE_T('m'): g_memlimit = qse_strtoulong (opt.arg); break; diff --git a/qse/include/qse/sed/sed.h b/qse/include/qse/sed/sed.h index dbf25504..9a9c6a19 100644 --- a/qse/include/qse/sed/sed.h +++ b/qse/include/qse/sed/sed.h @@ -142,8 +142,7 @@ enum qse_sed_option_t QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */ QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */ QSE_SED_STRICT = (1 << 4), /**< do strict address and label check */ - QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */ - QSE_SED_ZEROA1 = (1 << 6), /**< allow 0,/regex/ */ + QSE_SED_EXTENDEDADR = (1 << 5), /**< allow start~step , addr1,+line, addr1,~line */ QSE_SED_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */ QSE_SED_EXTENDEDREX = (1 << 8), /**< use extended regex */ QSE_SED_NONSTDEXTREX = (1 << 9) /**< enable non-standard extensions to regex */ diff --git a/qse/lib/sed/sed.c b/qse/lib/sed/sed.c index 3e01b8e2..2b8b7ed1 100644 --- a/qse/lib/sed/sed.c +++ b/qse/lib/sed/sed.c @@ -747,7 +747,7 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend) return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc); } -static qse_sed_adr_t* get_address (qse_sed_t* sed, 
qse_sed_adr_t* a) +static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a, int extended) { qse_cint_t c; @@ -794,6 +794,28 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a) a->type = QSE_SED_ADR_REX; NXTSC (sed); } + else if (extended && (c == QSE_T('+') || c == QSE_T('~'))) + { + qse_size_t lno = 0; + + a->type = (c == QSE_T('+'))? QSE_SED_ADR_RELLINE: QSE_SED_ADR_RELLINEM; + + NXTSC (sed); + if (!((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'))) + { + SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc); + return QSE_NULL; + } + + do + { + lno = lno * 10 + c - QSE_T('0'); + NXTSC (sed); + } + while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9')); + + a->u.lno = lno; + } else { a->type = QSE_SED_ADR_NONE; @@ -1633,7 +1655,7 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen) QSE_MEMSET (cmd, 0, QSE_SIZEOF(*cmd)); /* process the first address */ - if (get_address (sed, &cmd->a1) == QSE_NULL) + if (get_address (sed, &cmd->a1, 0) == QSE_NULL) { cmd = QSE_NULL; SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc); @@ -1646,14 +1668,14 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen) while (IS_SPACE(c)) c = NXTSC (sed); if (c == QSE_T(',') || - ((sed->option & QSE_SED_STARTSTEP) && c == QSE_T('~'))) + ((sed->option & QSE_SED_EXTENDEDADR) && c == QSE_T('~'))) { qse_char_t delim = c; /* maybe an address range */ do { c = NXTSC (sed); } while (IS_SPACE(c)); - if (get_address (sed, &cmd->a2) == QSE_NULL) + if (get_address (sed, &cmd->a2, (sed->option & QSE_SED_EXTENDEDADR)) == QSE_NULL) { QSE_ASSERT (cmd->a2.type == QSE_SED_ADR_NONE); SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc); @@ -1667,8 +1689,17 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen) SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc); goto oops; } + if (cmd->a2.type == QSE_SED_ADR_RELLINE || + cmd->a2.type == QSE_SED_ADR_RELLINEM) + { + if (cmd->a2.u.lno <= 0) + { + /* transform 'addr1,+0' and 'addr1,~0' to 
'addr1' */ + cmd->a2.type = QSE_SED_ADR_NONE; + } + } } - else if ((sed->option & QSE_SED_STARTSTEP) && + else if ((sed->option & QSE_SED_EXTENDEDADR) && (delim == QSE_T('~'))) { if (cmd->a1.type != QSE_SED_ADR_LINE || @@ -1678,7 +1709,15 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen) goto oops; } - cmd->a2.type = QSE_SED_ADR_STEP; + if (cmd->a2.u.lno > 0) + { + cmd->a2.type = QSE_SED_ADR_STEP; + } + else + { + /* transform 'X~0' to 'X' */ + cmd->a2.type = QSE_SED_ADR_NONE; + } } c = CURSC (sed); @@ -1686,15 +1725,24 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen) else cmd->a2.type = QSE_SED_ADR_NONE; } - if (cmd->a2.type != QSE_SED_ADR_STEP && - cmd->a1.type == QSE_SED_ADR_LINE && - cmd->a1.u.lno <= 0) + if (cmd->a1.type == QSE_SED_ADR_LINE && cmd->a1.u.lno <= 0) { - if (!(sed->option & QSE_SED_ZEROA1) || - cmd->a2.type != QSE_SED_ADR_REX) + if (cmd->a2.type == QSE_SED_ADR_STEP || + ((sed->option & QSE_SED_EXTENDEDADR) && cmd->a2.type == QSE_SED_ADR_REX)) + { + /* 0 as the first address is allowed in these two contexts. + * 0~step + * 0,/regex/ + * however, '0~0' is not allowed. but at this point '0~0' is + * already transformed to '0'. and disallowing it is achieved + * gratuitously. + */ + /* nothing to do - adding negation to the condition dropped + * code readability so i decided to write this part of code this way. + */ + } + else { - /* 0 is not allowed as a normal line number. - * 0,/regex/ is allowed */ SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc); goto oops; } @@ -2636,6 +2684,29 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a) } } + case QSE_SED_ADR_RELLINE: + /* this address type should be seen only when matching + * the second address */ + QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1); + return (sed->e.in.num >= cmd->state.a1_match_line + a->u.lno)? 
1: 0; + + case QSE_SED_ADR_RELLINEM: + { + /* this address type should be seen only when matching + * the second address */ + qse_size_t tmp; + + QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1); + QSE_ASSERT (a->u.lno > 0); + + /* TODO: is it better to store this value somewhere in the state + * so as not to calculate it every time? */ + tmp = (cmd->state.a1_match_line + a->u.lno) - + (cmd->state.a1_match_line % a->u.lno); + + return (sed->e.in.num >= tmp)? 1: 0; + } + default: QSE_ASSERT (a->type == QSE_SED_ADR_NONE); return 1; /* match */ @@ -2662,6 +2733,7 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd) /* stepping address */ cmd->state.c_ready = 1; if (sed->e.in.num < cmd->a1.u.lno) return 0; + QSE_ASSERT (cmd->a2.u.lno > 0); if ((sed->e.in.num - cmd->a1.u.lno) % cmd->a2.u.lno == 0) return 1; return 0; } @@ -2677,7 +2749,18 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (cmd->a2.type == QSE_SED_ADR_LINE && sed->e.in.num > cmd->a2.u.lno) { - /* exit the range */ + /* This check is needed because matching of the second + * address could be skipped while it could match. + * + * Consider commands like '1,3p;2N'. + * '3' in '1,3p' is skipped because 'N' in '2N' triggers + * reading of the third line. + * + * Unfortunately, I can't handle a non-line-number + * second address like this. If 'abcxyz' is given as the third + * line for command '1,/abc/p;2N', 'abcxyz' is not matched + * against '/abc/'. so it doesn't exit the range. 
+ */ cmd->state.a1_matched = 0; return 0; } @@ -2714,6 +2797,7 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd) /* mark that the first is matched so as to * move on to the range test */ cmd->state.a1_matched = 1; + cmd->state.a1_match_line = sed->e.in.num; } return 1; @@ -3070,7 +3154,7 @@ static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b) /* clear states */ c->state.a1_matched = 0; - if (sed->option & QSE_SED_ZEROA1) + if (sed->option & QSE_SED_EXTENDEDADR) { if (c->a2.type == QSE_SED_ADR_REX && c->a1.type == QSE_SED_ADR_LINE && @@ -3078,6 +3162,7 @@ static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b) { /* special handling for 0,/regex/ */ c->state.a1_matched = 1; + c->state.a1_match_line = 0; } } diff --git a/qse/lib/sed/sed.h b/qse/lib/sed/sed.h index 0ecd6d11..d3c457b3 100644 --- a/qse/lib/sed/sed.h +++ b/qse/lib/sed/sed.h @@ -50,11 +50,13 @@ struct qse_sed_adr_t { enum { - QSE_SED_ADR_NONE, /* no address */ - QSE_SED_ADR_DOL, /* $ - last line */ - QSE_SED_ADR_LINE, /* specified line */ - QSE_SED_ADR_REX, /* lines matching regular expression */ - QSE_SED_ADR_STEP /* line steps - only in the second address */ + QSE_SED_ADR_NONE, /* no address */ + QSE_SED_ADR_DOL, /* $ - last line */ + QSE_SED_ADR_LINE, /* specified line */ + QSE_SED_ADR_REX, /* lines matching regular expression */ + QSE_SED_ADR_STEP, /* line steps - only in the second address */ + QSE_SED_ADR_RELLINE, /* relative line - only in second address */ + QSE_SED_ADR_RELLINEM /* relative line in the multiples - only in second address */ } type; union @@ -144,6 +146,8 @@ struct qse_sed_cmd_t struct { int a1_matched; + qse_size_t a1_match_line; + int c_ready; /* points to the next command for fast traversal and diff --git a/qse/regress/sed/Makefile.am b/qse/regress/sed/Makefile.am index 319d9f3c..fc5797fa 100644 --- a/qse/regress/sed/Makefile.am +++ b/qse/regress/sed/Makefile.am @@ -9,4 +9,6 @@ EXTRA_DIST = \ s003.sed s003.dat \ 
s004.sed s004.dat \ s005.sed s005.dat \ - s006.sed s006.dat + s006.sed s006.dat \ + s007.sed s007.dat \ + s008.sed s008.dat diff --git a/qse/regress/sed/Makefile.in b/qse/regress/sed/Makefile.in index 479accd4..23e07061 100644 --- a/qse/regress/sed/Makefile.in +++ b/qse/regress/sed/Makefile.in @@ -205,7 +205,9 @@ EXTRA_DIST = \ s003.sed s003.dat \ s004.sed s004.dat \ s005.sed s005.dat \ - s006.sed s006.dat + s006.sed s006.dat \ + s007.sed s007.dat \ + s008.sed s008.dat all: all-am diff --git a/qse/regress/sed/regress.out b/qse/regress/sed/regress.out index f01efe79..4df34041 100644 --- a/qse/regress/sed/regress.out +++ b/qse/regress/sed/regress.out @@ -81,3 +81,43 @@ ADDRESS: 45.34.34.33 4 ------------------- 5 +-------------------------------------------------------------------------------- +[CMD] qsesed -f s007.sed s007.dat &1 +-------------------------------------------------------------------------------- + +# copy binaries to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi + + +# copy binaries again to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi +-------------------------------------------------------------------------------- +[CMD] qsesed -w -f s008.sed s008.dat &1 +-------------------------------------------------------------------------------- + +# copy binaries to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi + + +# copy binaries again to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi diff --git a/qse/regress/sed/regress.out.xma b/qse/regress/sed/regress.out.xma index ddc01c48..554dc3e2 100644 --- 
a/qse/regress/sed/regress.out.xma +++ b/qse/regress/sed/regress.out.xma @@ -81,3 +81,43 @@ ADDRESS: 45.34.34.33 4 ------------------- 5 +-------------------------------------------------------------------------------- +[CMD] qsesed -m 500000 -f s007.sed s007.dat &1 +-------------------------------------------------------------------------------- + +# copy binaries to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi + + +# copy binaries again to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi +-------------------------------------------------------------------------------- +[CMD] qsesed -m 500000 -w -f s008.sed s008.dat &1 +-------------------------------------------------------------------------------- + +# copy binaries to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi + + +# copy binaries again to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" +#COPY DISK TO /ramdisk/cdrom +if [ -n "$FROMINIT" ];then + cp -af /image/* /ramdisk/cdrom/ +fi diff --git a/qse/regress/sed/regress.sh.in b/qse/regress/sed/regress.sh.in index b357eec2..6c66b400 100755 --- a/qse/regress/sed/regress.sh.in +++ b/qse/regress/sed/regress.sh.in @@ -62,6 +62,8 @@ PROGS=" s004.sed/s004.dat// s005.sed/s005.dat// s006.sed/s006.dat// + s007.sed/s007.dat// + s008.sed/s008.dat//-w " [ -x "${QSESED}" ] || diff --git a/qse/regress/sed/s007.dat b/qse/regress/sed/s007.dat new file mode 100644 index 00000000..207e1ceb --- /dev/null +++ b/qse/regress/sed/s007.dat @@ -0,0 +1,9 @@ + +# copy binaries to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" + + +# copy binaries again to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" diff --git 
a/qse/regress/sed/s007.sed b/qse/regress/sed/s007.sed new file mode 100644 index 00000000..872a40df --- /dev/null +++ b/qse/regress/sed/s007.sed @@ -0,0 +1,18 @@ +# +# Taken from message #8918 by Davide Brini in the sed-users mailing list +# + +$!N + +\|^cp -a /bin/\* /ramdisk/busybin/\necho -n "done\${CRE}"$| { + s|$|\ +#COPY DISK TO /ramdisk/cdrom\ +if [ -n "$FROMINIT" ];then\ + cp -af /image/* /ramdisk/cdrom/\ +fi| + p + d +} + +P +D diff --git a/qse/regress/sed/s008.dat b/qse/regress/sed/s008.dat new file mode 100644 index 00000000..207e1ceb --- /dev/null +++ b/qse/regress/sed/s008.dat @@ -0,0 +1,9 @@ + +# copy binaries to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" + + +# copy binaries again to ramdisk +cp -a /bin/* /ramdisk/busybin/ +echo -n "done${CRE}" diff --git a/qse/regress/sed/s008.sed b/qse/regress/sed/s008.sed new file mode 100644 index 00000000..30ffd5d8 --- /dev/null +++ b/qse/regress/sed/s008.sed @@ -0,0 +1,11 @@ +# +# Taken from message #8926 by nburns1980 in the sed-users mailing list +# +\|cp -a /bin/\* /ramdisk/busybin/|,+1 { +//n +\|echo -n "done\${CRE}"| a\ +#COPY DISK TO /ramdisk/cdrom\ +if [ -n "$FROMINIT" ];then\ + cp -af /image/* /ramdisk/cdrom/\ +fi +}