fixed a bug of wrong field buffer management in sed

This commit is contained in:
hyung-hwan 2012-08-20 06:00:22 +00:00
parent 9bc182de7a
commit a2b51062a9
3 changed files with 50 additions and 29 deletions

View File

@ -421,7 +421,7 @@ struct opttab_t
{ QSE_T("ncmponstr"), QSE_AWK_NCMPONSTR, QSE_T("perform numeric comparsion on numeric strings") },
{ QSE_T("strictnaming"), QSE_AWK_STRICTNAMING, QSE_T("enable the strict naming rule") },
{ QSE_T("include"), QSE_AWK_INCLUDE, QSE_T("enable '@include'") },
{ QSE_T("tolerant"), QSE_AWK_TOLERANT, QSE_T("make more I/O fault-tolerant") },
{ QSE_T("tolerant"), QSE_AWK_TOLERANT, QSE_T("make more fault-tolerant") },
{ QSE_T("abort"), QSE_AWK_ABORT, QSE_T("enable 'abort'") },
{ QSE_NULL, 0, QSE_NULL }
};

View File

@ -3,7 +3,6 @@
@section sed_contents CONTENTS
- \ref sed_intro
- \ref sed_command
- \ref sed_embed
@section sed_intro INTRODUCTION
@ -261,8 +260,7 @@ Replaces all occurrences of characters in @b src with characters in @b dst.
- <b>c/selector/opts</b>
Selects characters or fields from the pattern space as specified by the
@b selector and update the pattern space with the selected text. A selector
is a comma-separated list of selector atoms. A selector atom is one of
the following:
is a comma-separated list of specifiers. A specifier is one of the following:
<ul>
<li>@b d specifies the input field delimiter with the next character. e.g) d:
<li>@b D specifies the output field delimiter with the next character. e.g) D;
@ -291,16 +289,29 @@ Prints the last line. If #QSE_SED_QUIET is on, try <b>$p</b>.
- <b>1!G;h;$!d</b>
Prints input lines in the reverse order. That is, it prints the last line
first and the first line last.
@code
$ echo -e "a\nb\nc" | qsesed '1!G;h;$!d'
c
b
a
@endcode
- <b>s/[[:space:]]{2,}/ /g</b>
Collapses each run of two or more whitespace characters into a single space if #QSE_SED_REXBOUND is on.
@section sed_embed HOW TO EMBED
In the simplest form,
- Create a stream editor - qse_sed_open()
- Compile editing commands - qse_sed_comp()
- Execute compiled commands - qse_sed_exec()
- Destroy the stream editor - qse_sed_close()
- <b>C/d:,f3,1/</b>
Prints the third field and the first field from a colon separated text.
@code
$ head -5 /etc/passwd
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/bin/sh
bin:x:2:2:bin:/bin:/bin/sh
sys:x:3:3:sys:/dev:/bin/sh
sync:x:4:65534:sync:/bin:/bin/sync
$ qsesed '1,3C/d:,f3,1/;4,$d' /etc/passwd
0 root
1 daemon
2 bin
@endcode
*/

View File

@ -2981,12 +2981,6 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
return 0;
}
/*
 * Tells whether the character c acts as a field delimiter for the cut
 * command cmd. When cmd->u.cut.w is set, any character classified as
 * whitespace by QSE_ISSPACE is a delimiter; when it is clear, only the
 * first character of cmd->u.cut.delim is.
 * Returns 1 for a delimiter and 0 otherwise.
 */
static QSE_INLINE int isdelim (qse_sed_cmd_t* cmd, qse_char_t c)
{
	if (cmd->u.cut.w)
	{
		/* whitespace mode: the delimiter character is not consulted */
		return QSE_ISSPACE(c)? 1: 0;
	}

	return c == cmd->u.cut.delim[0];
}
static int split_into_fields_for_cut (
qse_sed_t* sed, qse_sed_cmd_t* cmd, const qse_cstr_t* str)
{
@ -2997,15 +2991,34 @@ static int split_into_fields_for_cut (
for (i = 0; i < str->len; )
{
int isdelim = 0;
qse_char_t c = str->ptr[i++];
if (isdelim(cmd,c))
{
if (cmd->u.cut.f)
if (cmd->u.cut.w)
{
/* the w option ignores the d specifier */
if (QSE_ISSPACE(c))
{
while (i < str->len && isdelim(cmd,str->ptr[i])) i++;
/* the w option assumes the f option */
while (i < str->len && QSE_ISSPACE(str->ptr[i])) i++;
isdelim = 1;
}
}
else
{
if (c == cmd->u.cut.delim[0])
{
if (cmd->u.cut.f)
{
/* fold consecutive delimiters */
while (i < str->len && str->ptr[i] == cmd->u.cut.delim[0]) i++;
}
isdelim = 1;
}
}
if (isdelim)
{
sed->e.cutf.flds[x++].len = xl;
if (x >= sed->e.cutf.cflds)
@ -3014,10 +3027,10 @@ static int split_into_fields_for_cut (
qse_size_t nsz;
nsz = sed->e.cutf.cflds;
if (nsz > 100000) nsz += 100000;
if (nsz > 50000) nsz += 50000;
else nsz *= 2;
if (sed->e.cutf.flds != sed->e.cutf.sflds)
if (sed->e.cutf.flds == sed->e.cutf.sflds)
{
tmp = QSE_MMGR_ALLOC (sed->mmgr, QSE_SIZEOF(*tmp) * nsz);
if (tmp == QSE_NULL)
@ -3025,12 +3038,7 @@ static int split_into_fields_for_cut (
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
QSE_MEMCPY (tmp, sed->e.cutf.flds, QSE_SIZEOF(*tmp) * sed->e.cutf.cflds);
QSE_MMGR_FREE (sed->mmgr, sed->e.cutf.flds);
if (sed->e.cutf.flds != sed->e.cutf.sflds)
QSE_MMGR_FREE (sed->mmgr, sed->e.cutf.flds);
}
else
{
@ -3048,7 +3056,9 @@ static int split_into_fields_for_cut (
xl = 0;
sed->e.cutf.flds[x].ptr = &str->ptr[i];
sed->e.cutf.delimited = 1;
/* mark that this line is delimited at least once */
sed->e.cutf.delimited = 1;
}
else xl++;
}