fixed a bug of wrong field buffer management in sed

This commit is contained in:
hyung-hwan 2012-08-20 06:00:22 +00:00
parent 9bc182de7a
commit a2b51062a9
3 changed files with 50 additions and 29 deletions

View File

@ -421,7 +421,7 @@ struct opttab_t
{ QSE_T("ncmponstr"), QSE_AWK_NCMPONSTR, QSE_T("perform numeric comparsion on numeric strings") }, { QSE_T("ncmponstr"), QSE_AWK_NCMPONSTR, QSE_T("perform numeric comparsion on numeric strings") },
{ QSE_T("strictnaming"), QSE_AWK_STRICTNAMING, QSE_T("enable the strict naming rule") }, { QSE_T("strictnaming"), QSE_AWK_STRICTNAMING, QSE_T("enable the strict naming rule") },
{ QSE_T("include"), QSE_AWK_INCLUDE, QSE_T("enable '@include'") }, { QSE_T("include"), QSE_AWK_INCLUDE, QSE_T("enable '@include'") },
{ QSE_T("tolerant"), QSE_AWK_TOLERANT, QSE_T("make more I/O fault-tolerant") }, { QSE_T("tolerant"), QSE_AWK_TOLERANT, QSE_T("make more fault-tolerant") },
{ QSE_T("abort"), QSE_AWK_ABORT, QSE_T("enable 'abort'") }, { QSE_T("abort"), QSE_AWK_ABORT, QSE_T("enable 'abort'") },
{ QSE_NULL, 0, QSE_NULL } { QSE_NULL, 0, QSE_NULL }
}; };

View File

@ -3,7 +3,6 @@
@section sed_contents CONTENTS @section sed_contents CONTENTS
- \ref sed_intro - \ref sed_intro
- \ref sed_command - \ref sed_command
- \ref sed_embed
@section sed_intro INTRODUCTION @section sed_intro INTRODUCTION
@ -261,8 +260,7 @@ Replaces all occurrences of characters in @b src with characters in @b dst.
- <b>c/selector/opts</b> - <b>c/selector/opts</b>
Selects characters or fields from the pattern space as specified by the Selects characters or fields from the pattern space as specified by the
@b selector and update the pattern space with the selected text. A selector @b selector and update the pattern space with the selected text. A selector
is a comma-separated list of selector atoms. A selector atom is one of is a comma-separated list of specifiers. A specifier is one of the following:
the following:
<ul> <ul>
<li>@b d specifies the input field delimiter with the next character. e.g) d: <li>@b d specifies the input field delimiter with the next character. e.g) d:
<li>@b D specifies the output field delimiter with the next character. e.g) D; <li>@b D specifies the output field delimiter with the next character. e.g) D;
@ -291,16 +289,29 @@ Prints the last line. If #QSE_SED_QUIET is on, try <b>$p</b>.
- <b>1!G;h;$!d</b> - <b>1!G;h;$!d</b>
Prints input lines in the reverse order. That is, it prints the last line Prints input lines in the reverse order. That is, it prints the last line
first and the first line last. first and the first line last.
@code
$ echo -e "a\nb\nc" | qsesed '1!G;h;$!d'
c
b
a
@endcode
- <b>s/[[:space:]]{2,}/ /g</b> - <b>s/[[:space:]]{2,}/ /g</b>
Compacts whitespaces if #QSE_SED_REXBOUND is on. Compacts whitespaces if #QSE_SED_REXBOUND is on.
@section sed_embed HOW TO EMBED - <b>C/d:,f3,1/</b>
Prints the third field and the first field from a colon-separated text.
In the simplest form, @code
- Create a stream editor - qse_sed_open() $ head -5 /etc/passwd
- Compile editing commands - qse_sed_comp() root:x:0:0:root:/root:/bin/bash
- Executes compiled commands - qse_sed_exec() daemon:x:1:1:daemon:/usr/sbin:/bin/sh
- Destroy the stream editor - qse_sed_close() bin:x:2:2:bin:/bin:/bin/sh
sys:x:3:3:sys:/dev:/bin/sh
sync:x:4:65534:sync:/bin:/bin/sync
$ qsesed '1,3C/d:,f3,1/;4,$d' /etc/passwd
0 root
1 daemon
2 bin
@endcode
*/ */

View File

@ -2981,12 +2981,6 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
return 0; return 0;
} }
/*
 * Tells whether the character c is a field delimiter for the cut command cmd.
 * When cmd->u.cut.w is set, any character for which QSE_ISSPACE() is true
 * counts as a delimiter; otherwise only a character equal to
 * cmd->u.cut.delim[0] does. Returns non-zero for a delimiter, 0 otherwise.
 */
static QSE_INLINE int isdelim (qse_sed_cmd_t* cmd, qse_char_t c)
{
return (cmd->u.cut.w && QSE_ISSPACE(c)) ||
(!cmd->u.cut.w && c == cmd->u.cut.delim[0]);
}
static int split_into_fields_for_cut ( static int split_into_fields_for_cut (
qse_sed_t* sed, qse_sed_cmd_t* cmd, const qse_cstr_t* str) qse_sed_t* sed, qse_sed_cmd_t* cmd, const qse_cstr_t* str)
{ {
@ -2997,15 +2991,34 @@ static int split_into_fields_for_cut (
for (i = 0; i < str->len; ) for (i = 0; i < str->len; )
{ {
int isdelim = 0;
qse_char_t c = str->ptr[i++]; qse_char_t c = str->ptr[i++];
if (isdelim(cmd,c)) if (cmd->u.cut.w)
{
/* the w option ignores the d specifier */
if (QSE_ISSPACE(c))
{
/* the w option assumes the f option */
while (i < str->len && QSE_ISSPACE(str->ptr[i])) i++;
isdelim = 1;
}
}
else
{
if (c == cmd->u.cut.delim[0])
{ {
if (cmd->u.cut.f) if (cmd->u.cut.f)
{ {
while (i < str->len && isdelim(cmd,str->ptr[i])) i++; /* fold consecutive delimiters */
while (i < str->len && str->ptr[i] == cmd->u.cut.delim[0]) i++;
}
isdelim = 1;
}
} }
if (isdelim)
{
sed->e.cutf.flds[x++].len = xl; sed->e.cutf.flds[x++].len = xl;
if (x >= sed->e.cutf.cflds) if (x >= sed->e.cutf.cflds)
@ -3014,10 +3027,10 @@ static int split_into_fields_for_cut (
qse_size_t nsz; qse_size_t nsz;
nsz = sed->e.cutf.cflds; nsz = sed->e.cutf.cflds;
if (nsz > 100000) nsz += 100000; if (nsz > 50000) nsz += 50000;
else nsz *= 2; else nsz *= 2;
if (sed->e.cutf.flds != sed->e.cutf.sflds) if (sed->e.cutf.flds == sed->e.cutf.sflds)
{ {
tmp = QSE_MMGR_ALLOC (sed->mmgr, QSE_SIZEOF(*tmp) * nsz); tmp = QSE_MMGR_ALLOC (sed->mmgr, QSE_SIZEOF(*tmp) * nsz);
if (tmp == QSE_NULL) if (tmp == QSE_NULL)
@ -3025,12 +3038,7 @@ static int split_into_fields_for_cut (
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1; return -1;
} }
QSE_MEMCPY (tmp, sed->e.cutf.flds, QSE_SIZEOF(*tmp) * sed->e.cutf.cflds); QSE_MEMCPY (tmp, sed->e.cutf.flds, QSE_SIZEOF(*tmp) * sed->e.cutf.cflds);
QSE_MMGR_FREE (sed->mmgr, sed->e.cutf.flds);
if (sed->e.cutf.flds != sed->e.cutf.sflds)
QSE_MMGR_FREE (sed->mmgr, sed->e.cutf.flds);
} }
else else
{ {
@ -3048,6 +3056,8 @@ static int split_into_fields_for_cut (
xl = 0; xl = 0;
sed->e.cutf.flds[x].ptr = &str->ptr[i]; sed->e.cutf.flds[x].ptr = &str->ptr[i];
/* mark that this line is delimited at least once */
sed->e.cutf.delimited = 1; sed->e.cutf.delimited = 1;
} }
else xl++; else xl++;