fixed bugs in rex and awk
- fixed bugs where some forms of ranges such as {,m} and {n,} were not parsed properly - fixed bugs in substitution functions that did not handle 0-length substring matches properly.
This commit is contained in:
parent
e774c0bbd1
commit
944a492c88
@ -1,5 +1,10 @@
|
|||||||
/** @page sed SED STREAM EDITOR
|
/** @page sed SED STREAM EDITOR
|
||||||
|
|
||||||
|
@section sed_contents CONTENTS
|
||||||
|
- \ref sed_intro
|
||||||
|
- \ref sed_command
|
||||||
|
- \ref sed_embed
|
||||||
|
|
||||||
@section sed_intro INTRODUCTION
|
@section sed_intro INTRODUCTION
|
||||||
|
|
||||||
The sed stream editor is a non-interactive text editing tool commonly used
|
The sed stream editor is a non-interactive text editing tool commonly used
|
||||||
@ -61,7 +66,7 @@ A command without a line selector is applied to all input lines;
|
|||||||
A command with a single address is applied to an input line that matches
|
A command with a single address is applied to an input line that matches
|
||||||
the address; A command with an address range is applied to all input
|
the address; A command with an address range is applied to all input
|
||||||
lines within the range, inclusive; A command with a start and a step is
|
lines within the range, inclusive; A command with a start and a step is
|
||||||
applied to every @b step'th line starting from the line @b start.
|
applied to every <b>step</b>'th line starting from the line @b start.
|
||||||
|
|
||||||
Here is the summary of the commands.
|
Here is the summary of the commands.
|
||||||
|
|
||||||
@ -69,88 +74,151 @@ Here is the summary of the commands.
|
|||||||
The text beginning from # to the line end is ignored; # in a line following
|
The text beginning from # to the line end is ignored; # in a line following
|
||||||
<b>a \\</b>, <b>i \\</b>, and <b>c \\</b> is treated literally and does not
|
<b>a \\</b>, <b>i \\</b>, and <b>c \\</b> is treated literally and does not
|
||||||
introduce a comment.
|
introduce a comment.
|
||||||
|
|
||||||
- <b>: label</b>
|
- <b>: label</b>
|
||||||
A label can be composed of letters, digits, periods, hyphens, and underlines.
|
A label can be composed of letters, digits, periods, hyphens, and underlines.
|
||||||
It remembers a target label for @b b and @b t commands and prohibits a line
|
It remembers a target label for @b b and @b t commands and prohibits a line
|
||||||
selector.
|
selector.
|
||||||
|
|
||||||
- <b>{</b>
|
- <b>{</b>
|
||||||
The left curly bracket forms a command group where you can nest other
|
The left curly bracket forms a command group where you can nest other
|
||||||
commands. It should be paired with an ending }.
|
commands. It should be paired with an ending @b }.
|
||||||
|
|
||||||
- <b>q</b>
|
- <b>q</b>
|
||||||
Terminates the execution of commands. Upon termination, it prints the pattern
|
Terminates the execution of commands. Upon termination, it prints the pattern
|
||||||
space if #QSE_SED_QUIET is not set.
|
space if #QSE_SED_QUIET is not set.
|
||||||
|
|
||||||
- <b>Q</b>
|
- <b>Q</b>
|
||||||
Terminates the execution of commands quietly.
|
Terminates the execution of commands quietly.
|
||||||
|
|
||||||
- <b>a \\ \n text</b>
|
- <b>a \\ \n text</b>
|
||||||
Stores @b text into an append buffer which is printed after the pattern
|
Stores @b text into the append buffer which is printed after the pattern
|
||||||
space for each input line. If #QSE_SED_STRICT is specified, an address range
|
space for each input line. If #QSE_SED_STRICT is specified, a line selector
|
||||||
is not allowed in the line selector.
|
of an address range is not allowed.
|
||||||
|
|
||||||
- <b>i \\ \n text</b>
|
- <b>i \\ \n text</b>
|
||||||
Inserts @b text into an insert buffer which is printed before the pattern
|
Inserts @b text into an insert buffer which is printed before the pattern
|
||||||
space for each input line. If #QSE_SED_STRICT is specified, an address range
|
space for each input line. If #QSE_SED_STRICT is specified, a line selector
|
||||||
is not allowed in the line selector.
|
of an address range is not allowed.
|
||||||
|
|
||||||
- <b>c \\ \n text</b>
|
- <b>c \\ \n text</b>
|
||||||
If a single line is selected for the command (i.e. no line selector, a single
|
If a single line is selected for the command (i.e. no line selector, a single
|
||||||
address line selector, or a start~step line selector is specified), it changes
|
address line selector, or a start~step line selector is specified), it changes
|
||||||
pattern space to @b text and branches to the end of commands for the line.
|
the pattern space to @b text and branches to the end of commands for the line.
|
||||||
If an address range is specified, it deletes pattern space and branches
|
If an address range is specified, it deletes the pattern space and branches
|
||||||
to the end of commands for all input lines but the last, and changes pattern
|
to the end of commands for all input lines but the last, and changes pattern
|
||||||
space to @b text and branches to the end of commands.
|
space to @b text and branches to the end of commands.
|
||||||
|
|
||||||
- <b>d</b>
|
- <b>d</b>
|
||||||
Deletes pattern space and branches to the end of commands.
|
Deletes the pattern space and branches to the end of commands.
|
||||||
|
|
||||||
- <b>D</b>
|
- <b>D</b>
|
||||||
Deletes the first line of pattern space. If the pattern space is emptied,
|
Deletes the first line of the pattern space. If the pattern space is emptied,
|
||||||
it branches to the end of script. Otherwise, the commands from the first are
|
it branches to the end of script. Otherwise, the commands from the first are
|
||||||
reapplied to the current pattern space.
|
reapplied to the current pattern space.
|
||||||
|
|
||||||
- <b>=</b>
|
- <b>=</b>
|
||||||
Prints the current line number. If #QSE_SED_STRICT is specified, an address
|
Prints the current line number. If #QSE_SED_STRICT is specified, an address
|
||||||
range is not allowed in the line selector.
|
range is not allowed in the line selector.
|
||||||
|
|
||||||
- <b>p</b>
|
- <b>p</b>
|
||||||
Prints pattern space.
|
Prints the pattern space.
|
||||||
|
|
||||||
- <b>P</b>
|
- <b>P</b>
|
||||||
Prints the first line of pattern space.
|
Prints the first line of the pattern space.
|
||||||
|
|
||||||
- <b>l</b>
|
- <b>l</b>
|
||||||
Prints pattern space in a visually unambiguous form.
|
Prints the pattern space in a visually unambiguous form.
|
||||||
|
|
||||||
- <b>h</b>
|
- <b>h</b>
|
||||||
Copies pattern space to hold space
|
Copies the pattern space to the hold space
|
||||||
|
|
||||||
- <b>H</b>
|
- <b>H</b>
|
||||||
Appends pattern space to hold space
|
Appends the pattern space to the hold space
|
||||||
|
|
||||||
- <b>g</b>
|
- <b>g</b>
|
||||||
Copies hold space to pattern space
|
Copies the hold space to the pattern space
|
||||||
|
|
||||||
- <b>G</b>
|
- <b>G</b>
|
||||||
Appends hold space to pattern space
|
Appends the hold space to the pattern space
|
||||||
|
|
||||||
- <b>x</b>
|
- <b>x</b>
|
||||||
Exchanges pattern space and hold space
|
Exchanges the pattern space and the hold space
|
||||||
|
|
||||||
- <b>n</b>
|
- <b>n</b>
|
||||||
Prints pattern space and read the next line from the input stream to fill
|
Prints the pattern space and read the next line from the input stream to fill
|
||||||
pattern space.
|
the pattern space.
|
||||||
|
|
||||||
- <b>N</b>
|
- <b>N</b>
|
||||||
Prints pattern space and read the next line from the input stream to append it
|
Prints the pattern space and read the next line from the input stream
|
||||||
to pattern space with a newline inserted.
|
to append it to the pattern space with a newline inserted.
|
||||||
|
|
||||||
- <b>b</b>
|
- <b>b</b>
|
||||||
Branches to the end of commands.
|
Branches to the end of commands.
|
||||||
|
|
||||||
- <b>b label</b>
|
- <b>b label</b>
|
||||||
Branches to @b label
|
Branches to @b label
|
||||||
|
|
||||||
- <b>t</b>
|
- <b>t</b>
|
||||||
Branches to the end of commands if substitution(s//) has been made
|
Branches to the end of commands if substitution(s//) has been made
|
||||||
successfully since the last reading of an input line or the last @b t command.
|
successfully since the last reading of an input line or the last @b t command.
|
||||||
|
|
||||||
- <b>t label</b>
|
- <b>t label</b>
|
||||||
Branches to @b label if substitution(s//) has been made successfully
|
Branches to @b label if substitution(s//) has been made successfully
|
||||||
since the last reading of an input line or the last @b t command.
|
since the last reading of an input line or the last @b t command.
|
||||||
|
|
||||||
- <b>r file</b>
|
- <b>r file</b>
|
||||||
Reads text from @b file and prints it after printing pattern space but before
|
Reads text from @b file and prints it after printing the pattern space but
|
||||||
printing append buffer. Failure to read @b file does not cause an error.
|
before printing the append buffer. Failure to read @b file does not cause an
|
||||||
|
error.
|
||||||
|
|
||||||
- <b>R file</b>
|
- <b>R file</b>
|
||||||
Reads a line of text from @b file and prints it after printing pattern space
|
Reads a line of text from @b file and prints it after printing pattern space
|
||||||
but before printing append buffer. Failure to read @b file does not cause an
|
but before printing the append buffer. Failure to read @b file does not cause an
|
||||||
error.
|
error.
|
||||||
|
|
||||||
- <b>w file</b>
|
- <b>w file</b>
|
||||||
|
Writes the pattern space to @b file
|
||||||
|
|
||||||
- <b>W file</b>
|
- <b>W file</b>
|
||||||
|
Writes the first line of the pattern space to @b file
|
||||||
|
|
||||||
- <b>s/rex/repl/opt</b>
|
- <b>s/rex/repl/opts</b>
|
||||||
|
Finds a matching substring with @b rex in pattern space and replaces it
|
||||||
|
with @b repl. @b & in @b repl refers to the matching substring. @b opts may
|
||||||
|
be empty; You can combine the following options into @b opts:
|
||||||
|
- @b g replaces all occurrences of a matching substring with @b rex
|
||||||
|
- @b number replaces the <b>number</b>'th occurrence of a matching substring
|
||||||
|
with @b rex
|
||||||
|
- @b p prints pattern space if a successful replacement was made
|
||||||
|
- @b w file writes pattern space to @b file if a successful replacement
|
||||||
|
was made. It, if specified, should be the last option.
|
||||||
|
|
||||||
- <b>y/src/dst/</b>
|
- <b>y/src/dst/</b>
|
||||||
Replaces all occurrences of characters in @b src with characters in @b dst.
|
Replaces all occurrences of characters in @b src with characters in @b dst.
|
||||||
@b src and @b dst must contain equal number of characters.
|
@b src and @b dst must contain equal number of characters.
|
||||||
|
|
||||||
|
Let's see actual examples:
|
||||||
|
- <b>G;G;G</b>
|
||||||
|
Triple spaces input lines. If #QSE_SED_QUIET is on, <b>G;G;G;p</b>.
|
||||||
|
It works because the hold space is empty unless something is copied to it.
|
||||||
|
|
||||||
|
- <b>$!d</b>
|
||||||
|
Prints the last line. If #QSE_SED_QUIET is on, try <b>$p</b>.
|
||||||
|
|
||||||
|
- <b>1!G;h;$!d</b>
|
||||||
|
Prints input lines in the reverse order. That is, it prints the last line
|
||||||
|
first and the first line last.
|
||||||
|
|
||||||
|
- <b>s/[[:space:]]{2,}/ /g</b>
|
||||||
|
Compacts whitespaces if #QSE_SED_REXBOUND is on.
|
||||||
|
|
||||||
|
@section sed_embed HOW TO EMBED
|
||||||
|
|
||||||
|
In the simplest form,
|
||||||
|
- Create a stream editor - qse_sed_open()
|
||||||
|
- Compile editing commands - qse_sed_comp()
|
||||||
|
- Execute compiled commands - qse_sed_exec()
|
||||||
|
- Destroy the stream editor - qse_sed_close()
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.h 195 2009-06-10 13:18:25Z hyunghwan.chung $
|
* $Id: rex.h 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -22,7 +22,8 @@
|
|||||||
#include <qse/types.h>
|
#include <qse/types.h>
|
||||||
#include <qse/macros.h>
|
#include <qse/macros.h>
|
||||||
|
|
||||||
/*
|
/** @file
|
||||||
|
*
|
||||||
* Regular Expression Syntax
|
* Regular Expression Syntax
|
||||||
* A regular expression is zero or more branches, separated by '|'.
|
* A regular expression is zero or more branches, separated by '|'.
|
||||||
* ......
|
* ......
|
||||||
@ -30,24 +31,29 @@
|
|||||||
*
|
*
|
||||||
* Compiled form of a regular expression:
|
* Compiled form of a regular expression:
|
||||||
*
|
*
|
||||||
* | expression |
|
* | expression |
|
||||||
* | header | branch | branch | branch |
|
* | header | branch | branch | branch |
|
||||||
* | nb | el | na | bl | cmd | arg | cmd | arg | na | bl | cmd | arg | na | bl | cmd |
|
* | nb | el | na | bl | cmd | arg | cmd | arg | na | bl | cmd | arg | na | bl | cmd |
|
||||||
*
|
*
|
||||||
* nb: the number of branches
|
* - nb: the number of branches
|
||||||
* el: the length of an expression including the length of nb and el
|
* - el: the length of an expression including the length of nb and el
|
||||||
* na: the number of atoms
|
* - na: the number of atoms
|
||||||
* bl: the length of a branch including the length of na and bl
|
* - bl: the length of a branch including the length of na and bl
|
||||||
* cmd: The command and repetition info encoded together.
|
* - cmd: The command and repetition info encoded together.
|
||||||
* Some commands require an argument to follow them but some other don't.
|
|
||||||
* It is encoded as follows:
|
|
||||||
*
|
*
|
||||||
* Subexpressions can be nested by having the command "GROUP"
|
* Some commands require an argument to follow them but some other don't.
|
||||||
* and a subexpression as its argument.
|
* It is encoded as follows:
|
||||||
|
* .................
|
||||||
|
*
|
||||||
|
* Subexpressions can be nested by having the command "GROUP"
|
||||||
|
* and a subexpression as its argument.
|
||||||
*
|
*
|
||||||
* Examples:
|
* Examples:
|
||||||
* a.c -> |1|6|5|ORD_CHAR(no bound)|a|ANY_CHAR(no bound)|ORD_CHAR(no bound)|c|
|
* a.c -> |1|6|5|ORD_CHAR(no bound)|a|ANY_CHAR(no bound)|ORD_CHAR(no bound)|c|
|
||||||
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
||||||
|
*
|
||||||
|
* @todo
|
||||||
|
* - support \\n to refer to the nth matching substring
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define QSE_REX_NA(code) (*(qse_size_t*)(code))
|
#define QSE_REX_NA(code) (*(qse_size_t*)(code))
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: fnc.c 199 2009-06-14 08:40:52Z hyunghwan.chung $
|
* $Id: fnc.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -919,14 +919,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
{
|
{
|
||||||
qse_size_t nargs;
|
qse_size_t nargs;
|
||||||
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v;
|
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v;
|
||||||
qse_char_t* a0_ptr, * a1_ptr, * a2_ptr;
|
qse_char_t* a0_ptr, * a1_ptr, * a2_ptr, * a2_end;
|
||||||
qse_size_t a0_len, a1_len, a2_len;
|
qse_size_t a0_len, a1_len, a2_len;
|
||||||
qse_char_t* a0_ptr_free = QSE_NULL;
|
qse_char_t* a0_ptr_free = QSE_NULL;
|
||||||
qse_char_t* a1_ptr_free = QSE_NULL;
|
qse_char_t* a1_ptr_free = QSE_NULL;
|
||||||
qse_char_t* a2_ptr_free = QSE_NULL;
|
qse_char_t* a2_ptr_free = QSE_NULL;
|
||||||
void* rex = QSE_NULL;
|
void* rex = QSE_NULL;
|
||||||
int opt, n;
|
int opt, n;
|
||||||
qse_cstr_t mat;
|
qse_cstr_t mat, pmat;
|
||||||
const qse_char_t* cur_ptr;
|
const qse_char_t* cur_ptr;
|
||||||
qse_size_t cur_len, i, m;
|
qse_size_t cur_len, i, m;
|
||||||
qse_str_t new;
|
qse_str_t new;
|
||||||
@ -1053,7 +1053,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
|
|
||||||
if (a0->type != QSE_AWK_VAL_REX)
|
if (a0->type != QSE_AWK_VAL_REX)
|
||||||
{
|
{
|
||||||
rex = QSE_AWK_BUILDREX (run->awk, a0_ptr, a0_len, &run->errinf.num);
|
rex = QSE_AWK_BUILDREX (
|
||||||
|
run->awk, a0_ptr, a0_len, &run->errinf.num);
|
||||||
if (rex == QSE_NULL)
|
if (rex == QSE_NULL)
|
||||||
{
|
{
|
||||||
qse_str_fini (&new);
|
qse_str_fini (&new);
|
||||||
@ -1063,11 +1064,18 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
}
|
}
|
||||||
|
|
||||||
opt = (run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0;
|
opt = (run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0;
|
||||||
|
|
||||||
|
a2_end = a2_ptr + a2_len;
|
||||||
cur_ptr = a2_ptr;
|
cur_ptr = a2_ptr;
|
||||||
cur_len = a2_len;
|
cur_len = a2_len;
|
||||||
sub_count = 0;
|
sub_count = 0;
|
||||||
|
|
||||||
while (1)
|
pmat.ptr = QSE_NULL;
|
||||||
|
pmat.len = 0;
|
||||||
|
|
||||||
|
/* perform test when cur_ptr == a2_end also because
|
||||||
|
* end of string($) needs to be tested */
|
||||||
|
while (cur_ptr <= a2_end)
|
||||||
{
|
{
|
||||||
if (max_count == 0 || sub_count < max_count)
|
if (max_count == 0 || sub_count < max_count)
|
||||||
{
|
{
|
||||||
@ -1096,17 +1104,28 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
FREE_A0_REX (run->awk, rex);
|
FREE_A0_REX (run->awk, rex);
|
||||||
qse_str_fini (&new);
|
qse_str_fini (&new);
|
||||||
FREE_A_PTRS (run->awk);
|
FREE_A_PTRS (run->awk);
|
||||||
|
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mat.len == 0 &&
|
||||||
|
pmat.ptr != QSE_NULL &&
|
||||||
|
mat.ptr == pmat.ptr + pmat.len)
|
||||||
|
{
|
||||||
|
/* match length is 0 and the match is still at the
|
||||||
|
* end of the previous match */
|
||||||
|
goto skip_one_char;
|
||||||
|
}
|
||||||
|
|
||||||
if (qse_str_ncat (
|
if (qse_str_ncat (
|
||||||
&new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
|
&new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
|
||||||
{
|
{
|
||||||
FREE_A0_REX (run->awk, rex);
|
FREE_A0_REX (run->awk, rex);
|
||||||
qse_str_fini (&new);
|
qse_str_fini (&new);
|
||||||
FREE_A_PTRS (run->awk);
|
FREE_A_PTRS (run->awk);
|
||||||
|
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1133,6 +1152,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
FREE_A0_REX (run->awk, rex);
|
FREE_A0_REX (run->awk, rex);
|
||||||
qse_str_fini (&new);
|
qse_str_fini (&new);
|
||||||
FREE_A_PTRS (run->awk);
|
FREE_A_PTRS (run->awk);
|
||||||
|
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1140,6 +1160,26 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
sub_count++;
|
sub_count++;
|
||||||
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
||||||
cur_ptr = mat.ptr + mat.len;
|
cur_ptr = mat.ptr + mat.len;
|
||||||
|
|
||||||
|
pmat = mat;
|
||||||
|
|
||||||
|
if (mat.len == 0)
|
||||||
|
{
|
||||||
|
skip_one_char:
|
||||||
|
/* special treatment is needed if match length is 0 */
|
||||||
|
|
||||||
|
m = qse_str_ncat (&new, cur_ptr, 1);
|
||||||
|
if (m == (qse_size_t)-1)
|
||||||
|
{
|
||||||
|
FREE_A0_REX (run->awk, rex);
|
||||||
|
qse_str_fini (&new);
|
||||||
|
FREE_A_PTRS (run->awk);
|
||||||
|
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_ptr++; cur_len--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FREE_A0_REX (run->awk, rex);
|
FREE_A0_REX (run->awk, rex);
|
||||||
@ -1179,7 +1219,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
{
|
{
|
||||||
qse_str_fini (&new);
|
qse_str_fini (&new);
|
||||||
FREE_A_PTRS (run->awk);
|
FREE_A_PTRS (run->awk);
|
||||||
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1196,11 +1235,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
|||||||
#undef FREE_A_PTRS
|
#undef FREE_A_PTRS
|
||||||
|
|
||||||
v = qse_awk_rtx_makeintval (run, sub_count);
|
v = qse_awk_rtx_makeintval (run, sub_count);
|
||||||
if (v == QSE_NULL)
|
if (v == QSE_NULL) return -1;
|
||||||
{
|
|
||||||
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
qse_awk_rtx_setretval (run, v);
|
qse_awk_rtx_setretval (run, v);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.c 195 2009-06-10 13:18:25Z hyunghwan.chung $
|
* $Id: rex.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -1014,18 +1014,26 @@ what if it is not in the raight format? convert it to ordinary characters?? */
|
|||||||
{
|
{
|
||||||
NEXT_CHAR (builder, LEVEL_RANGE);
|
NEXT_CHAR (builder, LEVEL_RANGE);
|
||||||
|
|
||||||
bound = 0;
|
if (builder->ptn.curc.type == CT_NORMAL &&
|
||||||
while (builder->ptn.curc.type == CT_NORMAL &&
|
(builder->ptn.curc.value >= QSE_T('0') &&
|
||||||
(builder->ptn.curc.value >= QSE_T('0') &&
|
builder->ptn.curc.value <= QSE_T('9')))
|
||||||
builder->ptn.curc.value <= QSE_T('9')))
|
|
||||||
{
|
{
|
||||||
bound = bound * 10 + builder->ptn.curc.value - QSE_T('0');
|
bound = 0;
|
||||||
NEXT_CHAR (builder, LEVEL_RANGE);
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd->ubound = bound;
|
do
|
||||||
|
{
|
||||||
|
bound = bound * 10 + builder->ptn.curc.value - QSE_T('0');
|
||||||
|
NEXT_CHAR (builder, LEVEL_RANGE);
|
||||||
|
}
|
||||||
|
while (builder->ptn.curc.type == CT_NORMAL &&
|
||||||
|
(builder->ptn.curc.value >= QSE_T('0') &&
|
||||||
|
builder->ptn.curc.value <= QSE_T('9')));
|
||||||
|
|
||||||
|
cmd->ubound = bound;
|
||||||
|
}
|
||||||
|
else cmd->ubound = BOUND_MAX;
|
||||||
}
|
}
|
||||||
else cmd->ubound = BOUND_MAX;
|
else cmd->ubound = cmd->lbound;
|
||||||
|
|
||||||
if (cmd->lbound > cmd->ubound)
|
if (cmd->lbound > cmd->ubound)
|
||||||
{
|
{
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sed.c 195 2009-06-10 13:18:25Z hyunghwan.chung $
|
* $Id: sed.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -1850,10 +1850,10 @@ static int write_str_to_file (
|
|||||||
|
|
||||||
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||||
{
|
{
|
||||||
qse_cstr_t mat;
|
qse_cstr_t mat, pmat;
|
||||||
int opt = 0, repl = 0, n;
|
int opt = 0, repl = 0, n;
|
||||||
qse_rex_errnum_t errnum;
|
qse_rex_errnum_t errnum;
|
||||||
const qse_char_t* cur_ptr, * str_ptr;
|
const qse_char_t* cur_ptr, * str_ptr, * str_end;
|
||||||
qse_size_t cur_len, str_len, m, i;
|
qse_size_t cur_len, str_len, m, i;
|
||||||
qse_size_t max_count, sub_count;
|
qse_size_t max_count, sub_count;
|
||||||
|
|
||||||
@ -1868,13 +1868,19 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
/* TODO: support different line end convention */
|
/* TODO: support different line end convention */
|
||||||
if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n')) str_len--;
|
if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n')) str_len--;
|
||||||
|
|
||||||
|
str_end = str_ptr + str_len;
|
||||||
cur_ptr = str_ptr;
|
cur_ptr = str_ptr;
|
||||||
cur_len = str_len;
|
cur_len = str_len;
|
||||||
|
|
||||||
sub_count = 0;
|
sub_count = 0;
|
||||||
max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;
|
max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;
|
||||||
|
|
||||||
while (1)
|
pmat.ptr = QSE_NULL;
|
||||||
|
pmat.len = 0;
|
||||||
|
|
||||||
|
/* perform test when cur_ptr == str_end also because
|
||||||
|
* end of string($) needs to be tested */
|
||||||
|
while (cur_ptr <= str_end)
|
||||||
{
|
{
|
||||||
if (max_count == 0 || sub_count < max_count)
|
if (max_count == 0 || sub_count < max_count)
|
||||||
{
|
{
|
||||||
@ -1908,6 +1914,15 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mat.len == 0 &&
|
||||||
|
pmat.ptr != QSE_NULL &&
|
||||||
|
mat.ptr == pmat.ptr + pmat.len)
|
||||||
|
{
|
||||||
|
/* match length is 0 and the match is still at the
|
||||||
|
* end of the previous match */
|
||||||
|
goto skip_one_char;
|
||||||
|
}
|
||||||
|
|
||||||
if (max_count > 0 && sub_count + 1 != max_count)
|
if (max_count > 0 && sub_count + 1 != max_count)
|
||||||
{
|
{
|
||||||
m = qse_str_ncat (
|
m = qse_str_ncat (
|
||||||
@ -1967,6 +1982,23 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
sub_count++;
|
sub_count++;
|
||||||
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
||||||
cur_ptr = mat.ptr + mat.len;
|
cur_ptr = mat.ptr + mat.len;
|
||||||
|
|
||||||
|
pmat = mat;
|
||||||
|
|
||||||
|
if (mat.len == 0)
|
||||||
|
{
|
||||||
|
skip_one_char:
|
||||||
|
/* special treatment is needed if the match length is 0 */
|
||||||
|
|
||||||
|
m = qse_str_ncat (&sed->e.txt.subst, cur_ptr, 1);
|
||||||
|
if (m == (qse_size_t)-1)
|
||||||
|
{
|
||||||
|
SETERR0 (sed, QSE_SED_ENOMEM, 0);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_ptr++; cur_len--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (str_len < QSE_STR_LEN(&sed->e.in.line))
|
if (str_len < QSE_STR_LEN(&sed->e.in.line))
|
||||||
|
@ -119,13 +119,14 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[])
|
|||||||
qse_fprintf (out, QSE_T(" -h show this message\n"));
|
qse_fprintf (out, QSE_T(" -h show this message\n"));
|
||||||
qse_fprintf (out, QSE_T(" -n disable auto-print\n"));
|
qse_fprintf (out, QSE_T(" -n disable auto-print\n"));
|
||||||
qse_fprintf (out, QSE_T(" -a perform strict address check\n"));
|
qse_fprintf (out, QSE_T(" -a perform strict address check\n"));
|
||||||
|
qse_fprintf (out, QSE_T(" -r allows {n,m} in a regular expression\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int handle_args (int argc, qse_char_t* argv[])
|
static int handle_args (int argc, qse_char_t* argv[])
|
||||||
{
|
{
|
||||||
static qse_opt_t opt =
|
static qse_opt_t opt =
|
||||||
{
|
{
|
||||||
QSE_T("hna"),
|
QSE_T("hnar"),
|
||||||
QSE_NULL
|
QSE_NULL
|
||||||
};
|
};
|
||||||
qse_cint_t c;
|
qse_cint_t c;
|
||||||
@ -165,6 +166,10 @@ static int handle_args (int argc, qse_char_t* argv[])
|
|||||||
case QSE_T('a'):
|
case QSE_T('a'):
|
||||||
g_option |= QSE_SED_STRICT;
|
g_option |= QSE_SED_STRICT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case QSE_T('r'):
|
||||||
|
g_option |= QSE_SED_REXBOUND;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user