fixed bugs in rex and awk
- fixed bugs where some forms of ranges such as {,m} and {n,} were not parsed properly - fixed bugs in substitution functions that did not handle 0-length substring matches properly.
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: fnc.c 199 2009-06-14 08:40:52Z hyunghwan.chung $
|
||||
* $Id: fnc.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -919,14 +919,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
{
|
||||
qse_size_t nargs;
|
||||
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v;
|
||||
qse_char_t* a0_ptr, * a1_ptr, * a2_ptr;
|
||||
qse_char_t* a0_ptr, * a1_ptr, * a2_ptr, * a2_end;
|
||||
qse_size_t a0_len, a1_len, a2_len;
|
||||
qse_char_t* a0_ptr_free = QSE_NULL;
|
||||
qse_char_t* a1_ptr_free = QSE_NULL;
|
||||
qse_char_t* a2_ptr_free = QSE_NULL;
|
||||
void* rex = QSE_NULL;
|
||||
int opt, n;
|
||||
qse_cstr_t mat;
|
||||
qse_cstr_t mat, pmat;
|
||||
const qse_char_t* cur_ptr;
|
||||
qse_size_t cur_len, i, m;
|
||||
qse_str_t new;
|
||||
@ -1053,7 +1053,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
|
||||
if (a0->type != QSE_AWK_VAL_REX)
|
||||
{
|
||||
rex = QSE_AWK_BUILDREX (run->awk, a0_ptr, a0_len, &run->errinf.num);
|
||||
rex = QSE_AWK_BUILDREX (
|
||||
run->awk, a0_ptr, a0_len, &run->errinf.num);
|
||||
if (rex == QSE_NULL)
|
||||
{
|
||||
qse_str_fini (&new);
|
||||
@ -1063,11 +1064,18 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
}
|
||||
|
||||
opt = (run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0;
|
||||
|
||||
a2_end = a2_ptr + a2_len;
|
||||
cur_ptr = a2_ptr;
|
||||
cur_len = a2_len;
|
||||
sub_count = 0;
|
||||
|
||||
while (1)
|
||||
pmat.ptr = QSE_NULL;
|
||||
pmat.len = 0;
|
||||
|
||||
/* perform test when cur_ptr == a2_end also because
|
||||
* end of string($) needs to be tested */
|
||||
while (cur_ptr <= a2_end)
|
||||
{
|
||||
if (max_count == 0 || sub_count < max_count)
|
||||
{
|
||||
@ -1096,17 +1104,28 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
qse_str_fini (&new);
|
||||
FREE_A_PTRS (run->awk);
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (mat.len == 0 &&
|
||||
pmat.ptr != QSE_NULL &&
|
||||
mat.ptr == pmat.ptr + pmat.len)
|
||||
{
|
||||
/* match length is 0 and the match is still at the
|
||||
* end of the previous match */
|
||||
goto skip_one_char;
|
||||
}
|
||||
|
||||
if (qse_str_ncat (
|
||||
&new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
|
||||
{
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
qse_str_fini (&new);
|
||||
FREE_A_PTRS (run->awk);
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1133,6 +1152,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
qse_str_fini (&new);
|
||||
FREE_A_PTRS (run->awk);
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@ -1140,6 +1160,26 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
sub_count++;
|
||||
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
||||
cur_ptr = mat.ptr + mat.len;
|
||||
|
||||
pmat = mat;
|
||||
|
||||
if (mat.len == 0)
|
||||
{
|
||||
skip_one_char:
|
||||
/* special treatment is needed if match length is 0 */
|
||||
|
||||
m = qse_str_ncat (&new, cur_ptr, 1);
|
||||
if (m == (qse_size_t)-1)
|
||||
{
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
qse_str_fini (&new);
|
||||
FREE_A_PTRS (run->awk);
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cur_ptr++; cur_len--;
|
||||
}
|
||||
}
|
||||
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
@ -1179,7 +1219,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
{
|
||||
qse_str_fini (&new);
|
||||
FREE_A_PTRS (run->awk);
|
||||
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -1196,11 +1235,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
#undef FREE_A_PTRS
|
||||
|
||||
v = qse_awk_rtx_makeintval (run, sub_count);
|
||||
if (v == QSE_NULL)
|
||||
{
|
||||
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
|
||||
return -1;
|
||||
}
|
||||
if (v == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, v);
|
||||
return 0;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.c 195 2009-06-10 13:18:25Z hyunghwan.chung $
|
||||
* $Id: rex.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -1014,18 +1014,26 @@ what if it is not in the right format? convert it to ordinary characters?? */
|
||||
{
|
||||
NEXT_CHAR (builder, LEVEL_RANGE);
|
||||
|
||||
bound = 0;
|
||||
while (builder->ptn.curc.type == CT_NORMAL &&
|
||||
(builder->ptn.curc.value >= QSE_T('0') &&
|
||||
builder->ptn.curc.value <= QSE_T('9')))
|
||||
if (builder->ptn.curc.type == CT_NORMAL &&
|
||||
(builder->ptn.curc.value >= QSE_T('0') &&
|
||||
builder->ptn.curc.value <= QSE_T('9')))
|
||||
{
|
||||
bound = bound * 10 + builder->ptn.curc.value - QSE_T('0');
|
||||
NEXT_CHAR (builder, LEVEL_RANGE);
|
||||
}
|
||||
bound = 0;
|
||||
|
||||
cmd->ubound = bound;
|
||||
do
|
||||
{
|
||||
bound = bound * 10 + builder->ptn.curc.value - QSE_T('0');
|
||||
NEXT_CHAR (builder, LEVEL_RANGE);
|
||||
}
|
||||
while (builder->ptn.curc.type == CT_NORMAL &&
|
||||
(builder->ptn.curc.value >= QSE_T('0') &&
|
||||
builder->ptn.curc.value <= QSE_T('9')));
|
||||
|
||||
cmd->ubound = bound;
|
||||
}
|
||||
else cmd->ubound = BOUND_MAX;
|
||||
}
|
||||
else cmd->ubound = BOUND_MAX;
|
||||
else cmd->ubound = cmd->lbound;
|
||||
|
||||
if (cmd->lbound > cmd->ubound)
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: sed.c 195 2009-06-10 13:18:25Z hyunghwan.chung $
|
||||
* $Id: sed.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -1850,10 +1850,10 @@ static int write_str_to_file (
|
||||
|
||||
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
qse_cstr_t mat;
|
||||
qse_cstr_t mat, pmat;
|
||||
int opt = 0, repl = 0, n;
|
||||
qse_rex_errnum_t errnum;
|
||||
const qse_char_t* cur_ptr, * str_ptr;
|
||||
const qse_char_t* cur_ptr, * str_ptr, * str_end;
|
||||
qse_size_t cur_len, str_len, m, i;
|
||||
qse_size_t max_count, sub_count;
|
||||
|
||||
@ -1868,13 +1868,19 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
/* TODO: support different line end convention */
|
||||
if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n')) str_len--;
|
||||
|
||||
str_end = str_ptr + str_len;
|
||||
cur_ptr = str_ptr;
|
||||
cur_len = str_len;
|
||||
|
||||
sub_count = 0;
|
||||
max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;
|
||||
|
||||
while (1)
|
||||
pmat.ptr = QSE_NULL;
|
||||
pmat.len = 0;
|
||||
|
||||
/* perform test when cur_ptr == str_end also because
|
||||
* end of string($) needs to be tested */
|
||||
while (cur_ptr <= str_end)
|
||||
{
|
||||
if (max_count == 0 || sub_count < max_count)
|
||||
{
|
||||
@ -1908,6 +1914,15 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
break;
|
||||
}
|
||||
|
||||
if (mat.len == 0 &&
|
||||
pmat.ptr != QSE_NULL &&
|
||||
mat.ptr == pmat.ptr + pmat.len)
|
||||
{
|
||||
/* match length is 0 and the match is still at the
|
||||
* end of the previous match */
|
||||
goto skip_one_char;
|
||||
}
|
||||
|
||||
if (max_count > 0 && sub_count + 1 != max_count)
|
||||
{
|
||||
m = qse_str_ncat (
|
||||
@ -1967,6 +1982,23 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
sub_count++;
|
||||
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
||||
cur_ptr = mat.ptr + mat.len;
|
||||
|
||||
pmat = mat;
|
||||
|
||||
if (mat.len == 0)
|
||||
{
|
||||
skip_one_char:
|
||||
/* special treatment is needed if the match length is 0 */
|
||||
|
||||
m = qse_str_ncat (&sed->e.txt.subst, cur_ptr, 1);
|
||||
if (m == (qse_size_t)-1)
|
||||
{
|
||||
SETERR0 (sed, QSE_SED_ENOMEM, 0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cur_ptr++; cur_len--;
|
||||
}
|
||||
}
|
||||
|
||||
if (str_len < QSE_STR_LEN(&sed->e.in.line))
|
||||
|
Reference in New Issue
Block a user