fixed bugs in rex and awk

- fixed bugs where some forms of ranges, such as {,m} and {n,}, were not parsed properly
- fixed bugs in substitution functions that did not handle 0-length substring matches properly.
This commit is contained in:
2009-06-18 06:43:50 +00:00
parent e774c0bbd1
commit 944a492c88
6 changed files with 223 additions and 69 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 199 2009-06-14 08:40:52Z hyunghwan.chung $
* $Id: fnc.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -919,14 +919,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v;
qse_char_t* a0_ptr, * a1_ptr, * a2_ptr;
qse_char_t* a0_ptr, * a1_ptr, * a2_ptr, * a2_end;
qse_size_t a0_len, a1_len, a2_len;
qse_char_t* a0_ptr_free = QSE_NULL;
qse_char_t* a1_ptr_free = QSE_NULL;
qse_char_t* a2_ptr_free = QSE_NULL;
void* rex = QSE_NULL;
int opt, n;
qse_cstr_t mat;
qse_cstr_t mat, pmat;
const qse_char_t* cur_ptr;
qse_size_t cur_len, i, m;
qse_str_t new;
@ -1053,7 +1053,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type != QSE_AWK_VAL_REX)
{
rex = QSE_AWK_BUILDREX (run->awk, a0_ptr, a0_len, &run->errinf.num);
rex = QSE_AWK_BUILDREX (
run->awk, a0_ptr, a0_len, &run->errinf.num);
if (rex == QSE_NULL)
{
qse_str_fini (&new);
@ -1063,11 +1064,18 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
}
opt = (run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0;
a2_end = a2_ptr + a2_len;
cur_ptr = a2_ptr;
cur_len = a2_len;
sub_count = 0;
while (1)
pmat.ptr = QSE_NULL;
pmat.len = 0;
/* perform test when cur_ptr == a2_end also because
* end of string($) needs to be tested */
while (cur_ptr <= a2_end)
{
if (max_count == 0 || sub_count < max_count)
{
@ -1096,17 +1104,28 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
FREE_A0_REX (run->awk, rex);
qse_str_fini (&new);
FREE_A_PTRS (run->awk);
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
return -1;
}
break;
}
if (mat.len == 0 &&
pmat.ptr != QSE_NULL &&
mat.ptr == pmat.ptr + pmat.len)
{
/* match length is 0 and the match is still at the
* end of the previous match */
goto skip_one_char;
}
if (qse_str_ncat (
&new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
{
FREE_A0_REX (run->awk, rex);
qse_str_fini (&new);
FREE_A_PTRS (run->awk);
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
return -1;
}
@ -1133,6 +1152,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
FREE_A0_REX (run->awk, rex);
qse_str_fini (&new);
FREE_A_PTRS (run->awk);
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
return -1;
}
}
@ -1140,6 +1160,26 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
sub_count++;
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
cur_ptr = mat.ptr + mat.len;
pmat = mat;
if (mat.len == 0)
{
skip_one_char:
/* special treatment is needed if match length is 0 */
m = qse_str_ncat (&new, cur_ptr, 1);
if (m == (qse_size_t)-1)
{
FREE_A0_REX (run->awk, rex);
qse_str_fini (&new);
FREE_A_PTRS (run->awk);
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);
return -1;
}
cur_ptr++; cur_len--;
}
}
FREE_A0_REX (run->awk, rex);
@ -1179,7 +1219,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
{
qse_str_fini (&new);
FREE_A_PTRS (run->awk);
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
return -1;
}
@ -1196,11 +1235,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
#undef FREE_A_PTRS
v = qse_awk_rtx_makeintval (run, sub_count);
if (v == QSE_NULL)
{
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
return -1;
}
if (v == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, v);
return 0;

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c 195 2009-06-10 13:18:25Z hyunghwan.chung $
* $Id: rex.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -1014,18 +1014,26 @@ what if it is not in the raight format? convert it to ordinary characters?? */
{
NEXT_CHAR (builder, LEVEL_RANGE);
bound = 0;
while (builder->ptn.curc.type == CT_NORMAL &&
(builder->ptn.curc.value >= QSE_T('0') &&
builder->ptn.curc.value <= QSE_T('9')))
if (builder->ptn.curc.type == CT_NORMAL &&
(builder->ptn.curc.value >= QSE_T('0') &&
builder->ptn.curc.value <= QSE_T('9')))
{
bound = bound * 10 + builder->ptn.curc.value - QSE_T('0');
NEXT_CHAR (builder, LEVEL_RANGE);
}
bound = 0;
cmd->ubound = bound;
do
{
bound = bound * 10 + builder->ptn.curc.value - QSE_T('0');
NEXT_CHAR (builder, LEVEL_RANGE);
}
while (builder->ptn.curc.type == CT_NORMAL &&
(builder->ptn.curc.value >= QSE_T('0') &&
builder->ptn.curc.value <= QSE_T('9')));
cmd->ubound = bound;
}
else cmd->ubound = BOUND_MAX;
}
else cmd->ubound = BOUND_MAX;
else cmd->ubound = cmd->lbound;
if (cmd->lbound > cmd->ubound)
{

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.c 195 2009-06-10 13:18:25Z hyunghwan.chung $
* $Id: sed.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -1850,10 +1850,10 @@ static int write_str_to_file (
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
qse_cstr_t mat;
qse_cstr_t mat, pmat;
int opt = 0, repl = 0, n;
qse_rex_errnum_t errnum;
const qse_char_t* cur_ptr, * str_ptr;
const qse_char_t* cur_ptr, * str_ptr, * str_end;
qse_size_t cur_len, str_len, m, i;
qse_size_t max_count, sub_count;
@ -1868,13 +1868,19 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
/* TODO: support different line-ending conventions */
if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n')) str_len--;
str_end = str_ptr + str_len;
cur_ptr = str_ptr;
cur_len = str_len;
sub_count = 0;
max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;
while (1)
pmat.ptr = QSE_NULL;
pmat.len = 0;
/* perform test when cur_ptr == str_end also because
* end of string($) needs to be tested */
while (cur_ptr <= str_end)
{
if (max_count == 0 || sub_count < max_count)
{
@ -1908,6 +1914,15 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
break;
}
if (mat.len == 0 &&
pmat.ptr != QSE_NULL &&
mat.ptr == pmat.ptr + pmat.len)
{
/* match length is 0 and the match is still at the
* end of the previous match */
goto skip_one_char;
}
if (max_count > 0 && sub_count + 1 != max_count)
{
m = qse_str_ncat (
@ -1967,6 +1982,23 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
sub_count++;
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
cur_ptr = mat.ptr + mat.len;
pmat = mat;
if (mat.len == 0)
{
skip_one_char:
/* special treatment is needed if the match length is 0 */
m = qse_str_ncat (&sed->e.txt.subst, cur_ptr, 1);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, 0);
return -1;
}
cur_ptr++; cur_len--;
}
}
if (str_len < QSE_STR_LEN(&sed->e.in.line))