fixed a bug in AWK and enhanced sed

- awk: fixed a bug where ^ was not handled correctly in gsub(), split(), and FS.
- sed: added code for the y and s commands
This commit is contained in:
hyung-hwan 2009-05-16 07:31:43 +00:00
parent b36f20a4a2
commit 164b3d9a98
12 changed files with 433 additions and 126 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.h 127 2009-05-07 13:15:04Z hyunghwan.chung $
* $Id: rex.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -76,24 +76,84 @@ enum qse_rex_errnum_t
QSE_REX_EEND, /* unexpected end of the pattern */
QSE_REX_EGARBAGE /* garbage after the pattern */
};
typedef enum qse_rex_errnum_t qse_rex_errnum_t;
typedef struct qse_rex_t qse_rex_t;
/*
 * Regular expression object used by qse_rex_open(), qse_rex_close(),
 * qse_rex_build() and qse_rex_match().
 */
struct qse_rex_t
{
/* declares the shared fields; the rex functions rely on the mmgr
 * member that comes from here */
QSE_DEFINE_COMMON_FIELDS (rex)
/* error number written by qse_buildrex()/qse_matchrex() when they are
 * called through qse_rex_build()/qse_rex_match() */
qse_rex_errnum_t errnum;
/* option value handed to qse_matchrex() by qse_rex_match() */
int option;
struct
{
/* depth argument handed to qse_buildrex() */
int build;
/* depth argument handed to qse_matchrex() */
int match;
} depth;
/* compiled pattern returned by qse_buildrex(); QSE_NULL until
 * qse_rex_build() succeeds, released by qse_rex_close() */
void* code;
};
#ifdef __cplusplus
extern "C" {
#endif
QSE_DEFINE_COMMON_FUNCTIONS (rex)
/*
 * Allocates a regular expression object plus xtn extra bytes using mmgr.
 * When mmgr is QSE_NULL the default memory manager is used instead.
 * Only the qse_rex_t part is zero-filled; the xtn bytes are not
 * initialized. Returns QSE_NULL if no memory manager is available or
 * the allocation fails.
 */
qse_rex_t* qse_rex_open (
qse_mmgr_t* mmgr,
qse_size_t xtn
);
/*
 * Frees the compiled pattern, if any, and then the object itself.
 * rex must not be QSE_NULL.
 */
void qse_rex_close (
qse_rex_t* rex
);
/*
 * Compiles the pattern ptn of length len and stores the result in rex,
 * releasing any previously compiled pattern. Returns 0 on success.
 * Returns -1 on failure, in which case the previously compiled pattern
 * is left in place and rex->errnum receives the error.
 */
int qse_rex_build (
qse_rex_t* rex,
const qse_char_t* ptn,
qse_size_t len
);
/*
 * Matches the compiled pattern against substr/sublen, which is searched
 * as a part of the whole string str/len; ^ and $ are tested against the
 * boundaries of str/len. The return value is that of qse_matchrex();
 * on a successful match, a non-null match receives the pointer and
 * length of the matched text.
 */
int qse_rex_match (
qse_rex_t* rex,
const qse_char_t* str,
qse_size_t len,
const qse_char_t* substr,
qse_size_t sublen,
qse_cstr_t* match
);
void* qse_buildrex (
qse_mmgr_t* mmgr, qse_size_t depth,
const qse_char_t* ptn, qse_size_t len, int* errnum);
qse_mmgr_t* mmgr,
qse_size_t depth,
const qse_char_t* ptn,
qse_size_t len,
qse_rex_errnum_t* errnum
);
int qse_matchrex (
qse_mmgr_t* mmgr, qse_size_t depth,
void* code, int option,
const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum);
qse_mmgr_t* mmgr,
qse_size_t depth,
void* code,
int option,
const qse_char_t* str,
qse_size_t len,
const qse_char_t* substr,
qse_size_t sublen,
qse_cstr_t* match,
qse_rex_errnum_t* errnum
);
void qse_freerex (qse_mmgr_t* mmgr, void* code);
void qse_freerex (
qse_mmgr_t* mmgr,
void* code
);
qse_bool_t qse_isemptyrex (void* code);
qse_bool_t qse_isemptyrex (
void* code
);
#if 0
void qse_dprintrex (qse_rex_t* rex, void* rex);

View File

@ -198,6 +198,7 @@ struct qse_sed_t
{
qse_lda_t appended;
qse_str_t held;
qse_str_t subst;
} text;
};

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 127 2009-05-07 13:15:04Z hyunghwan.chung $
* $Id: awk.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -379,7 +379,7 @@ struct qse_awk_rtx_t
#define QSE_AWK_ISEMPTYREX(awk,code) qse_isemptyrex(code)
#define QSE_AWK_BUILDREX(awk,ptn,len,errnum) \
qse_awk_buildrex(awk,ptn,len,errnum)
#define QSE_AWK_MATCHREX(awk,code,option,str,len,match_ptr,match_len,errnum) \
qse_awk_matchrex(awk,code,option,str,len,match_ptr,match_len,errnum)
#define QSE_AWK_MATCHREX(awk,code,option,str,len,substr,sublen,match,errnum) \
qse_awk_matchrex(awk,code,option,str,len,substr,sublen,match,errnum)
#endif

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 90 2009-03-01 09:58:19Z hyunghwan.chung $
* $Id: fnc.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -583,7 +583,7 @@ static int fnc_split (
qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, * t1, * t2, ** a1_ref;
qse_char_t* str, * str_free, * p, * tok;
qse_size_t str_len, str_left, tok_len;
qse_size_t str_len, str_left, tok_len, org_len;
qse_long_t num;
qse_char_t key[QSE_SIZEOF(qse_long_t)*8+2];
qse_size_t key_len;
@ -719,7 +719,6 @@ static int fnc_split (
QSE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != QSE_NULL)
QSE_AWK_FREEREX (run->awk, fs_rex_free);
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
return -1;
}
@ -727,7 +726,7 @@ static int fnc_split (
*a1_ref = t1;
qse_awk_rtx_refupval (run, *a1_ref);
p = str; str_left = str_len;
p = str; str_left = str_len; org_len = str_len;
num = 1;
while (p != QSE_NULL)
@ -739,8 +738,10 @@ static int fnc_split (
}
else
{
p = qse_awk_rtx_strxntokbyrex (run, p, str_len,
fs_rex, &tok, &tok_len, &errnum);
p = qse_awk_rtx_strxntokbyrex (
run, str, org_len, p, str_len,
fs_rex, &tok, &tok_len, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{
if (str_free != QSE_NULL)
@ -919,8 +920,9 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
qse_char_t* a2_ptr_free = QSE_NULL;
void* rex = QSE_NULL;
int opt, n;
const qse_char_t* cur_ptr, * mat_ptr;
qse_size_t cur_len, mat_len, i, m;
qse_cstr_t mat;
const qse_char_t* cur_ptr;
qse_size_t cur_len, i, m;
qse_str_t new;
qse_long_t sub_count;
@ -1064,8 +1066,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (max_count == 0 || sub_count < max_count)
{
n = QSE_AWK_MATCHREX (
run->awk, rex, opt, cur_ptr, cur_len,
&mat_ptr, &mat_len, &run->errnum);
run->awk, rex, opt,
a2_ptr, a2_len,
cur_ptr, cur_len,
&mat, &run->errnum);
}
else n = 0;
@ -1092,7 +1096,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
}
if (qse_str_ncat (
&new, cur_ptr, mat_ptr - cur_ptr) == (qse_size_t)-1)
&new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
{
FREE_A0_REX (run->awk, rex);
qse_str_fini (&new);
@ -1111,7 +1115,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
}
else if (a1_ptr[i] == QSE_T('&'))
{
m = qse_str_ncat (&new, mat_ptr, mat_len);
m = qse_str_ncat (&new, mat.ptr, mat.len);
}
else
{
@ -1128,8 +1132,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
}
sub_count++;
cur_len = cur_len - ((mat_ptr - cur_ptr) + mat_len);
cur_ptr = mat_ptr + mat_len;
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
cur_ptr = mat.ptr + mat.len;
}
FREE_A0_REX (run->awk, rex);
@ -1218,8 +1222,7 @@ static int fnc_match (
qse_long_t idx;
void* rex;
int opt, n;
const qse_char_t* mat_ptr;
qse_size_t mat_len;
qse_cstr_t mat;
nargs = qse_awk_rtx_getnargs (run);
QSE_ASSERT (nargs == 2);
@ -1273,15 +1276,17 @@ static int fnc_match (
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
n = QSE_AWK_MATCHREX (
run->awk, rex, opt, str0, len0,
&mat_ptr, &mat_len, &run->errnum);
run->awk, rex, opt,
str0, len0, str0, len0,
&mat, &run->errnum
);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (run->awk, rex);
if (n == -1) return -1;
idx = (n == 0)? 0: ((qse_long_t)(mat_ptr-str0) + 1);
idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
a0 = qse_awk_rtx_makeintval (run, idx);
if (a0 == QSE_NULL)
@ -1293,7 +1298,7 @@ static int fnc_match (
qse_awk_rtx_refupval (run, a0);
a1 = qse_awk_rtx_makeintval (run,
((n == 0)? (qse_long_t)-1: (qse_long_t)mat_len));
((n == 0)? (qse_long_t)-1: (qse_long_t)mat.len));
if (a1 == QSE_NULL)
{
qse_awk_rtx_refdownval (run, a0);

View File

@ -1,5 +1,5 @@
/*
* $Id: misc.c 127 2009-05-07 13:15:04Z hyunghwan.chung $
* $Id: misc.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -831,24 +831,24 @@ exit_loop:
}
qse_char_t* qse_awk_rtx_strxntokbyrex (
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len,
qse_awk_rtx_t* rtx,
const qse_char_t* str, qse_size_t len,
const qse_char_t* substr, qse_size_t sublen,
void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum)
{
int n;
qse_char_t* match_ptr;
qse_size_t match_len, i;
qse_size_t left = len;
const qse_char_t* ptr = s;
const qse_char_t* str_ptr = s;
qse_size_t str_len = len;
qse_size_t i, left = sublen;
const qse_char_t* ptr = substr;
const qse_char_t* str_ptr = substr;
qse_size_t str_len = sublen;
qse_cstr_t match;
while (len > 0)
while (sublen > 0)
{
n = QSE_AWK_MATCHREX (
rtx->awk, rex,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
ptr, left, (const qse_char_t**)&match_ptr, &match_len,
errnum);
str, len, ptr, left, &match, errnum);
if (n == -1) return QSE_NULL;
if (n == 0)
{
@ -862,7 +862,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
QSE_ASSERT (n == 1);
if (match_len == 0)
if (match.len == 0)
{
ptr++;
left--;
@ -870,20 +870,20 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
else if (rtx->awk->option & QSE_AWK_STRIPSPACES)
{
/* match at the beginning of the input string */
if (match_ptr == s)
if (match.ptr == substr)
{
for (i = 0; i < match_len; i++)
for (i = 0; i < match.len; i++)
{
if (!QSE_AWK_ISSPACE(rtx->awk, match_ptr[i]))
if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
goto exit_loop;
}
/* a match that consists only of spaces at the
* beginning of the input string is skipped */
ptr += match_len;
left -= match_len;
str_ptr = s + match_len;
str_len -= match_len;
ptr += match.len;
left -= match.len;
str_ptr = substr + match.len;
str_len -= match.len;
}
else break;
}
@ -891,7 +891,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
}
exit_loop:
if (len == 0)
if (sublen == 0)
{
*tok = (qse_char_t*)str_ptr;
*tok_len = str_len;
@ -900,14 +900,14 @@ exit_loop:
}
*tok = (qse_char_t*)str_ptr;
*tok_len = match_ptr - str_ptr;
*tok_len = match.ptr - str_ptr;
for (i = 0; i < match_len; i++)
for (i = 0; i < match.len; i++)
{
if (!QSE_AWK_ISSPACE(rtx->awk, match_ptr[i]))
if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
{
*errnum = QSE_AWK_ENOERR;
return match_ptr+match_len;
return (qse_char_t*)match.ptr+match.len;
}
}
@ -915,13 +915,13 @@ exit_loop:
if (rtx->awk->option & QSE_AWK_STRIPSPACES)
{
return (match_ptr+match_len >= s+len)?
QSE_NULL: (match_ptr+match_len);
return (match.ptr+match.len >= substr+sublen)?
QSE_NULL: ((qse_char_t*)match.ptr+match.len);
}
else
{
return (match_ptr+match_len > s+len)?
QSE_NULL: (match_ptr+match_len);
return (match.ptr+match.len > substr+sublen)?
QSE_NULL: ((qse_char_t*)match.ptr+match.len);
}
}
@ -944,7 +944,7 @@ exit_loop:
void* qse_awk_buildrex (
qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len, int* errnum)
{
int err;
qse_rex_errnum_t err;
void* p;
p = qse_buildrex (
@ -956,13 +956,15 @@ void* qse_awk_buildrex (
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option,
const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum)
const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, int* errnum)
{
int err, x;
int x;
qse_rex_errnum_t err;
x = qse_matchrex (
awk->mmgr, awk->rex.depth.max.match,
code, option, str, len, match_ptr, match_len, &err);
code, option, str, len, substr, sublen, match, &err);
if (x < 0) *errnum = QSE_AWK_REXERRTOERR(err);
return x;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: misc.h 75 2009-02-22 14:10:34Z hyunghwan.chung $
* $Id: misc.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -42,8 +42,16 @@ qse_char_t* qse_awk_rtx_strxntok (
qse_char_t** tok, qse_size_t* tok_len);
qse_char_t* qse_awk_rtx_strxntokbyrex (
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len,
void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum);
qse_awk_rtx_t* rtx,
const qse_char_t* str,
qse_size_t len,
const qse_char_t* substr,
qse_size_t sublen,
void* rex,
qse_char_t** tok,
qse_size_t* tok_len,
int* errnum
);
void* qse_awk_buildrex (
@ -52,7 +60,8 @@ void* qse_awk_buildrex (
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option,
const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum);
const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, int* errnum);
#ifdef __cplusplus
}

View File

@ -1,5 +1,5 @@
/*
* $Id: rec.c 89 2009-02-28 15:27:03Z hyunghwan.chung $
* $Id: rec.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -146,8 +146,13 @@ static int split_record (qse_awk_rtx_t* run)
}
else
{
p = qse_awk_rtx_strxntokbyrex (run, p, len,
run->gbl.fs, &tok, &tok_len, &errnum);
p = qse_awk_rtx_strxntokbyrex (
run,
QSE_STR_PTR(&run->inrec.line),
QSE_STR_LEN(&run->inrec.line),
p, len,
run->gbl.fs, &tok, &tok_len, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{
if (fs_free != QSE_NULL)
@ -203,8 +208,13 @@ static int split_record (qse_awk_rtx_t* run)
}
else
{
p = qse_awk_rtx_strxntokbyrex (run, p, len,
run->gbl.fs, &tok, &tok_len, &errnum);
p = qse_awk_rtx_strxntokbyrex (
run,
QSE_STR_PTR(&run->inrec.line),
QSE_STR_LEN(&run->inrec.line),
p, len,
run->gbl.fs, &tok, &tok_len, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{
if (fs_free != QSE_NULL)

View File

@ -1,5 +1,5 @@
/*
* $Id: rio.c 90 2009-03-01 09:58:19Z hyunghwan.chung $
* $Id: rio.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -269,8 +269,7 @@ int qse_awk_rtx_readio (
* the buffer has been appended with the last character
* after the previous matchrex has failed */
const qse_char_t* match_ptr;
qse_size_t match_len;
qse_cstr_t match;
QSE_ASSERT (run->gbl.rs != QSE_NULL);
@ -278,7 +277,8 @@ int qse_awk_rtx_readio (
run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match_ptr, &match_len, &run->errnum);
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &run->errnum);
if (n == -1)
{
ret = -1;
@ -291,9 +291,9 @@ int qse_awk_rtx_readio (
* the current buffer */
QSE_ASSERT (
QSE_STR_PTR(buf) + QSE_STR_LEN(buf) ==
match_ptr + match_len);
match.ptr + match.len);
QSE_STR_LEN(buf) -= match_len;
QSE_STR_LEN(buf) -= match.len;
break;
}
}
@ -357,8 +357,7 @@ int qse_awk_rtx_readio (
}
else
{
const qse_char_t* match_ptr;
qse_size_t match_len;
qse_cstr_t match;
QSE_ASSERT (run->gbl.rs != QSE_NULL);
@ -366,7 +365,8 @@ int qse_awk_rtx_readio (
run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match_ptr, &match_len, &run->errnum);
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &run->errnum);
if (n == -1)
{
ret = -1;
@ -380,9 +380,9 @@ int qse_awk_rtx_readio (
* the current buffer */
QSE_ASSERT (
QSE_STR_PTR(buf) + QSE_STR_LEN(buf) ==
match_ptr + match_len);
match.ptr + match.len);
QSE_STR_LEN(buf) -= match_len;
QSE_STR_LEN(buf) -= match.len;
p->in.pos--; /* unread the character in c */
break;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 127 2009-05-07 13:15:04Z hyunghwan.chung $
* $Id: run.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -3099,7 +3099,9 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
((((qse_awk_rtx_t*)run)->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
((qse_awk_val_str_t*)run->inrec.d0)->ptr,
((qse_awk_val_str_t*)run->inrec.d0)->len,
QSE_NULL, QSE_NULL, &errnum);
((qse_awk_val_str_t*)run->inrec.d0)->ptr,
((qse_awk_val_str_t*)run->inrec.d0)->len,
QSE_NULL, &errnum);
if (n == -1)
{
@ -4773,7 +4775,9 @@ static qse_awk_val_t* eval_binop_match0 (
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
((qse_awk_val_str_t*)left)->ptr,
((qse_awk_val_str_t*)left)->len,
QSE_NULL, QSE_NULL, &errnum);
((qse_awk_val_str_t*)left)->ptr,
((qse_awk_val_str_t*)left)->len,
QSE_NULL, &errnum);
if (n == -1)
{
if (right->type != QSE_AWK_VAL_REX)
@ -4810,7 +4814,8 @@ static qse_awk_val_t* eval_binop_match0 (
run->awk, rex_code,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
out.u.cpldup.ptr, out.u.cpldup.len,
QSE_NULL, QSE_NULL, &errnum);
out.u.cpldup.ptr, out.u.cpldup.len,
QSE_NULL, &errnum);
if (n == -1)
{
QSE_AWK_FREE (run->awk, out.u.cpldup.ptr);

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c 127 2009-05-07 13:15:04Z hyunghwan.chung $
* $Id: rex.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -107,7 +107,7 @@ struct builder_t
qse_size_t cur;
} depth;
int errnum;
qse_rex_errnum_t errnum;
};
struct matcher_t
@ -121,6 +121,12 @@ struct matcher_t
const qse_char_t* ptr;
const qse_char_t* end;
} str;
struct
{
const qse_char_t* ptr;
const qse_char_t* end;
} realstr;
} match;
struct
@ -130,7 +136,7 @@ struct matcher_t
} depth;
int ignorecase;
int errnum;
qse_rex_errnum_t errnum;
};
struct match_t
@ -331,9 +337,63 @@ static struct __char_class_t __char_class[] =
{ QSE_NULL, 0, QSE_NULL }
};
/*
 * Creates a regular expression object with xtn extra bytes behind it.
 * Falls back to the default memory manager when mmgr is QSE_NULL.
 * Returns QSE_NULL when no memory manager can be found or when the
 * allocation fails. Only the qse_rex_t part is zero-filled.
 */
qse_rex_t* qse_rex_open (qse_mmgr_t* mmgr, qse_size_t xtn)
{
	qse_rex_t* rex;

	if (mmgr == QSE_NULL)
	{
		mmgr = QSE_MMGR_GETDFL();

		QSE_ASSERTX (mmgr != QSE_NULL,
			"Set the memory manager with QSE_MMGR_SETDFL()");

		if (mmgr == QSE_NULL) return QSE_NULL;
	}

	rex = (qse_rex_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_rex_t) + xtn);
	if (rex != QSE_NULL)
	{
		/* clear the object proper; the extension area is left as is */
		QSE_MEMSET (rex, 0, QSE_SIZEOF(*rex));
		rex->mmgr = mmgr;
	}

	return rex;
}
/*
 * Destroys a regular expression object: the compiled pattern is
 * released first (when one exists), then the object itself.
 */
void qse_rex_close (qse_rex_t* rex)
{
	void* compiled = rex->code;

	if (compiled != QSE_NULL)
	{
		qse_freerex (rex->mmgr, compiled);
	}

	QSE_MMGR_FREE (rex->mmgr, rex);
}
/*
 * Compiles ptn/len and installs the result in rex.
 * Returns 0 on success. Returns -1 when compilation fails; the pattern
 * compiled earlier, if any, is kept and rex->errnum holds the error.
 */
int qse_rex_build (qse_rex_t* rex, const qse_char_t* ptn, qse_size_t len)
{
	void* compiled;

	compiled = qse_buildrex (
		rex->mmgr, rex->depth.build, ptn, len, &rex->errnum);
	if (compiled == QSE_NULL)
	{
		return -1;
	}

	/* replace the old pattern only after the new one is available */
	if (rex->code != QSE_NULL)
	{
		qse_freerex (rex->mmgr, rex->code);
	}
	rex->code = compiled;

	return 0;
}
/*
 * Runs the compiled pattern of rex over substr/sublen, a part of the
 * whole string str/len. The call is forwarded to qse_matchrex() with
 * the memory manager, match depth, code and option stored in rex;
 * its result is returned unchanged and errors land in rex->errnum.
 */
int qse_rex_match (
	qse_rex_t* rex,
	const qse_char_t* str, qse_size_t len,
	const qse_char_t* substr, qse_size_t sublen, qse_cstr_t* match)
{
	int n;

	n = qse_matchrex (
		rex->mmgr, rex->depth.match,
		rex->code, rex->option,
		str, len,
		substr, sublen,
		match, &rex->errnum
	);

	return n;
}
void* qse_buildrex (
qse_mmgr_t* mmgr, qse_size_t depth,
const qse_char_t* ptn, qse_size_t len, int* errnum)
const qse_char_t* ptn, qse_size_t len, qse_rex_errnum_t* errnum)
{
builder_t builder;
@ -399,7 +459,8 @@ int qse_matchrex (
qse_mmgr_t* mmgr, qse_size_t depth,
void* code, int option,
const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum)
const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, qse_rex_errnum_t* errnum)
{
matcher_t matcher;
match_t mat;
@ -409,8 +470,11 @@ int qse_matchrex (
matcher.mmgr = mmgr;
/* store the source string */
matcher.match.str.ptr = str;
matcher.match.str.end = str + len;
matcher.match.str.ptr = substr;
matcher.match.str.end = substr + sublen;
matcher.match.realstr.ptr = str;
matcher.match.realstr.end = str + len;
matcher.depth.max = depth;
matcher.depth.cur = 0;
@ -418,7 +482,7 @@ int qse_matchrex (
mat.matched = QSE_FALSE;
/* TODO: should it allow an offset here??? */
mat.match_ptr = str + offset;
mat.match_ptr = substr + offset;
/*while (mat.match_ptr < matcher.match.str.end)*/
while (mat.match_ptr <= matcher.match.str.end)
@ -441,8 +505,11 @@ int qse_matchrex (
}
*/
if (match_ptr != QSE_NULL) *match_ptr = mat.match_ptr;
if (match_len != QSE_NULL) *match_len = mat.match_len;
if (match != QSE_NULL)
{
match->ptr = mat.match_ptr;
match->len = mat.match_len;
}
/*match_ptr_zero = QSE_NULL;*/
break;
@ -454,8 +521,11 @@ int qse_matchrex (
/*
if (match_ptr_zero != QSE_NULL)
{
if (match_ptr != QSE_NULL) *match_ptr = match_ptr_zero;
if (match_len != QSE_NULL) *match_len = 0;
if (match != QSE_NULL)
{
match->ptr = match_ptr_zero;
match->len = 0;
}
return 1;
}
*/
@ -1349,7 +1419,9 @@ static const qse_byte_t* match_bol (
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_BOL);
mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
/*mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
(cp->lbound == cp->ubound && cp->lbound == 0));*/
mat->matched = (mat->match_ptr == matcher->match.realstr.ptr ||
(cp->lbound == cp->ubound && cp->lbound == 0));
mat->match_len = 0;
@ -1365,7 +1437,9 @@ static const qse_byte_t* match_eol (
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_EOL);
mat->matched = (mat->match_ptr == matcher->match.str.end ||
/*mat->matched = (mat->match_ptr == matcher->match.str.end ||
(cp->lbound == cp->ubound && cp->lbound == 0));*/
mat->matched = (mat->match_ptr == matcher->match.realstr.end ||
(cp->lbound == cp->ubound && cp->lbound == 0));
mat->match_len = 0;

View File

@ -110,11 +110,22 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
}
if (qse_str_init (&sed->text.subst, mmgr, 256) == QSE_NULL)
{
qse_str_fini (&sed->text.held);
qse_lda_fini (&sed->text.appended);
QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);
qse_map_fini (&sed->labs);
qse_str_fini (&sed->rexbuf);
return QSE_NULL;
}
return sed;
}
void qse_sed_fini (qse_sed_t* sed)
{
qse_str_fini (&sed->text.subst);
qse_str_fini (&sed->text.held);
qse_lda_fini (&sed->text.appended);
@ -232,7 +243,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
QSE_MMGR_FREE (sed->mmgr, cmd->u.branch.label.ptr);
break;
case QSE_SED_CMD_S:
case QSE_SED_CMD_SUBSTITUTE:
if (cmd->u.subst.file.ptr != QSE_NULL)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
if (cmd->u.subst.rpl.ptr != QSE_NULL)
@ -241,7 +252,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
qse_freerex (sed->mmgr, cmd->u.subst.rex);
break;
case QSE_SED_CMD_Y:
case QSE_SED_CMD_TRANSLATE:
if (cmd->u.transet.ptr != QSE_NULL)
QSE_MMGR_FREE (sed->mmgr, cmd->u.transet.ptr);
break;
@ -1691,6 +1702,110 @@ static int write_str_to_file (
return 0;
}
/*
 * Executes the substitute command on the current input line.
 *
 * The line in sed->eio.in.line is scanned for matches of
 * cmd->u.subst.rex. The output is assembled in sed->text.subst and
 * swapped into sed->eio.in.line at the end. In the replacement text
 * an unescaped '&' stands for the matched text and '\&' for a literal
 * '&'.
 *
 * A trailing newline is excluded from matching and appended again to
 * the result. Zero-length matches are followed by copying one input
 * character through so that the scan always moves forward.
 *
 * Returns 0 on success. Returns -1 on failure with sed->errnum set to
 * QSE_SED_EREXMA.
 *
 * NOTE(review): failures of qse_str_ncat()/qse_str_ccat() are also
 * reported as QSE_SED_EREXMA; a dedicated out-of-memory error number
 * would be more accurate - confirm which one the sed error set offers.
 * NOTE(review): without the g flag, cmd->u.subst.occ limits the number
 * of replacements (the first occ matches are replaced, and an occ of 0
 * means no limit) rather than selecting the occ-th match - confirm
 * that this is intended.
 */
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cstr_t mat;
	int opt = 0;
	qse_rex_errnum_t errnum;
	const qse_char_t* cur_ptr, * str_ptr;
	qse_size_t cur_len, str_len, m, i;
	qse_size_t max_count, sub_count;

	QSE_ASSERT (cmd->type == QSE_SED_CMD_SUBSTITUTE);

	qse_str_clear (&sed->text.subst);
	if (cmd->u.subst.i) opt = QSE_REX_IGNORECASE;

	str_ptr = QSE_STR_PTR(&sed->eio.in.line);
	str_len = QSE_STR_LEN(&sed->eio.in.line);

	/* TODO: support different line end scheme */
	if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n')) str_len--;

	cur_ptr = str_ptr;
	cur_len = str_len;

	sub_count = 0;
	max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;

	while (1)
	{
		int n;

		if (max_count == 0 || sub_count < max_count)
		{
			/* the whole line is passed along with the remaining
			 * part so that ^ is tested against the real start
			 * of the line, not against cur_ptr */
			/* TODO: maximum match depth... */
			n = qse_matchrex (
				sed->mmgr, 0, cmd->u.subst.rex, opt,
				str_ptr, str_len,
				cur_ptr, cur_len,
				&mat, &errnum
			);
		}
		else n = 0;

		if (n == -1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		if (n == 0)
		{
			/* no more match found. copy the rest as it is */
			if (qse_str_ncat (
				&sed->text.subst,
				cur_ptr, cur_len) == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}
			break;
		}

		/* copy the text between the current position and the match */
		m = qse_str_ncat (&sed->text.subst, cur_ptr, mat.ptr-cur_ptr);
		if (m == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		/* expand the replacement text */
		for (i = 0; i < cmd->u.subst.rpl.len; i++)
		{
			if ((i+1) < cmd->u.subst.rpl.len &&
			    cmd->u.subst.rpl.ptr[i] == QSE_T('\\') &&
			    cmd->u.subst.rpl.ptr[i+1] == QSE_T('&'))
			{
				m = qse_str_ccat (&sed->text.subst, QSE_T('&'));
				i++;
			}
			else if (cmd->u.subst.rpl.ptr[i] == QSE_T('&'))
			{
				m = qse_str_ncat (
					&sed->text.subst, mat.ptr, mat.len);
			}
			else
			{
				m = qse_str_ccat (
					&sed->text.subst, cmd->u.subst.rpl.ptr[i]);
			}

			if (m == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}
		}

		sub_count++;
		cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
		cur_ptr = mat.ptr + mat.len;

		if (mat.len == 0)
		{
			/* a zero-length match does not consume any input.
			 * without special care the same position would be
			 * matched again and again. pass one character
			 * through and resume after it. */
			if (cur_len == 0) break; /* matched at the very end */

			m = qse_str_ccat (&sed->text.subst, *cur_ptr);
			if (m == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}

			cur_ptr++;
			cur_len--;
		}
	}

	if (str_len < QSE_STR_LEN(&sed->eio.in.line))
	{
		/* the trailing newline was left out of matching above.
		 * put it back so that the line stays terminated */
		/* TODO: support different line end scheme */
		m = qse_str_ccat (&sed->text.subst, QSE_T('\n'));
		if (m == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}
	}

	qse_str_swap (&sed->eio.in.line, &sed->text.subst);
	return 0;
}
static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
{
switch (a->type)
@ -1700,10 +1815,11 @@ static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
case QSE_SED_A_REX:
{
qse_str_t match;
int errnum, n;
int n;
qse_cstr_t match;
qse_str_t* line;
qse_size_t llen;
qse_rex_errnum_t errnum;
QSE_ASSERT (a->u.rex != QSE_NULL);
@ -1715,13 +1831,10 @@ static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
n = qse_matchrex (
sed->mmgr,
0,
a->u.rex,
0,
QSE_STR_PTR(line),
llen,
&match.ptr, &match.len, &errnum);
sed->mmgr, 0, a->u.rex, 0,
QSE_STR_PTR(line), llen,
QSE_STR_PTR(line), llen,
&match, &errnum);
if (n <= -1)
{
sed->errnum = QSE_SED_EREXMA;
@ -2047,8 +2160,7 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
case QSE_SED_CMD_WRITE_FILELN:
{
const qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line);
const qse_char_t* len = QSE_STR_LEN(&sed->eio.in.line);
qse_size_t i;
qse_size_t i, len = QSE_STR_LEN(&sed->eio.in.line);
for (i = 0; i < len; i++)
{
/* TODO: handle different line end scheme */
@ -2089,6 +2201,37 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
jumpto = cmd->u.branch.target;
break;
case QSE_SED_CMD_TRANSLATE:
{
qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line);
qse_size_t i, len = QSE_STR_LEN(&sed->eio.in.line);
/* TODO: sort cmd->u.transet and do a binary search.
* once sorted, you can check, before the binary search, whether ptr[i] < transet[0] || ptr[i] > transet[transet_size-1]; if so, it has no matching translation */
/* TODO: support different line end scheme */
if (len > 0 && ptr[len-1] == QSE_T('\n')) len--;
for (i = 0; i < len; i++)
{
const qse_char_t* tptr = cmd->u.transet.ptr;
qse_size_t j, tlen = cmd->u.transet.len;
for (j = 0; j < tlen; j += 2)
{
if (ptr[i] == tptr[j])
{
ptr[i] = tptr[j+1];
break;
}
}
}
break;
}
case QSE_SED_CMD_SUBSTITUTE:
n = do_subst (sed, cmd);
if (n <= -1) return QSE_NULL;
break;
}
if (jumpto == NULL) jumpto = cmd + 1;

View File

@ -85,10 +85,8 @@ struct qse_sed_cmd_t
QSE_SED_CMD_WRITE_FILE = QSE_T('w'),
QSE_SED_CMD_WRITE_FILELN = QSE_T('W'),
/* s/regex/str/ - replace matching pattern with a new string */
QSE_SED_CMD_S = QSE_T('s'),
/* y/s/d/ - translate characters in s to characters in d */
QSE_SED_CMD_Y = QSE_T('y')
QSE_SED_CMD_SUBSTITUTE = QSE_T('s'),
QSE_SED_CMD_TRANSLATE = QSE_T('y')
} type;