fixed a bug in AWK and enhanced sed
- awk: fixed a bug where the ^ anchor was not handled correctly in gsub(), split(), and FS. - sed: added code for the y and s commands
This commit is contained in:
parent
b36f20a4a2
commit
164b3d9a98
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.h 127 2009-05-07 13:15:04Z hyunghwan.chung $
|
||||
* $Id: rex.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -76,24 +76,84 @@ enum qse_rex_errnum_t
|
||||
QSE_REX_EEND, /* unexpected end of the pattern */
|
||||
QSE_REX_EGARBAGE /* garbage after the pattern */
|
||||
};
|
||||
typedef enum qse_rex_errnum_t qse_rex_errnum_t;
|
||||
|
||||
typedef struct qse_rex_t qse_rex_t;
|
||||
|
||||
/*
 * State of a regular expression object used by qse_rex_open(),
 * qse_rex_close(), qse_rex_build() and qse_rex_match().
 */
struct qse_rex_t
|
||||
{
|
||||
QSE_DEFINE_COMMON_FIELDS (rex)
|
||||
/* error number written by qse_buildrex()/qse_matchrex() when they are
 * invoked through qse_rex_build()/qse_rex_match() */
qse_rex_errnum_t errnum;
|
||||
/* option value forwarded to qse_matchrex() by qse_rex_match() */
int option;
|
||||
|
||||
struct
|
||||
{
|
||||
/* depth argument forwarded to qse_buildrex() */
int build;
|
||||
/* depth argument forwarded to qse_matchrex() */
int match;
|
||||
} depth;
|
||||
|
||||
/* compiled pattern produced by qse_buildrex(); zeroed by qse_rex_open()
 * and replaced on every successful qse_rex_build() */
void* code;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
QSE_DEFINE_COMMON_FUNCTIONS (rex)
|
||||
|
||||
/*
 * Allocates a regular expression object followed by xtn extra bytes.
 * mmgr is the memory manager to allocate from; when it is QSE_NULL the
 * default memory manager is used instead.
 * Returns the new object, or QSE_NULL if no memory manager is available
 * or the allocation fails.
 */
qse_rex_t* qse_rex_open (
|
||||
qse_mmgr_t* mmgr,
|
||||
qse_size_t xtn
|
||||
);
|
||||
|
||||
/*
 * Frees the compiled pattern held by rex, if any, and then frees rex
 * itself using the memory manager stored in it.
 */
void qse_rex_close (
|
||||
qse_rex_t* rex
|
||||
);
|
||||
|
||||
/*
 * Compiles the pattern ptn of length len and stores the result in rex,
 * releasing any previously compiled pattern.
 * Returns 0 on success. Returns -1 on failure, in which case the
 * previously compiled pattern is kept and the error number is left in rex.
 */
int qse_rex_build (
|
||||
qse_rex_t* rex,
|
||||
const qse_char_t* ptn,
|
||||
qse_size_t len
|
||||
);
|
||||
|
||||
/*
 * Matches the pattern compiled into rex against a string by calling
 * qse_matchrex() and returning its result.
 * str/len describe the whole string; substr/sublen describe the part of
 * it that is actually searched. Keeping the two apart lets the ^ and $
 * anchors be checked against the bounds of the whole string.
 * match, if not QSE_NULL, receives the pointer and length of the matched
 * text when a match is found.
 */
int qse_rex_match (
|
||||
qse_rex_t* rex,
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
const qse_char_t* substr,
|
||||
qse_size_t sublen,
|
||||
qse_cstr_t* match
|
||||
);
|
||||
|
||||
void* qse_buildrex (
|
||||
qse_mmgr_t* mmgr, qse_size_t depth,
|
||||
const qse_char_t* ptn, qse_size_t len, int* errnum);
|
||||
qse_mmgr_t* mmgr,
|
||||
qse_size_t depth,
|
||||
const qse_char_t* ptn,
|
||||
qse_size_t len,
|
||||
qse_rex_errnum_t* errnum
|
||||
);
|
||||
|
||||
int qse_matchrex (
|
||||
qse_mmgr_t* mmgr, qse_size_t depth,
|
||||
void* code, int option,
|
||||
const qse_char_t* str, qse_size_t len,
|
||||
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum);
|
||||
qse_mmgr_t* mmgr,
|
||||
qse_size_t depth,
|
||||
void* code,
|
||||
int option,
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
const qse_char_t* substr,
|
||||
qse_size_t sublen,
|
||||
qse_cstr_t* match,
|
||||
qse_rex_errnum_t* errnum
|
||||
);
|
||||
|
||||
void qse_freerex (qse_mmgr_t* mmgr, void* code);
|
||||
void qse_freerex (
|
||||
qse_mmgr_t* mmgr,
|
||||
void* code
|
||||
);
|
||||
|
||||
qse_bool_t qse_isemptyrex (void* code);
|
||||
qse_bool_t qse_isemptyrex (
|
||||
void* code
|
||||
);
|
||||
|
||||
#if 0
|
||||
void qse_dprintrex (qse_rex_t* rex, void* rex);
|
||||
|
@ -198,6 +198,7 @@ struct qse_sed_t
|
||||
{
|
||||
qse_lda_t appended;
|
||||
qse_str_t held;
|
||||
qse_str_t subst;
|
||||
} text;
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: awk.h 127 2009-05-07 13:15:04Z hyunghwan.chung $
|
||||
* $Id: awk.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -379,7 +379,7 @@ struct qse_awk_rtx_t
|
||||
#define QSE_AWK_ISEMPTYREX(awk,code) qse_isemptyrex(code)
|
||||
#define QSE_AWK_BUILDREX(awk,ptn,len,errnum) \
|
||||
qse_awk_buildrex(awk,ptn,len,errnum)
|
||||
#define QSE_AWK_MATCHREX(awk,code,option,str,len,match_ptr,match_len,errnum) \
|
||||
qse_awk_matchrex(awk,code,option,str,len,match_ptr,match_len,errnum)
|
||||
#define QSE_AWK_MATCHREX(awk,code,option,str,len,substr,sublen,match,errnum) \
|
||||
qse_awk_matchrex(awk,code,option,str,len,substr,sublen,match,errnum)
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: fnc.c 90 2009-03-01 09:58:19Z hyunghwan.chung $
|
||||
* $Id: fnc.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -583,7 +583,7 @@ static int fnc_split (
|
||||
qse_size_t nargs;
|
||||
qse_awk_val_t* a0, * a1, * a2, * t1, * t2, ** a1_ref;
|
||||
qse_char_t* str, * str_free, * p, * tok;
|
||||
qse_size_t str_len, str_left, tok_len;
|
||||
qse_size_t str_len, str_left, tok_len, org_len;
|
||||
qse_long_t num;
|
||||
qse_char_t key[QSE_SIZEOF(qse_long_t)*8+2];
|
||||
qse_size_t key_len;
|
||||
@ -719,7 +719,6 @@ static int fnc_split (
|
||||
QSE_AWK_FREE (run->awk, fs_free);
|
||||
if (fs_rex_free != QSE_NULL)
|
||||
QSE_AWK_FREEREX (run->awk, fs_rex_free);
|
||||
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -727,7 +726,7 @@ static int fnc_split (
|
||||
*a1_ref = t1;
|
||||
qse_awk_rtx_refupval (run, *a1_ref);
|
||||
|
||||
p = str; str_left = str_len;
|
||||
p = str; str_left = str_len; org_len = str_len;
|
||||
num = 1;
|
||||
|
||||
while (p != QSE_NULL)
|
||||
@ -739,8 +738,10 @@ static int fnc_split (
|
||||
}
|
||||
else
|
||||
{
|
||||
p = qse_awk_rtx_strxntokbyrex (run, p, str_len,
|
||||
fs_rex, &tok, &tok_len, &errnum);
|
||||
p = qse_awk_rtx_strxntokbyrex (
|
||||
run, str, org_len, p, str_len,
|
||||
fs_rex, &tok, &tok_len, &errnum
|
||||
);
|
||||
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
|
||||
{
|
||||
if (str_free != QSE_NULL)
|
||||
@ -919,8 +920,9 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
qse_char_t* a2_ptr_free = QSE_NULL;
|
||||
void* rex = QSE_NULL;
|
||||
int opt, n;
|
||||
const qse_char_t* cur_ptr, * mat_ptr;
|
||||
qse_size_t cur_len, mat_len, i, m;
|
||||
qse_cstr_t mat;
|
||||
const qse_char_t* cur_ptr;
|
||||
qse_size_t cur_len, i, m;
|
||||
qse_str_t new;
|
||||
qse_long_t sub_count;
|
||||
|
||||
@ -1064,8 +1066,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
if (max_count == 0 || sub_count < max_count)
|
||||
{
|
||||
n = QSE_AWK_MATCHREX (
|
||||
run->awk, rex, opt, cur_ptr, cur_len,
|
||||
&mat_ptr, &mat_len, &run->errnum);
|
||||
run->awk, rex, opt,
|
||||
a2_ptr, a2_len,
|
||||
cur_ptr, cur_len,
|
||||
&mat, &run->errnum);
|
||||
}
|
||||
else n = 0;
|
||||
|
||||
@ -1092,7 +1096,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
}
|
||||
|
||||
if (qse_str_ncat (
|
||||
&new, cur_ptr, mat_ptr - cur_ptr) == (qse_size_t)-1)
|
||||
&new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
|
||||
{
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
qse_str_fini (&new);
|
||||
@ -1111,7 +1115,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
}
|
||||
else if (a1_ptr[i] == QSE_T('&'))
|
||||
{
|
||||
m = qse_str_ncat (&new, mat_ptr, mat_len);
|
||||
m = qse_str_ncat (&new, mat.ptr, mat.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1128,8 +1132,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
}
|
||||
|
||||
sub_count++;
|
||||
cur_len = cur_len - ((mat_ptr - cur_ptr) + mat_len);
|
||||
cur_ptr = mat_ptr + mat_len;
|
||||
cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
|
||||
cur_ptr = mat.ptr + mat.len;
|
||||
}
|
||||
|
||||
FREE_A0_REX (run->awk, rex);
|
||||
@ -1218,8 +1222,7 @@ static int fnc_match (
|
||||
qse_long_t idx;
|
||||
void* rex;
|
||||
int opt, n;
|
||||
const qse_char_t* mat_ptr;
|
||||
qse_size_t mat_len;
|
||||
qse_cstr_t mat;
|
||||
|
||||
nargs = qse_awk_rtx_getnargs (run);
|
||||
QSE_ASSERT (nargs == 2);
|
||||
@ -1273,15 +1276,17 @@ static int fnc_match (
|
||||
|
||||
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
|
||||
n = QSE_AWK_MATCHREX (
|
||||
run->awk, rex, opt, str0, len0,
|
||||
&mat_ptr, &mat_len, &run->errnum);
|
||||
run->awk, rex, opt,
|
||||
str0, len0, str0, len0,
|
||||
&mat, &run->errnum
|
||||
);
|
||||
|
||||
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str0);
|
||||
if (a1->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (run->awk, rex);
|
||||
|
||||
if (n == -1) return -1;
|
||||
|
||||
idx = (n == 0)? 0: ((qse_long_t)(mat_ptr-str0) + 1);
|
||||
idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
|
||||
|
||||
a0 = qse_awk_rtx_makeintval (run, idx);
|
||||
if (a0 == QSE_NULL)
|
||||
@ -1293,7 +1298,7 @@ static int fnc_match (
|
||||
qse_awk_rtx_refupval (run, a0);
|
||||
|
||||
a1 = qse_awk_rtx_makeintval (run,
|
||||
((n == 0)? (qse_long_t)-1: (qse_long_t)mat_len));
|
||||
((n == 0)? (qse_long_t)-1: (qse_long_t)mat.len));
|
||||
if (a1 == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_refdownval (run, a0);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: misc.c 127 2009-05-07 13:15:04Z hyunghwan.chung $
|
||||
* $Id: misc.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -831,24 +831,24 @@ exit_loop:
|
||||
}
|
||||
|
||||
qse_char_t* qse_awk_rtx_strxntokbyrex (
|
||||
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len,
|
||||
qse_awk_rtx_t* rtx,
|
||||
const qse_char_t* str, qse_size_t len,
|
||||
const qse_char_t* substr, qse_size_t sublen,
|
||||
void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum)
|
||||
{
|
||||
int n;
|
||||
qse_char_t* match_ptr;
|
||||
qse_size_t match_len, i;
|
||||
qse_size_t left = len;
|
||||
const qse_char_t* ptr = s;
|
||||
const qse_char_t* str_ptr = s;
|
||||
qse_size_t str_len = len;
|
||||
qse_size_t i, left = sublen;
|
||||
const qse_char_t* ptr = substr;
|
||||
const qse_char_t* str_ptr = substr;
|
||||
qse_size_t str_len = sublen;
|
||||
qse_cstr_t match;
|
||||
|
||||
while (len > 0)
|
||||
while (sublen > 0)
|
||||
{
|
||||
n = QSE_AWK_MATCHREX (
|
||||
rtx->awk, rex,
|
||||
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
ptr, left, (const qse_char_t**)&match_ptr, &match_len,
|
||||
errnum);
|
||||
str, len, ptr, left, &match, errnum);
|
||||
if (n == -1) return QSE_NULL;
|
||||
if (n == 0)
|
||||
{
|
||||
@ -862,7 +862,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
|
||||
|
||||
QSE_ASSERT (n == 1);
|
||||
|
||||
if (match_len == 0)
|
||||
if (match.len == 0)
|
||||
{
|
||||
ptr++;
|
||||
left--;
|
||||
@ -870,28 +870,28 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
|
||||
else if (rtx->awk->option & QSE_AWK_STRIPSPACES)
|
||||
{
|
||||
/* match at the beginning of the input string */
|
||||
if (match_ptr == s)
|
||||
if (match.ptr == substr)
|
||||
{
|
||||
for (i = 0; i < match_len; i++)
|
||||
for (i = 0; i < match.len; i++)
|
||||
{
|
||||
if (!QSE_AWK_ISSPACE(rtx->awk, match_ptr[i]))
|
||||
if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
|
||||
goto exit_loop;
|
||||
}
|
||||
|
||||
/* the match that are all spaces at the
|
||||
* beginning of the input string is skipped */
|
||||
ptr += match_len;
|
||||
left -= match_len;
|
||||
str_ptr = s + match_len;
|
||||
str_len -= match_len;
|
||||
ptr += match.len;
|
||||
left -= match.len;
|
||||
str_ptr = substr + match.len;
|
||||
str_len -= match.len;
|
||||
}
|
||||
else break;
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
exit_loop:
|
||||
if (len == 0)
|
||||
if (sublen == 0)
|
||||
{
|
||||
*tok = (qse_char_t*)str_ptr;
|
||||
*tok_len = str_len;
|
||||
@ -900,14 +900,14 @@ exit_loop:
|
||||
}
|
||||
|
||||
*tok = (qse_char_t*)str_ptr;
|
||||
*tok_len = match_ptr - str_ptr;
|
||||
*tok_len = match.ptr - str_ptr;
|
||||
|
||||
for (i = 0; i < match_len; i++)
|
||||
for (i = 0; i < match.len; i++)
|
||||
{
|
||||
if (!QSE_AWK_ISSPACE(rtx->awk, match_ptr[i]))
|
||||
if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
|
||||
{
|
||||
*errnum = QSE_AWK_ENOERR;
|
||||
return match_ptr+match_len;
|
||||
return (qse_char_t*)match.ptr+match.len;
|
||||
}
|
||||
}
|
||||
|
||||
@ -915,13 +915,13 @@ exit_loop:
|
||||
|
||||
if (rtx->awk->option & QSE_AWK_STRIPSPACES)
|
||||
{
|
||||
return (match_ptr+match_len >= s+len)?
|
||||
QSE_NULL: (match_ptr+match_len);
|
||||
return (match.ptr+match.len >= substr+sublen)?
|
||||
QSE_NULL: ((qse_char_t*)match.ptr+match.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
return (match_ptr+match_len > s+len)?
|
||||
QSE_NULL: (match_ptr+match_len);
|
||||
return (match.ptr+match.len > substr+sublen)?
|
||||
QSE_NULL: ((qse_char_t*)match.ptr+match.len);
|
||||
}
|
||||
}
|
||||
|
||||
@ -944,7 +944,7 @@ exit_loop:
|
||||
void* qse_awk_buildrex (
|
||||
qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len, int* errnum)
|
||||
{
|
||||
int err;
|
||||
qse_rex_errnum_t err;
|
||||
void* p;
|
||||
|
||||
p = qse_buildrex (
|
||||
@ -956,13 +956,15 @@ void* qse_awk_buildrex (
|
||||
int qse_awk_matchrex (
|
||||
qse_awk_t* awk, void* code, int option,
|
||||
const qse_char_t* str, qse_size_t len,
|
||||
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum)
|
||||
const qse_char_t* substr, qse_size_t sublen,
|
||||
qse_cstr_t* match, int* errnum)
|
||||
{
|
||||
int err, x;
|
||||
int x;
|
||||
qse_rex_errnum_t err;
|
||||
|
||||
x = qse_matchrex (
|
||||
awk->mmgr, awk->rex.depth.max.match,
|
||||
code, option, str, len, match_ptr, match_len, &err);
|
||||
code, option, str, len, substr, sublen, match, &err);
|
||||
if (x < 0) *errnum = QSE_AWK_REXERRTOERR(err);
|
||||
return x;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: misc.h 75 2009-02-22 14:10:34Z hyunghwan.chung $
|
||||
* $Id: misc.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -42,8 +42,16 @@ qse_char_t* qse_awk_rtx_strxntok (
|
||||
qse_char_t** tok, qse_size_t* tok_len);
|
||||
|
||||
qse_char_t* qse_awk_rtx_strxntokbyrex (
|
||||
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len,
|
||||
void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum);
|
||||
qse_awk_rtx_t* rtx,
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
const qse_char_t* substr,
|
||||
qse_size_t sublen,
|
||||
void* rex,
|
||||
qse_char_t** tok,
|
||||
qse_size_t* tok_len,
|
||||
int* errnum
|
||||
);
|
||||
|
||||
|
||||
void* qse_awk_buildrex (
|
||||
@ -52,7 +60,8 @@ void* qse_awk_buildrex (
|
||||
int qse_awk_matchrex (
|
||||
qse_awk_t* awk, void* code, int option,
|
||||
const qse_char_t* str, qse_size_t len,
|
||||
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum);
|
||||
const qse_char_t* substr, qse_size_t sublen,
|
||||
qse_cstr_t* match, int* errnum);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rec.c 89 2009-02-28 15:27:03Z hyunghwan.chung $
|
||||
* $Id: rec.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -146,8 +146,13 @@ static int split_record (qse_awk_rtx_t* run)
|
||||
}
|
||||
else
|
||||
{
|
||||
p = qse_awk_rtx_strxntokbyrex (run, p, len,
|
||||
run->gbl.fs, &tok, &tok_len, &errnum);
|
||||
p = qse_awk_rtx_strxntokbyrex (
|
||||
run,
|
||||
QSE_STR_PTR(&run->inrec.line),
|
||||
QSE_STR_LEN(&run->inrec.line),
|
||||
p, len,
|
||||
run->gbl.fs, &tok, &tok_len, &errnum
|
||||
);
|
||||
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
|
||||
{
|
||||
if (fs_free != QSE_NULL)
|
||||
@ -203,8 +208,13 @@ static int split_record (qse_awk_rtx_t* run)
|
||||
}
|
||||
else
|
||||
{
|
||||
p = qse_awk_rtx_strxntokbyrex (run, p, len,
|
||||
run->gbl.fs, &tok, &tok_len, &errnum);
|
||||
p = qse_awk_rtx_strxntokbyrex (
|
||||
run,
|
||||
QSE_STR_PTR(&run->inrec.line),
|
||||
QSE_STR_LEN(&run->inrec.line),
|
||||
p, len,
|
||||
run->gbl.fs, &tok, &tok_len, &errnum
|
||||
);
|
||||
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
|
||||
{
|
||||
if (fs_free != QSE_NULL)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rio.c 90 2009-03-01 09:58:19Z hyunghwan.chung $
|
||||
* $Id: rio.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -269,8 +269,7 @@ int qse_awk_rtx_readio (
|
||||
* the buffer has been appended with the last character
|
||||
* after the previous matchrex has failed */
|
||||
|
||||
const qse_char_t* match_ptr;
|
||||
qse_size_t match_len;
|
||||
qse_cstr_t match;
|
||||
|
||||
QSE_ASSERT (run->gbl.rs != QSE_NULL);
|
||||
|
||||
@ -278,7 +277,8 @@ int qse_awk_rtx_readio (
|
||||
run->awk, run->gbl.rs,
|
||||
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
&match_ptr, &match_len, &run->errnum);
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
&match, &run->errnum);
|
||||
if (n == -1)
|
||||
{
|
||||
ret = -1;
|
||||
@ -291,9 +291,9 @@ int qse_awk_rtx_readio (
|
||||
* the current buffer */
|
||||
QSE_ASSERT (
|
||||
QSE_STR_PTR(buf) + QSE_STR_LEN(buf) ==
|
||||
match_ptr + match_len);
|
||||
match.ptr + match.len);
|
||||
|
||||
QSE_STR_LEN(buf) -= match_len;
|
||||
QSE_STR_LEN(buf) -= match.len;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -357,8 +357,7 @@ int qse_awk_rtx_readio (
|
||||
}
|
||||
else
|
||||
{
|
||||
const qse_char_t* match_ptr;
|
||||
qse_size_t match_len;
|
||||
qse_cstr_t match;
|
||||
|
||||
QSE_ASSERT (run->gbl.rs != QSE_NULL);
|
||||
|
||||
@ -366,7 +365,8 @@ int qse_awk_rtx_readio (
|
||||
run->awk, run->gbl.rs,
|
||||
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
&match_ptr, &match_len, &run->errnum);
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
&match, &run->errnum);
|
||||
if (n == -1)
|
||||
{
|
||||
ret = -1;
|
||||
@ -380,9 +380,9 @@ int qse_awk_rtx_readio (
|
||||
* the current buffer */
|
||||
QSE_ASSERT (
|
||||
QSE_STR_PTR(buf) + QSE_STR_LEN(buf) ==
|
||||
match_ptr + match_len);
|
||||
match.ptr + match.len);
|
||||
|
||||
QSE_STR_LEN(buf) -= match_len;
|
||||
QSE_STR_LEN(buf) -= match.len;
|
||||
p->in.pos--; /* unread the character in c */
|
||||
break;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: run.c 127 2009-05-07 13:15:04Z hyunghwan.chung $
|
||||
* $Id: run.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -3099,7 +3099,9 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
|
||||
((((qse_awk_rtx_t*)run)->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
((qse_awk_val_str_t*)run->inrec.d0)->ptr,
|
||||
((qse_awk_val_str_t*)run->inrec.d0)->len,
|
||||
QSE_NULL, QSE_NULL, &errnum);
|
||||
((qse_awk_val_str_t*)run->inrec.d0)->ptr,
|
||||
((qse_awk_val_str_t*)run->inrec.d0)->len,
|
||||
QSE_NULL, &errnum);
|
||||
|
||||
if (n == -1)
|
||||
{
|
||||
@ -4773,7 +4775,9 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
((qse_awk_val_str_t*)left)->ptr,
|
||||
((qse_awk_val_str_t*)left)->len,
|
||||
QSE_NULL, QSE_NULL, &errnum);
|
||||
((qse_awk_val_str_t*)left)->ptr,
|
||||
((qse_awk_val_str_t*)left)->len,
|
||||
QSE_NULL, &errnum);
|
||||
if (n == -1)
|
||||
{
|
||||
if (right->type != QSE_AWK_VAL_REX)
|
||||
@ -4810,7 +4814,8 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
run->awk, rex_code,
|
||||
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
out.u.cpldup.ptr, out.u.cpldup.len,
|
||||
QSE_NULL, QSE_NULL, &errnum);
|
||||
out.u.cpldup.ptr, out.u.cpldup.len,
|
||||
QSE_NULL, &errnum);
|
||||
if (n == -1)
|
||||
{
|
||||
QSE_AWK_FREE (run->awk, out.u.cpldup.ptr);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.c 127 2009-05-07 13:15:04Z hyunghwan.chung $
|
||||
* $Id: rex.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -107,7 +107,7 @@ struct builder_t
|
||||
qse_size_t cur;
|
||||
} depth;
|
||||
|
||||
int errnum;
|
||||
qse_rex_errnum_t errnum;
|
||||
};
|
||||
|
||||
struct matcher_t
|
||||
@ -121,6 +121,12 @@ struct matcher_t
|
||||
const qse_char_t* ptr;
|
||||
const qse_char_t* end;
|
||||
} str;
|
||||
|
||||
struct
|
||||
{
|
||||
const qse_char_t* ptr;
|
||||
const qse_char_t* end;
|
||||
} realstr;
|
||||
} match;
|
||||
|
||||
struct
|
||||
@ -130,7 +136,7 @@ struct matcher_t
|
||||
} depth;
|
||||
|
||||
int ignorecase;
|
||||
int errnum;
|
||||
qse_rex_errnum_t errnum;
|
||||
};
|
||||
|
||||
struct match_t
|
||||
@ -331,9 +337,63 @@ static struct __char_class_t __char_class[] =
|
||||
{ QSE_NULL, 0, QSE_NULL }
|
||||
};
|
||||
|
||||
/*
 * Creates a regular expression object.
 *
 * mmgr - memory manager to allocate from; QSE_NULL selects the default
 *        memory manager returned by QSE_MMGR_GETDFL().
 * xtn  - number of extra bytes to allocate right after the object.
 *
 * Returns the new object with its qse_rex_t part zero-filled and its
 * mmgr field set, or QSE_NULL when no memory manager is available, when
 * xtn is too large to be represented in the allocation size, or when the
 * allocation itself fails. The extension area is not initialized here.
 */
qse_rex_t* qse_rex_open (qse_mmgr_t* mmgr, qse_size_t xtn)
{
	qse_rex_t* rex;

	if (mmgr == QSE_NULL)
	{
		mmgr = QSE_MMGR_GETDFL();

		QSE_ASSERTX (mmgr != QSE_NULL,
			"Set the memory manager with QSE_MMGR_SETDFL()");

		/* the assertion may be compiled out, so check again */
		if (mmgr == QSE_NULL) return QSE_NULL;
	}

	/* QSE_SIZEOF(qse_rex_t) + xtn must not wrap around; a wrapped sum
	 * would yield a block smaller than the object itself. */
	if (xtn > (qse_size_t)-1 - QSE_SIZEOF(qse_rex_t)) return QSE_NULL;

	rex = (qse_rex_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_rex_t) + xtn);
	if (rex == QSE_NULL) return QSE_NULL;

	QSE_MEMSET (rex, 0, QSE_SIZEOF(*rex));
	rex->mmgr = mmgr;

	return rex;
}
|
||||
|
||||
/*
 * Destroys a regular expression object: the compiled pattern attached to
 * it, if any, is freed first, then the object itself is handed back to
 * the memory manager recorded in it.
 */
void qse_rex_close (qse_rex_t* rex)
{
	qse_mmgr_t* owner = rex->mmgr;

	if (rex->code != QSE_NULL)
	{
		qse_freerex (owner, rex->code);
	}

	QSE_MMGR_FREE (owner, rex);
}
|
||||
|
||||
/*
 * Compiles the pattern ptn of length len into rex.
 *
 * On success the previously compiled pattern, if any, is freed, the new
 * one takes its place and 0 is returned. On failure -1 is returned, the
 * old pattern is left untouched and rex->errnum holds the error number
 * reported by qse_buildrex().
 */
int qse_rex_build (qse_rex_t* rex, const qse_char_t* ptn, qse_size_t len)
{
	void* compiled = qse_buildrex (
		rex->mmgr, rex->depth.build,
		ptn, len, &rex->errnum);

	if (compiled != QSE_NULL)
	{
		/* drop the old pattern only after the new one is known good */
		if (rex->code != QSE_NULL)
		{
			qse_freerex (rex->mmgr, rex->code);
		}
		rex->code = compiled;
		return 0;
	}

	return -1;
}
|
||||
|
||||
int qse_rex_match (
|
||||
qse_rex_t* rex,
|
||||
const qse_char_t* str, qse_size_t len,
|
||||
const qse_char_t* substr, qse_size_t sublen, qse_cstr_t* match)
|
||||
{
|
||||
return qse_matchrex (
|
||||
rex->mmgr, rex->depth.match, rex->code, rex->option,
|
||||
str, len, substr, sublen, match, &rex->errnum);
|
||||
}
|
||||
|
||||
void* qse_buildrex (
|
||||
qse_mmgr_t* mmgr, qse_size_t depth,
|
||||
const qse_char_t* ptn, qse_size_t len, int* errnum)
|
||||
const qse_char_t* ptn, qse_size_t len, qse_rex_errnum_t* errnum)
|
||||
{
|
||||
builder_t builder;
|
||||
|
||||
@ -399,7 +459,8 @@ int qse_matchrex (
|
||||
qse_mmgr_t* mmgr, qse_size_t depth,
|
||||
void* code, int option,
|
||||
const qse_char_t* str, qse_size_t len,
|
||||
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum)
|
||||
const qse_char_t* substr, qse_size_t sublen,
|
||||
qse_cstr_t* match, qse_rex_errnum_t* errnum)
|
||||
{
|
||||
matcher_t matcher;
|
||||
match_t mat;
|
||||
@ -409,8 +470,11 @@ int qse_matchrex (
|
||||
matcher.mmgr = mmgr;
|
||||
|
||||
/* store the source string */
|
||||
matcher.match.str.ptr = str;
|
||||
matcher.match.str.end = str + len;
|
||||
matcher.match.str.ptr = substr;
|
||||
matcher.match.str.end = substr + sublen;
|
||||
|
||||
matcher.match.realstr.ptr = str;
|
||||
matcher.match.realstr.end = str + len;
|
||||
|
||||
matcher.depth.max = depth;
|
||||
matcher.depth.cur = 0;
|
||||
@ -418,7 +482,7 @@ int qse_matchrex (
|
||||
|
||||
mat.matched = QSE_FALSE;
|
||||
/* TODO: should it allow an offset here??? */
|
||||
mat.match_ptr = str + offset;
|
||||
mat.match_ptr = substr + offset;
|
||||
|
||||
/*while (mat.match_ptr < matcher.match.str.end)*/
|
||||
while (mat.match_ptr <= matcher.match.str.end)
|
||||
@ -441,8 +505,11 @@ int qse_matchrex (
|
||||
}
|
||||
*/
|
||||
|
||||
if (match_ptr != QSE_NULL) *match_ptr = mat.match_ptr;
|
||||
if (match_len != QSE_NULL) *match_len = mat.match_len;
|
||||
if (match != QSE_NULL)
|
||||
{
|
||||
match->ptr = mat.match_ptr;
|
||||
match->len = mat.match_len;
|
||||
}
|
||||
|
||||
/*match_ptr_zero = QSE_NULL;*/
|
||||
break;
|
||||
@ -454,8 +521,11 @@ int qse_matchrex (
|
||||
/*
|
||||
if (match_ptr_zero != QSE_NULL)
|
||||
{
|
||||
if (match_ptr != QSE_NULL) *match_ptr = match_ptr_zero;
|
||||
if (match_len != QSE_NULL) *match_len = 0;
|
||||
if (match != QSE_NULL)
|
||||
{
|
||||
match->ptr = match_ptr_zero;
|
||||
match->len = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
*/
|
||||
@ -1349,7 +1419,9 @@ static const qse_byte_t* match_bol (
|
||||
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
|
||||
QSE_ASSERT (cp->cmd == CMD_BOL);
|
||||
|
||||
mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
|
||||
/*mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
|
||||
(cp->lbound == cp->ubound && cp->lbound == 0));*/
|
||||
mat->matched = (mat->match_ptr == matcher->match.realstr.ptr ||
|
||||
(cp->lbound == cp->ubound && cp->lbound == 0));
|
||||
mat->match_len = 0;
|
||||
|
||||
@ -1365,7 +1437,9 @@ static const qse_byte_t* match_eol (
|
||||
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
|
||||
QSE_ASSERT (cp->cmd == CMD_EOL);
|
||||
|
||||
mat->matched = (mat->match_ptr == matcher->match.str.end ||
|
||||
/*mat->matched = (mat->match_ptr == matcher->match.str.end ||
|
||||
(cp->lbound == cp->ubound && cp->lbound == 0));*/
|
||||
mat->matched = (mat->match_ptr == matcher->match.realstr.end ||
|
||||
(cp->lbound == cp->ubound && cp->lbound == 0));
|
||||
mat->match_len = 0;
|
||||
|
||||
|
@ -110,11 +110,22 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
|
||||
|
||||
}
|
||||
|
||||
if (qse_str_init (&sed->text.subst, mmgr, 256) == QSE_NULL)
|
||||
{
|
||||
qse_str_fini (&sed->text.held);
|
||||
qse_lda_fini (&sed->text.appended);
|
||||
QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);
|
||||
qse_map_fini (&sed->labs);
|
||||
qse_str_fini (&sed->rexbuf);
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
return sed;
|
||||
}
|
||||
|
||||
void qse_sed_fini (qse_sed_t* sed)
|
||||
{
|
||||
qse_str_fini (&sed->text.subst);
|
||||
qse_str_fini (&sed->text.held);
|
||||
qse_lda_fini (&sed->text.appended);
|
||||
|
||||
@ -232,7 +243,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
QSE_MMGR_FREE (sed->mmgr, cmd->u.branch.label.ptr);
|
||||
break;
|
||||
|
||||
case QSE_SED_CMD_S:
|
||||
case QSE_SED_CMD_SUBSTITUTE:
|
||||
if (cmd->u.subst.file.ptr != QSE_NULL)
|
||||
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
|
||||
if (cmd->u.subst.rpl.ptr != QSE_NULL)
|
||||
@ -241,7 +252,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
qse_freerex (sed->mmgr, cmd->u.subst.rex);
|
||||
break;
|
||||
|
||||
case QSE_SED_CMD_Y:
|
||||
case QSE_SED_CMD_TRANSLATE:
|
||||
if (cmd->u.transet.ptr != QSE_NULL)
|
||||
QSE_MMGR_FREE (sed->mmgr, cmd->u.transet.ptr);
|
||||
break;
|
||||
@ -1691,6 +1702,110 @@ static int write_str_to_file (
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Executes an 's' (substitute) command on the current input line.
 *
 * The line held in sed->eio.in.line is scanned for matches of
 * cmd->u.subst.rex. The result is assembled in sed->text.subst and then
 * swapped into sed->eio.in.line. In the replacement text an unescaped
 * '&' stands for the matched text and "\&" for a literal '&'.
 *
 * When the g flag is set the number of substitutions is unlimited;
 * otherwise at most cmd->u.subst.occ substitutions are made, where an
 * occ of 0 also means unlimited.
 *
 * A single trailing newline is excluded from matching and appended back
 * to the result, so the line keeps its terminator.
 *
 * Returns 0 on success, or -1 with sed->errnum set on failure.
 */
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cstr_t mat;
	int opt = 0;
	int nl_stripped = 0;
	qse_rex_errnum_t errnum;
	const qse_char_t* cur_ptr, * str_ptr;
	qse_size_t cur_len, str_len, m, i;
	qse_size_t max_count, sub_count;

	QSE_ASSERT (cmd->type == QSE_SED_CMD_SUBSTITUTE);

	qse_str_clear (&sed->text.subst);
	if (cmd->u.subst.i) opt = QSE_REX_IGNORECASE;

	str_ptr = QSE_STR_PTR(&sed->eio.in.line);
	str_len = QSE_STR_LEN(&sed->eio.in.line);

	/* TODO: support different line end scheme */
	if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n'))
	{
		/* keep the terminator out of the match; it is restored below */
		str_len--;
		nl_stripped = 1;
	}

	cur_ptr = str_ptr;
	cur_len = str_len;

	sub_count = 0;
	max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;

	while (1)
	{
		int n;

		if (max_count == 0 || sub_count < max_count)
		{
			/* TODO: maximum match depth... */
			/* the whole line is passed along with the remaining part
			 * so that anchors are judged against the real line bounds */
			n = qse_matchrex (
				sed->mmgr, 0, cmd->u.subst.rex, opt,
				str_ptr, str_len,
				cur_ptr, cur_len,
				&mat, &errnum
			);
		}
		else n = 0;

		if (n == -1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		if (n == 0)
		{
			/* no more match found - copy the rest verbatim */
			/* NOTE(review): a string append failure is reported as
			 * QSE_SED_EREXMA throughout this function; a memory error
			 * code looks more appropriate - confirm against sed.h */
			if (qse_str_ncat (
				&sed->text.subst,
				cur_ptr, cur_len) == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}
			break;
		}

		/* text between the previous match and this one */
		m = qse_str_ncat (&sed->text.subst, cur_ptr, mat.ptr-cur_ptr);
		if (m == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		/* expand the replacement text */
		for (i = 0; i < cmd->u.subst.rpl.len; i++)
		{
			if ((i+1) < cmd->u.subst.rpl.len &&
			    cmd->u.subst.rpl.ptr[i] == QSE_T('\\') &&
			    cmd->u.subst.rpl.ptr[i+1] == QSE_T('&'))
			{
				m = qse_str_ccat (&sed->text.subst, QSE_T('&'));
				i++;
			}
			else if (cmd->u.subst.rpl.ptr[i] == QSE_T('&'))
			{
				m = qse_str_ncat (
					&sed->text.subst, mat.ptr, mat.len);
			}
			else
			{
				m = qse_str_ccat (
					&sed->text.subst, cmd->u.subst.rpl.ptr[i]);
			}

			if (m == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}
		}

		sub_count++;
		cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
		cur_ptr = mat.ptr + mat.len;

		if (mat.len == 0)
		{
			/* an empty match consumes nothing; without stepping over
			 * one character the same position would match forever */
			if (cur_len == 0) break;

			m = qse_str_ccat (&sed->text.subst, *cur_ptr);
			if (m == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}

			cur_ptr++;
			cur_len--;
		}
	}

	if (nl_stripped)
	{
		/* TODO: support different line end scheme */
		m = qse_str_ccat (&sed->text.subst, QSE_T('\n'));
		if (m == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}
	}

	qse_str_swap (&sed->eio.in.line, &sed->text.subst);
	return 0;
}
|
||||
|
||||
static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
|
||||
{
|
||||
switch (a->type)
|
||||
@ -1700,10 +1815,11 @@ static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
|
||||
|
||||
case QSE_SED_A_REX:
|
||||
{
|
||||
qse_str_t match;
|
||||
int errnum, n;
|
||||
int n;
|
||||
qse_cstr_t match;
|
||||
qse_str_t* line;
|
||||
qse_size_t llen;
|
||||
qse_rex_errnum_t errnum;
|
||||
|
||||
QSE_ASSERT (a->u.rex != QSE_NULL);
|
||||
|
||||
@ -1715,13 +1831,10 @@ static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
|
||||
QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
|
||||
|
||||
n = qse_matchrex (
|
||||
sed->mmgr,
|
||||
0,
|
||||
a->u.rex,
|
||||
0,
|
||||
QSE_STR_PTR(line),
|
||||
llen,
|
||||
&match.ptr, &match.len, &errnum);
|
||||
sed->mmgr, 0, a->u.rex, 0,
|
||||
QSE_STR_PTR(line), llen,
|
||||
QSE_STR_PTR(line), llen,
|
||||
&match, &errnum);
|
||||
if (n <= -1)
|
||||
{
|
||||
sed->errnum = QSE_SED_EREXMA;
|
||||
@ -2047,8 +2160,7 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
case QSE_SED_CMD_WRITE_FILELN:
|
||||
{
|
||||
const qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line);
|
||||
const qse_char_t* len = QSE_STR_LEN(&sed->eio.in.line);
|
||||
qse_size_t i;
|
||||
qse_size_t i, len = QSE_STR_LEN(&sed->eio.in.line);
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
/* TODO: handle different line end scheme */
|
||||
@ -2089,6 +2201,37 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
|
||||
jumpto = cmd->u.branch.target;
|
||||
break;
|
||||
|
||||
case QSE_SED_CMD_TRANSLATE:
|
||||
{
|
||||
qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line);
|
||||
qse_size_t i, len = QSE_STR_LEN(&sed->eio.in.line);
|
||||
|
||||
/* TODO: sort cmd->u.transet and do binary search
|
||||
* when sorted, you can, before the binary search, check if ptr[i] < transet[0] || ptr[i] > transet[transet_size-1]; if so, it has no matching translation */
|
||||
/* TODO: support different line end scheme */
|
||||
if (len > 0 && ptr[len-1] == QSE_T('\n')) len--;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
const qse_char_t* tptr = cmd->u.transet.ptr;
|
||||
qse_size_t j, tlen = cmd->u.transet.len;
|
||||
for (j = 0; j < tlen; j += 2)
|
||||
{
|
||||
if (ptr[i] == tptr[j])
|
||||
{
|
||||
ptr[i] = tptr[j+1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case QSE_SED_CMD_SUBSTITUTE:
|
||||
n = do_subst (sed, cmd);
|
||||
if (n <= -1) return QSE_NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (jumpto == NULL) jumpto = cmd + 1;
|
||||
|
@ -77,18 +77,16 @@ struct qse_sed_cmd_t
|
||||
QSE_SED_CMD_NEXT_APPEND = QSE_T('N'),
|
||||
|
||||
/* branch */
|
||||
QSE_SED_CMD_BRANCH = QSE_T('b'),
|
||||
QSE_SED_CMD_T = QSE_T('t'),
|
||||
QSE_SED_CMD_BRANCH = QSE_T('b'),
|
||||
QSE_SED_CMD_T = QSE_T('t'),
|
||||
|
||||
QSE_SED_CMD_READ_FILE = QSE_T('r'),
|
||||
QSE_SED_CMD_READ_FILELN = QSE_T('R'),
|
||||
QSE_SED_CMD_WRITE_FILE = QSE_T('w'),
|
||||
QSE_SED_CMD_WRITE_FILELN = QSE_T('W'),
|
||||
QSE_SED_CMD_READ_FILE = QSE_T('r'),
|
||||
QSE_SED_CMD_READ_FILELN = QSE_T('R'),
|
||||
QSE_SED_CMD_WRITE_FILE = QSE_T('w'),
|
||||
QSE_SED_CMD_WRITE_FILELN = QSE_T('W'),
|
||||
|
||||
/* s/regex/str/ - replace matching pattern with a new string */
|
||||
QSE_SED_CMD_S = QSE_T('s'),
|
||||
/* y/s/d/ - translate characters in s to characters in d */
|
||||
QSE_SED_CMD_Y = QSE_T('y')
|
||||
QSE_SED_CMD_SUBSTITUTE = QSE_T('s'),
|
||||
QSE_SED_CMD_TRANSLATE = QSE_T('y')
|
||||
|
||||
} type;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user