fixed a bug in AWK and enhanced sed

- awk: fixed a bug where ^ was not handled correctly in gsub(), split(), and FS.
- sed: added code for the y and s commands
This commit is contained in:
hyung-hwan 2009-05-16 07:31:43 +00:00
parent b36f20a4a2
commit 164b3d9a98
12 changed files with 433 additions and 126 deletions

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.h 127 2009-05-07 13:15:04Z hyunghwan.chung $ * $Id: rex.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -76,24 +76,84 @@ enum qse_rex_errnum_t
QSE_REX_EEND, /* unexpected end of the pattern */ QSE_REX_EEND, /* unexpected end of the pattern */
QSE_REX_EGARBAGE /* garbage after the pattern */ QSE_REX_EGARBAGE /* garbage after the pattern */
}; };
typedef enum qse_rex_errnum_t qse_rex_errnum_t;
typedef struct qse_rex_t qse_rex_t;
/*
 * The qse_rex_t type is an object wrapper around a compiled regular
 * expression. It is created with qse_rex_open(), compiled with
 * qse_rex_build(), matched with qse_rex_match() and destroyed with
 * qse_rex_close().
 */
struct qse_rex_t
{
/* NOTE(review): presumably supplies the 'mmgr' member that the
 * qse_rex_* functions access - confirm against the macro definition */
QSE_DEFINE_COMMON_FIELDS (rex)
/* error number location handed to qse_buildrex() and qse_matchrex() */
qse_rex_errnum_t errnum;
/* option bits forwarded to qse_matchrex() by qse_rex_match() */
int option;
struct
{
/* depth value passed to qse_buildrex() by qse_rex_build() */
int build;
/* depth value passed to qse_matchrex() by qse_rex_match() */
int match;
} depth;
/* compiled pattern returned by qse_buildrex(); QSE_NULL until
 * qse_rex_build() succeeds. released with qse_freerex(). */
void* code;
};
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
QSE_DEFINE_COMMON_FUNCTIONS (rex)
/*
 * The qse_rex_open() function allocates a regular expression object
 * followed by 'xtn' extra bytes. If 'mmgr' is QSE_NULL, the default memory
 * manager is used. It returns QSE_NULL if no memory manager is available
 * or if the allocation fails.
 */
qse_rex_t* qse_rex_open (
qse_mmgr_t* mmgr,
qse_size_t xtn
);
/*
 * The qse_rex_close() function frees the compiled pattern held by 'rex',
 * if any, and then the object itself.
 */
void qse_rex_close (
qse_rex_t* rex
);
/*
 * The qse_rex_build() function compiles the pattern 'ptn' of the length
 * 'len'. On success, it replaces any pattern compiled earlier and returns
 * 0. On failure, it returns -1 and keeps the pattern compiled earlier.
 */
int qse_rex_build (
qse_rex_t* rex,
const qse_char_t* ptn,
qse_size_t len
);
/*
 * The qse_rex_match() function forwards to qse_matchrex() with the
 * compiled pattern and the option stored in 'rex', and returns its result.
 * 'str'/'len' describe the whole string whose boundaries the ^ and $
 * anchors are checked against; 'substr'/'sublen' describe the part that is
 * actually searched. If 'match' is not QSE_NULL, it receives the location
 * and the length of the match found.
 */
int qse_rex_match (
qse_rex_t* rex,
const qse_char_t* str,
qse_size_t len,
const qse_char_t* substr,
qse_size_t sublen,
qse_cstr_t* match
);
void* qse_buildrex ( void* qse_buildrex (
qse_mmgr_t* mmgr, qse_size_t depth, qse_mmgr_t* mmgr,
const qse_char_t* ptn, qse_size_t len, int* errnum); qse_size_t depth,
const qse_char_t* ptn,
qse_size_t len,
qse_rex_errnum_t* errnum
);
int qse_matchrex ( int qse_matchrex (
qse_mmgr_t* mmgr, qse_size_t depth, qse_mmgr_t* mmgr,
void* code, int option, qse_size_t depth,
const qse_char_t* str, qse_size_t len, void* code,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum); int option,
const qse_char_t* str,
qse_size_t len,
const qse_char_t* substr,
qse_size_t sublen,
qse_cstr_t* match,
qse_rex_errnum_t* errnum
);
void qse_freerex (qse_mmgr_t* mmgr, void* code); void qse_freerex (
qse_mmgr_t* mmgr,
void* code
);
qse_bool_t qse_isemptyrex (void* code); qse_bool_t qse_isemptyrex (
void* code
);
#if 0 #if 0
void qse_dprintrex (qse_rex_t* rex, void* rex); void qse_dprintrex (qse_rex_t* rex, void* rex);

View File

@ -198,6 +198,7 @@ struct qse_sed_t
{ {
qse_lda_t appended; qse_lda_t appended;
qse_str_t held; qse_str_t held;
qse_str_t subst;
} text; } text;
}; };

View File

@ -1,5 +1,5 @@
/* /*
* $Id: awk.h 127 2009-05-07 13:15:04Z hyunghwan.chung $ * $Id: awk.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -379,7 +379,7 @@ struct qse_awk_rtx_t
#define QSE_AWK_ISEMPTYREX(awk,code) qse_isemptyrex(code) #define QSE_AWK_ISEMPTYREX(awk,code) qse_isemptyrex(code)
#define QSE_AWK_BUILDREX(awk,ptn,len,errnum) \ #define QSE_AWK_BUILDREX(awk,ptn,len,errnum) \
qse_awk_buildrex(awk,ptn,len,errnum) qse_awk_buildrex(awk,ptn,len,errnum)
#define QSE_AWK_MATCHREX(awk,code,option,str,len,match_ptr,match_len,errnum) \ #define QSE_AWK_MATCHREX(awk,code,option,str,len,substr,sublen,match,errnum) \
qse_awk_matchrex(awk,code,option,str,len,match_ptr,match_len,errnum) qse_awk_matchrex(awk,code,option,str,len,substr,sublen,match,errnum)
#endif #endif

View File

@ -1,5 +1,5 @@
/* /*
* $Id: fnc.c 90 2009-03-01 09:58:19Z hyunghwan.chung $ * $Id: fnc.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -583,7 +583,7 @@ static int fnc_split (
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, * t1, * t2, ** a1_ref; qse_awk_val_t* a0, * a1, * a2, * t1, * t2, ** a1_ref;
qse_char_t* str, * str_free, * p, * tok; qse_char_t* str, * str_free, * p, * tok;
qse_size_t str_len, str_left, tok_len; qse_size_t str_len, str_left, tok_len, org_len;
qse_long_t num; qse_long_t num;
qse_char_t key[QSE_SIZEOF(qse_long_t)*8+2]; qse_char_t key[QSE_SIZEOF(qse_long_t)*8+2];
qse_size_t key_len; qse_size_t key_len;
@ -719,7 +719,6 @@ static int fnc_split (
QSE_AWK_FREE (run->awk, fs_free); QSE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != QSE_NULL) if (fs_rex_free != QSE_NULL)
QSE_AWK_FREEREX (run->awk, fs_rex_free); QSE_AWK_FREEREX (run->awk, fs_rex_free);
/*qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM);*/
return -1; return -1;
} }
@ -727,7 +726,7 @@ static int fnc_split (
*a1_ref = t1; *a1_ref = t1;
qse_awk_rtx_refupval (run, *a1_ref); qse_awk_rtx_refupval (run, *a1_ref);
p = str; str_left = str_len; p = str; str_left = str_len; org_len = str_len;
num = 1; num = 1;
while (p != QSE_NULL) while (p != QSE_NULL)
@ -739,8 +738,10 @@ static int fnc_split (
} }
else else
{ {
p = qse_awk_rtx_strxntokbyrex (run, p, str_len, p = qse_awk_rtx_strxntokbyrex (
fs_rex, &tok, &tok_len, &errnum); run, str, org_len, p, str_len,
fs_rex, &tok, &tok_len, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
if (str_free != QSE_NULL) if (str_free != QSE_NULL)
@ -919,8 +920,9 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
qse_char_t* a2_ptr_free = QSE_NULL; qse_char_t* a2_ptr_free = QSE_NULL;
void* rex = QSE_NULL; void* rex = QSE_NULL;
int opt, n; int opt, n;
const qse_char_t* cur_ptr, * mat_ptr; qse_cstr_t mat;
qse_size_t cur_len, mat_len, i, m; const qse_char_t* cur_ptr;
qse_size_t cur_len, i, m;
qse_str_t new; qse_str_t new;
qse_long_t sub_count; qse_long_t sub_count;
@ -1064,8 +1066,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (max_count == 0 || sub_count < max_count) if (max_count == 0 || sub_count < max_count)
{ {
n = QSE_AWK_MATCHREX ( n = QSE_AWK_MATCHREX (
run->awk, rex, opt, cur_ptr, cur_len, run->awk, rex, opt,
&mat_ptr, &mat_len, &run->errnum); a2_ptr, a2_len,
cur_ptr, cur_len,
&mat, &run->errnum);
} }
else n = 0; else n = 0;
@ -1092,7 +1096,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
} }
if (qse_str_ncat ( if (qse_str_ncat (
&new, cur_ptr, mat_ptr - cur_ptr) == (qse_size_t)-1) &new, cur_ptr, mat.ptr - cur_ptr) == (qse_size_t)-1)
{ {
FREE_A0_REX (run->awk, rex); FREE_A0_REX (run->awk, rex);
qse_str_fini (&new); qse_str_fini (&new);
@ -1111,7 +1115,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
} }
else if (a1_ptr[i] == QSE_T('&')) else if (a1_ptr[i] == QSE_T('&'))
{ {
m = qse_str_ncat (&new, mat_ptr, mat_len); m = qse_str_ncat (&new, mat.ptr, mat.len);
} }
else else
{ {
@ -1128,8 +1132,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
} }
sub_count++; sub_count++;
cur_len = cur_len - ((mat_ptr - cur_ptr) + mat_len); cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
cur_ptr = mat_ptr + mat_len; cur_ptr = mat.ptr + mat.len;
} }
FREE_A0_REX (run->awk, rex); FREE_A0_REX (run->awk, rex);
@ -1218,8 +1222,7 @@ static int fnc_match (
qse_long_t idx; qse_long_t idx;
void* rex; void* rex;
int opt, n; int opt, n;
const qse_char_t* mat_ptr; qse_cstr_t mat;
qse_size_t mat_len;
nargs = qse_awk_rtx_getnargs (run); nargs = qse_awk_rtx_getnargs (run);
QSE_ASSERT (nargs == 2); QSE_ASSERT (nargs == 2);
@ -1273,15 +1276,17 @@ static int fnc_match (
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0; opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
n = QSE_AWK_MATCHREX ( n = QSE_AWK_MATCHREX (
run->awk, rex, opt, str0, len0, run->awk, rex, opt,
&mat_ptr, &mat_len, &run->errnum); str0, len0, str0, len0,
&mat, &run->errnum
);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str0); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (run->awk, rex); if (a1->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (run->awk, rex);
if (n == -1) return -1; if (n == -1) return -1;
idx = (n == 0)? 0: ((qse_long_t)(mat_ptr-str0) + 1); idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
a0 = qse_awk_rtx_makeintval (run, idx); a0 = qse_awk_rtx_makeintval (run, idx);
if (a0 == QSE_NULL) if (a0 == QSE_NULL)
@ -1293,7 +1298,7 @@ static int fnc_match (
qse_awk_rtx_refupval (run, a0); qse_awk_rtx_refupval (run, a0);
a1 = qse_awk_rtx_makeintval (run, a1 = qse_awk_rtx_makeintval (run,
((n == 0)? (qse_long_t)-1: (qse_long_t)mat_len)); ((n == 0)? (qse_long_t)-1: (qse_long_t)mat.len));
if (a1 == QSE_NULL) if (a1 == QSE_NULL)
{ {
qse_awk_rtx_refdownval (run, a0); qse_awk_rtx_refdownval (run, a0);

View File

@ -1,5 +1,5 @@
/* /*
* $Id: misc.c 127 2009-05-07 13:15:04Z hyunghwan.chung $ * $Id: misc.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -831,24 +831,24 @@ exit_loop:
} }
qse_char_t* qse_awk_rtx_strxntokbyrex ( qse_char_t* qse_awk_rtx_strxntokbyrex (
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len, qse_awk_rtx_t* rtx,
const qse_char_t* str, qse_size_t len,
const qse_char_t* substr, qse_size_t sublen,
void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum) void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum)
{ {
int n; int n;
qse_char_t* match_ptr; qse_size_t i, left = sublen;
qse_size_t match_len, i; const qse_char_t* ptr = substr;
qse_size_t left = len; const qse_char_t* str_ptr = substr;
const qse_char_t* ptr = s; qse_size_t str_len = sublen;
const qse_char_t* str_ptr = s; qse_cstr_t match;
qse_size_t str_len = len;
while (len > 0) while (sublen > 0)
{ {
n = QSE_AWK_MATCHREX ( n = QSE_AWK_MATCHREX (
rtx->awk, rex, rtx->awk, rex,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), ((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
ptr, left, (const qse_char_t**)&match_ptr, &match_len, str, len, ptr, left, &match, errnum);
errnum);
if (n == -1) return QSE_NULL; if (n == -1) return QSE_NULL;
if (n == 0) if (n == 0)
{ {
@ -862,7 +862,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
QSE_ASSERT (n == 1); QSE_ASSERT (n == 1);
if (match_len == 0) if (match.len == 0)
{ {
ptr++; ptr++;
left--; left--;
@ -870,28 +870,28 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
else if (rtx->awk->option & QSE_AWK_STRIPSPACES) else if (rtx->awk->option & QSE_AWK_STRIPSPACES)
{ {
/* match at the beginning of the input string */ /* match at the beginning of the input string */
if (match_ptr == s) if (match.ptr == substr)
{ {
for (i = 0; i < match_len; i++) for (i = 0; i < match.len; i++)
{ {
if (!QSE_AWK_ISSPACE(rtx->awk, match_ptr[i])) if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
goto exit_loop; goto exit_loop;
} }
/* the match that are all spaces at the /* the match that are all spaces at the
* beginning of the input string is skipped */ * beginning of the input string is skipped */
ptr += match_len; ptr += match.len;
left -= match_len; left -= match.len;
str_ptr = s + match_len; str_ptr = substr + match.len;
str_len -= match_len; str_len -= match.len;
} }
else break; else break;
} }
else break; else break;
} }
exit_loop: exit_loop:
if (len == 0) if (sublen == 0)
{ {
*tok = (qse_char_t*)str_ptr; *tok = (qse_char_t*)str_ptr;
*tok_len = str_len; *tok_len = str_len;
@ -900,14 +900,14 @@ exit_loop:
} }
*tok = (qse_char_t*)str_ptr; *tok = (qse_char_t*)str_ptr;
*tok_len = match_ptr - str_ptr; *tok_len = match.ptr - str_ptr;
for (i = 0; i < match_len; i++) for (i = 0; i < match.len; i++)
{ {
if (!QSE_AWK_ISSPACE(rtx->awk, match_ptr[i])) if (!QSE_AWK_ISSPACE(rtx->awk, match.ptr[i]))
{ {
*errnum = QSE_AWK_ENOERR; *errnum = QSE_AWK_ENOERR;
return match_ptr+match_len; return (qse_char_t*)match.ptr+match.len;
} }
} }
@ -915,13 +915,13 @@ exit_loop:
if (rtx->awk->option & QSE_AWK_STRIPSPACES) if (rtx->awk->option & QSE_AWK_STRIPSPACES)
{ {
return (match_ptr+match_len >= s+len)? return (match.ptr+match.len >= substr+sublen)?
QSE_NULL: (match_ptr+match_len); QSE_NULL: ((qse_char_t*)match.ptr+match.len);
} }
else else
{ {
return (match_ptr+match_len > s+len)? return (match.ptr+match.len > substr+sublen)?
QSE_NULL: (match_ptr+match_len); QSE_NULL: ((qse_char_t*)match.ptr+match.len);
} }
} }
@ -944,7 +944,7 @@ exit_loop:
void* qse_awk_buildrex ( void* qse_awk_buildrex (
qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len, int* errnum) qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len, int* errnum)
{ {
int err; qse_rex_errnum_t err;
void* p; void* p;
p = qse_buildrex ( p = qse_buildrex (
@ -956,13 +956,15 @@ void* qse_awk_buildrex (
int qse_awk_matchrex ( int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option, qse_awk_t* awk, void* code, int option,
const qse_char_t* str, qse_size_t len, const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum) const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, int* errnum)
{ {
int err, x; int x;
qse_rex_errnum_t err;
x = qse_matchrex ( x = qse_matchrex (
awk->mmgr, awk->rex.depth.max.match, awk->mmgr, awk->rex.depth.max.match,
code, option, str, len, match_ptr, match_len, &err); code, option, str, len, substr, sublen, match, &err);
if (x < 0) *errnum = QSE_AWK_REXERRTOERR(err); if (x < 0) *errnum = QSE_AWK_REXERRTOERR(err);
return x; return x;
} }

View File

@ -1,5 +1,5 @@
/* /*
* $Id: misc.h 75 2009-02-22 14:10:34Z hyunghwan.chung $ * $Id: misc.h 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -42,8 +42,16 @@ qse_char_t* qse_awk_rtx_strxntok (
qse_char_t** tok, qse_size_t* tok_len); qse_char_t** tok, qse_size_t* tok_len);
qse_char_t* qse_awk_rtx_strxntokbyrex ( qse_char_t* qse_awk_rtx_strxntokbyrex (
qse_awk_rtx_t* rtx, const qse_char_t* s, qse_size_t len, qse_awk_rtx_t* rtx,
void* rex, qse_char_t** tok, qse_size_t* tok_len, int* errnum); const qse_char_t* str,
qse_size_t len,
const qse_char_t* substr,
qse_size_t sublen,
void* rex,
qse_char_t** tok,
qse_size_t* tok_len,
int* errnum
);
void* qse_awk_buildrex ( void* qse_awk_buildrex (
@ -52,7 +60,8 @@ void* qse_awk_buildrex (
int qse_awk_matchrex ( int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option, qse_awk_t* awk, void* code, int option,
const qse_char_t* str, qse_size_t len, const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum); const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, int* errnum);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rec.c 89 2009-02-28 15:27:03Z hyunghwan.chung $ * $Id: rec.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -146,8 +146,13 @@ static int split_record (qse_awk_rtx_t* run)
} }
else else
{ {
p = qse_awk_rtx_strxntokbyrex (run, p, len, p = qse_awk_rtx_strxntokbyrex (
run->gbl.fs, &tok, &tok_len, &errnum); run,
QSE_STR_PTR(&run->inrec.line),
QSE_STR_LEN(&run->inrec.line),
p, len,
run->gbl.fs, &tok, &tok_len, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
if (fs_free != QSE_NULL) if (fs_free != QSE_NULL)
@ -203,8 +208,13 @@ static int split_record (qse_awk_rtx_t* run)
} }
else else
{ {
p = qse_awk_rtx_strxntokbyrex (run, p, len, p = qse_awk_rtx_strxntokbyrex (
run->gbl.fs, &tok, &tok_len, &errnum); run,
QSE_STR_PTR(&run->inrec.line),
QSE_STR_LEN(&run->inrec.line),
p, len,
run->gbl.fs, &tok, &tok_len, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
if (fs_free != QSE_NULL) if (fs_free != QSE_NULL)

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rio.c 90 2009-03-01 09:58:19Z hyunghwan.chung $ * $Id: rio.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -269,8 +269,7 @@ int qse_awk_rtx_readio (
* the buffer has been appened with the last character * the buffer has been appened with the last character
* after the previous matchrex has failed */ * after the previous matchrex has failed */
const qse_char_t* match_ptr; qse_cstr_t match;
qse_size_t match_len;
QSE_ASSERT (run->gbl.rs != QSE_NULL); QSE_ASSERT (run->gbl.rs != QSE_NULL);
@ -278,7 +277,8 @@ int qse_awk_rtx_readio (
run->awk, run->gbl.rs, run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), ((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_PTR(buf), QSE_STR_LEN(buf), QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match_ptr, &match_len, &run->errnum); QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &run->errnum);
if (n == -1) if (n == -1)
{ {
ret = -1; ret = -1;
@ -291,9 +291,9 @@ int qse_awk_rtx_readio (
* the current buffer */ * the current buffer */
QSE_ASSERT ( QSE_ASSERT (
QSE_STR_PTR(buf) + QSE_STR_LEN(buf) == QSE_STR_PTR(buf) + QSE_STR_LEN(buf) ==
match_ptr + match_len); match.ptr + match.len);
QSE_STR_LEN(buf) -= match_len; QSE_STR_LEN(buf) -= match.len;
break; break;
} }
} }
@ -357,8 +357,7 @@ int qse_awk_rtx_readio (
} }
else else
{ {
const qse_char_t* match_ptr; qse_cstr_t match;
qse_size_t match_len;
QSE_ASSERT (run->gbl.rs != QSE_NULL); QSE_ASSERT (run->gbl.rs != QSE_NULL);
@ -366,7 +365,8 @@ int qse_awk_rtx_readio (
run->awk, run->gbl.rs, run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), ((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_PTR(buf), QSE_STR_LEN(buf), QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match_ptr, &match_len, &run->errnum); QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &run->errnum);
if (n == -1) if (n == -1)
{ {
ret = -1; ret = -1;
@ -380,9 +380,9 @@ int qse_awk_rtx_readio (
* the current buffer */ * the current buffer */
QSE_ASSERT ( QSE_ASSERT (
QSE_STR_PTR(buf) + QSE_STR_LEN(buf) == QSE_STR_PTR(buf) + QSE_STR_LEN(buf) ==
match_ptr + match_len); match.ptr + match.len);
QSE_STR_LEN(buf) -= match_len; QSE_STR_LEN(buf) -= match.len;
p->in.pos--; /* unread the character in c */ p->in.pos--; /* unread the character in c */
break; break;
} }

View File

@ -1,5 +1,5 @@
/* /*
* $Id: run.c 127 2009-05-07 13:15:04Z hyunghwan.chung $ * $Id: run.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -3099,7 +3099,9 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
((((qse_awk_rtx_t*)run)->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), ((((qse_awk_rtx_t*)run)->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
((qse_awk_val_str_t*)run->inrec.d0)->ptr, ((qse_awk_val_str_t*)run->inrec.d0)->ptr,
((qse_awk_val_str_t*)run->inrec.d0)->len, ((qse_awk_val_str_t*)run->inrec.d0)->len,
QSE_NULL, QSE_NULL, &errnum); ((qse_awk_val_str_t*)run->inrec.d0)->ptr,
((qse_awk_val_str_t*)run->inrec.d0)->len,
QSE_NULL, &errnum);
if (n == -1) if (n == -1)
{ {
@ -4773,7 +4775,9 @@ static qse_awk_val_t* eval_binop_match0 (
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), ((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
((qse_awk_val_str_t*)left)->ptr, ((qse_awk_val_str_t*)left)->ptr,
((qse_awk_val_str_t*)left)->len, ((qse_awk_val_str_t*)left)->len,
QSE_NULL, QSE_NULL, &errnum); ((qse_awk_val_str_t*)left)->ptr,
((qse_awk_val_str_t*)left)->len,
QSE_NULL, &errnum);
if (n == -1) if (n == -1)
{ {
if (right->type != QSE_AWK_VAL_REX) if (right->type != QSE_AWK_VAL_REX)
@ -4810,7 +4814,8 @@ static qse_awk_val_t* eval_binop_match0 (
run->awk, rex_code, run->awk, rex_code,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), ((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
out.u.cpldup.ptr, out.u.cpldup.len, out.u.cpldup.ptr, out.u.cpldup.len,
QSE_NULL, QSE_NULL, &errnum); out.u.cpldup.ptr, out.u.cpldup.len,
QSE_NULL, &errnum);
if (n == -1) if (n == -1)
{ {
QSE_AWK_FREE (run->awk, out.u.cpldup.ptr); QSE_AWK_FREE (run->awk, out.u.cpldup.ptr);

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c 127 2009-05-07 13:15:04Z hyunghwan.chung $ * $Id: rex.c 135 2009-05-15 13:31:43Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -107,7 +107,7 @@ struct builder_t
qse_size_t cur; qse_size_t cur;
} depth; } depth;
int errnum; qse_rex_errnum_t errnum;
}; };
struct matcher_t struct matcher_t
@ -121,6 +121,12 @@ struct matcher_t
const qse_char_t* ptr; const qse_char_t* ptr;
const qse_char_t* end; const qse_char_t* end;
} str; } str;
struct
{
const qse_char_t* ptr;
const qse_char_t* end;
} realstr;
} match; } match;
struct struct
@ -130,7 +136,7 @@ struct matcher_t
} depth; } depth;
int ignorecase; int ignorecase;
int errnum; qse_rex_errnum_t errnum;
}; };
struct match_t struct match_t
@ -331,9 +337,63 @@ static struct __char_class_t __char_class[] =
{ QSE_NULL, 0, QSE_NULL } { QSE_NULL, 0, QSE_NULL }
}; };
/*
 * Creates a regular expression object with 'xtn' bytes of extension space
 * behind it. A QSE_NULL memory manager selects the default one. Returns
 * QSE_NULL when no memory manager can be obtained or the allocation fails.
 */
qse_rex_t* qse_rex_open (qse_mmgr_t* mmgr, qse_size_t xtn)
{
	qse_rex_t* obj = QSE_NULL;

	if (mmgr == QSE_NULL)
	{
		/* fall back to the default memory manager */
		mmgr = QSE_MMGR_GETDFL();

		QSE_ASSERTX (mmgr != QSE_NULL,
			"Set the memory manager with QSE_MMGR_SETDFL()");
	}

	if (mmgr != QSE_NULL)
	{
		obj = (qse_rex_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_rex_t) + xtn);
		if (obj != QSE_NULL)
		{
			/* only the object proper is zero-filled, not the extension */
			QSE_MEMSET (obj, 0, QSE_SIZEOF(*obj));
			obj->mmgr = mmgr;
		}
	}

	return obj;
}
/*
 * Destroys a regular expression object: the compiled pattern is released
 * first, if one exists, and then the object itself.
 */
void qse_rex_close (qse_rex_t* rex)
{
	void* compiled = rex->code;

	if (compiled != QSE_NULL)
	{
		qse_freerex (rex->mmgr, compiled);
	}

	QSE_MMGR_FREE (rex->mmgr, rex);
}
/*
 * Compiles 'ptn' of the length 'len' and stores the result in 'rex'.
 * Returns 0 on success and -1 on failure. A pattern compiled earlier is
 * released only after the new one has been built successfully.
 */
int qse_rex_build (qse_rex_t* rex, const qse_char_t* ptn, qse_size_t len)
{
	void* fresh = qse_buildrex (
		rex->mmgr, rex->depth.build,
		ptn, len, &rex->errnum);

	if (fresh != QSE_NULL)
	{
		/* drop the old pattern now that the replacement is ready */
		if (rex->code != QSE_NULL) qse_freerex (rex->mmgr, rex->code);
		rex->code = fresh;
		return 0;
	}

	return -1;
}
/*
 * Matches the compiled pattern of 'rex' against 'substr'/'sublen', a part
 * of the whole string 'str'/'len', and returns what qse_matchrex() returns.
 */
int qse_rex_match (
	qse_rex_t* rex,
	const qse_char_t* str, qse_size_t len,
	const qse_char_t* substr, qse_size_t sublen, qse_cstr_t* match)
{
	int n;

	n = qse_matchrex (
		rex->mmgr, rex->depth.match,
		rex->code, rex->option,
		str, len,
		substr, sublen,
		match, &rex->errnum
	);

	return n;
}
void* qse_buildrex ( void* qse_buildrex (
qse_mmgr_t* mmgr, qse_size_t depth, qse_mmgr_t* mmgr, qse_size_t depth,
const qse_char_t* ptn, qse_size_t len, int* errnum) const qse_char_t* ptn, qse_size_t len, qse_rex_errnum_t* errnum)
{ {
builder_t builder; builder_t builder;
@ -399,7 +459,8 @@ int qse_matchrex (
qse_mmgr_t* mmgr, qse_size_t depth, qse_mmgr_t* mmgr, qse_size_t depth,
void* code, int option, void* code, int option,
const qse_char_t* str, qse_size_t len, const qse_char_t* str, qse_size_t len,
const qse_char_t** match_ptr, qse_size_t* match_len, int* errnum) const qse_char_t* substr, qse_size_t sublen,
qse_cstr_t* match, qse_rex_errnum_t* errnum)
{ {
matcher_t matcher; matcher_t matcher;
match_t mat; match_t mat;
@ -409,8 +470,11 @@ int qse_matchrex (
matcher.mmgr = mmgr; matcher.mmgr = mmgr;
/* store the source string */ /* store the source string */
matcher.match.str.ptr = str; matcher.match.str.ptr = substr;
matcher.match.str.end = str + len; matcher.match.str.end = substr + sublen;
matcher.match.realstr.ptr = str;
matcher.match.realstr.end = str + len;
matcher.depth.max = depth; matcher.depth.max = depth;
matcher.depth.cur = 0; matcher.depth.cur = 0;
@ -418,7 +482,7 @@ int qse_matchrex (
mat.matched = QSE_FALSE; mat.matched = QSE_FALSE;
/* TODO: should it allow an offset here??? */ /* TODO: should it allow an offset here??? */
mat.match_ptr = str + offset; mat.match_ptr = substr + offset;
/*while (mat.match_ptr < matcher.match.str.end)*/ /*while (mat.match_ptr < matcher.match.str.end)*/
while (mat.match_ptr <= matcher.match.str.end) while (mat.match_ptr <= matcher.match.str.end)
@ -441,8 +505,11 @@ int qse_matchrex (
} }
*/ */
if (match_ptr != QSE_NULL) *match_ptr = mat.match_ptr; if (match != QSE_NULL)
if (match_len != QSE_NULL) *match_len = mat.match_len; {
match->ptr = mat.match_ptr;
match->len = mat.match_len;
}
/*match_ptr_zero = QSE_NULL;*/ /*match_ptr_zero = QSE_NULL;*/
break; break;
@ -454,8 +521,11 @@ int qse_matchrex (
/* /*
if (match_ptr_zero != QSE_NULL) if (match_ptr_zero != QSE_NULL)
{ {
if (match_ptr != QSE_NULL) *match_ptr = match_ptr_zero; if (match != QSE_NULL)
if (match_len != QSE_NULL) *match_len = 0; {
match->ptr = match_ptr_zero;
match->len = 0;
}
return 1; return 1;
} }
*/ */
@ -1349,7 +1419,9 @@ static const qse_byte_t* match_bol (
cp = (const code_t*)p; p += QSE_SIZEOF(*cp); cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_BOL); QSE_ASSERT (cp->cmd == CMD_BOL);
mat->matched = (mat->match_ptr == matcher->match.str.ptr || /*mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
(cp->lbound == cp->ubound && cp->lbound == 0));*/
mat->matched = (mat->match_ptr == matcher->match.realstr.ptr ||
(cp->lbound == cp->ubound && cp->lbound == 0)); (cp->lbound == cp->ubound && cp->lbound == 0));
mat->match_len = 0; mat->match_len = 0;
@ -1365,7 +1437,9 @@ static const qse_byte_t* match_eol (
cp = (const code_t*)p; p += QSE_SIZEOF(*cp); cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_EOL); QSE_ASSERT (cp->cmd == CMD_EOL);
mat->matched = (mat->match_ptr == matcher->match.str.end || /*mat->matched = (mat->match_ptr == matcher->match.str.end ||
(cp->lbound == cp->ubound && cp->lbound == 0));*/
mat->matched = (mat->match_ptr == matcher->match.realstr.end ||
(cp->lbound == cp->ubound && cp->lbound == 0)); (cp->lbound == cp->ubound && cp->lbound == 0));
mat->match_len = 0; mat->match_len = 0;

View File

@ -110,11 +110,22 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
} }
if (qse_str_init (&sed->text.subst, mmgr, 256) == QSE_NULL)
{
qse_str_fini (&sed->text.held);
qse_lda_fini (&sed->text.appended);
QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);
qse_map_fini (&sed->labs);
qse_str_fini (&sed->rexbuf);
return QSE_NULL;
}
return sed; return sed;
} }
void qse_sed_fini (qse_sed_t* sed) void qse_sed_fini (qse_sed_t* sed)
{ {
qse_str_fini (&sed->text.subst);
qse_str_fini (&sed->text.held); qse_str_fini (&sed->text.held);
qse_lda_fini (&sed->text.appended); qse_lda_fini (&sed->text.appended);
@ -232,7 +243,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
QSE_MMGR_FREE (sed->mmgr, cmd->u.branch.label.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.branch.label.ptr);
break; break;
case QSE_SED_CMD_S: case QSE_SED_CMD_SUBSTITUTE:
if (cmd->u.subst.file.ptr != QSE_NULL) if (cmd->u.subst.file.ptr != QSE_NULL)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
if (cmd->u.subst.rpl.ptr != QSE_NULL) if (cmd->u.subst.rpl.ptr != QSE_NULL)
@ -241,7 +252,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
qse_freerex (sed->mmgr, cmd->u.subst.rex); qse_freerex (sed->mmgr, cmd->u.subst.rex);
break; break;
case QSE_SED_CMD_Y: case QSE_SED_CMD_TRANSLATE:
if (cmd->u.transet.ptr != QSE_NULL) if (cmd->u.transet.ptr != QSE_NULL)
QSE_MMGR_FREE (sed->mmgr, cmd->u.transet.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.transet.ptr);
break; break;
@ -1691,6 +1702,110 @@ static int write_str_to_file (
return 0; return 0;
} }
/*
 * Executes the 's' command on the current input line.
 *
 * The text of the input line, excluding a trailing newline, is scanned for
 * matches of cmd->u.subst.rex. The result is assembled in sed->text.subst
 * and finally swapped with the input line. In the replacement text, '&'
 * stands for the matched text and '\&' for a literal '&'.
 *
 * When the 'g' flag is set, every match is replaced. Otherwise, at most
 * cmd->u.subst.occ matches are replaced.
 *
 * Returns 0 on success. On failure, returns -1 with sed->errnum set.
 *
 * NOTE(review): a failure to grow the output buffer is reported as
 * QSE_SED_EREXMA as well; a dedicated out-of-memory error code would be
 * more accurate if one is available.
 */
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cstr_t mat;
	int opt = 0;
	qse_rex_errnum_t errnum;
	const qse_char_t* cur_ptr, * str_ptr;
	qse_size_t cur_len, str_len, m, i;
	qse_size_t max_count, sub_count;

	QSE_ASSERT (cmd->type == QSE_SED_CMD_SUBSTITUTE);

	qse_str_clear (&sed->text.subst);
	if (cmd->u.subst.i) opt = QSE_REX_IGNORECASE;

	str_ptr = QSE_STR_PTR(&sed->eio.in.line);
	str_len = QSE_STR_LEN(&sed->eio.in.line);

	/* the trailing newline takes no part in matching. it is put back
	 * after the loop. */
	/* TODO: support different line end scheme */
	if (str_len > 0 && str_ptr[str_len-1] == QSE_T('\n')) str_len--;

	cur_ptr = str_ptr;
	cur_len = str_len;

	sub_count = 0;
	/* 0 means no limit on the number of substitutions */
	max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;

	while (1)
	{
		int n;

		if (max_count == 0 || sub_count < max_count)
		{
			/* the whole line is passed along with the remaining
			 * part so that ^ and $ are checked against the line
			 * boundaries, not against the current position */
			/* TODO: maximum match depth... */
			n = qse_matchrex (
				sed->mmgr, 0, cmd->u.subst.rex, opt,
				str_ptr, str_len,
				cur_ptr, cur_len,
				&mat, &errnum
			);
		}
		else n = 0;

		if (n == -1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		if (n == 0)
		{
			/* no more match found. copy the rest as it is */
			if (qse_str_ncat (
				&sed->text.subst,
				cur_ptr, cur_len) == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}
			break;
		}

		/* copy the text preceding the match */
		m = qse_str_ncat (&sed->text.subst, cur_ptr, mat.ptr-cur_ptr);
		if (m == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		/* expand the replacement text */
		for (i = 0; i < cmd->u.subst.rpl.len; i++)
		{
			if ((i+1) < cmd->u.subst.rpl.len &&
			    cmd->u.subst.rpl.ptr[i] == QSE_T('\\') &&
			    cmd->u.subst.rpl.ptr[i+1] == QSE_T('&'))
			{
				m = qse_str_ccat (&sed->text.subst, QSE_T('&'));
				i++;
			}
			else if (cmd->u.subst.rpl.ptr[i] == QSE_T('&'))
			{
				m = qse_str_ncat (
					&sed->text.subst, mat.ptr, mat.len);
			}
			else
			{
				m = qse_str_ccat (
					&sed->text.subst, cmd->u.subst.rpl.ptr[i]);
			}

			if (m == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}
		}

		sub_count++;
		cur_len = cur_len - ((mat.ptr - cur_ptr) + mat.len);
		cur_ptr = mat.ptr + mat.len;

		if (mat.len == 0)
		{
			/* a zero-length match does not consume any input.
			 * unless the position is advanced by force, the same
			 * match is found over and over again and the loop
			 * never terminates. */
			if (cur_len == 0) break; /* nothing left to scan */

			m = qse_str_ncat (&sed->text.subst, cur_ptr, 1);
			if (m == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}

			cur_ptr++;
			cur_len--;
		}
	}

	if (str_len < QSE_STR_LEN(&sed->eio.in.line))
	{
		/* restore the newline excluded from matching so that the
		 * line keeps its terminator after the swap below */
		/* TODO: support different line end scheme */
		m = qse_str_ccat (&sed->text.subst, QSE_T('\n'));
		if (m == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}
	}

	qse_str_swap (&sed->eio.in.line, &sed->text.subst);
	return 0;
}
static int match_a (qse_sed_t* sed, qse_sed_a_t* a) static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
{ {
switch (a->type) switch (a->type)
@ -1700,10 +1815,11 @@ static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
case QSE_SED_A_REX: case QSE_SED_A_REX:
{ {
qse_str_t match; int n;
int errnum, n; qse_cstr_t match;
qse_str_t* line; qse_str_t* line;
qse_size_t llen; qse_size_t llen;
qse_rex_errnum_t errnum;
QSE_ASSERT (a->u.rex != QSE_NULL); QSE_ASSERT (a->u.rex != QSE_NULL);
@ -1715,13 +1831,10 @@ static int match_a (qse_sed_t* sed, qse_sed_a_t* a)
QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--; QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
n = qse_matchrex ( n = qse_matchrex (
sed->mmgr, sed->mmgr, 0, a->u.rex, 0,
0, QSE_STR_PTR(line), llen,
a->u.rex, QSE_STR_PTR(line), llen,
0, &match, &errnum);
QSE_STR_PTR(line),
llen,
&match.ptr, &match.len, &errnum);
if (n <= -1) if (n <= -1)
{ {
sed->errnum = QSE_SED_EREXMA; sed->errnum = QSE_SED_EREXMA;
@ -2047,8 +2160,7 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
case QSE_SED_CMD_WRITE_FILELN: case QSE_SED_CMD_WRITE_FILELN:
{ {
const qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line); const qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line);
const qse_char_t* len = QSE_STR_LEN(&sed->eio.in.line); qse_size_t i, len = QSE_STR_LEN(&sed->eio.in.line);
qse_size_t i;
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
/* TODO: handle different line end scheme */ /* TODO: handle different line end scheme */
@ -2089,6 +2201,37 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
jumpto = cmd->u.branch.target; jumpto = cmd->u.branch.target;
break; break;
case QSE_SED_CMD_TRANSLATE:
{
qse_char_t* ptr = QSE_STR_PTR(&sed->eio.in.line);
qse_size_t i, len = QSE_STR_LEN(&sed->eio.in.line);
/* TODO: sort cmd->u.transet and do a binary search.
* once sorted, you can check, before the binary search, whether ptr[i] < transet[0] || ptr[i] > transet[transet_size-1]; if so, it has no matching translation */
/* TODO: support different line end scheme */
if (len > 0 && ptr[len-1] == QSE_T('\n')) len--;
for (i = 0; i < len; i++)
{
const qse_char_t* tptr = cmd->u.transet.ptr;
qse_size_t j, tlen = cmd->u.transet.len;
for (j = 0; j < tlen; j += 2)
{
if (ptr[i] == tptr[j])
{
ptr[i] = tptr[j+1];
break;
}
}
}
break;
}
case QSE_SED_CMD_SUBSTITUTE:
n = do_subst (sed, cmd);
if (n <= -1) return QSE_NULL;
break;
} }
if (jumpto == NULL) jumpto = cmd + 1; if (jumpto == NULL) jumpto = cmd + 1;

View File

@ -77,18 +77,16 @@ struct qse_sed_cmd_t
QSE_SED_CMD_NEXT_APPEND = QSE_T('N'), QSE_SED_CMD_NEXT_APPEND = QSE_T('N'),
/* branch */ /* branch */
QSE_SED_CMD_BRANCH = QSE_T('b'), QSE_SED_CMD_BRANCH = QSE_T('b'),
QSE_SED_CMD_T = QSE_T('t'), QSE_SED_CMD_T = QSE_T('t'),
QSE_SED_CMD_READ_FILE = QSE_T('r'), QSE_SED_CMD_READ_FILE = QSE_T('r'),
QSE_SED_CMD_READ_FILELN = QSE_T('R'), QSE_SED_CMD_READ_FILELN = QSE_T('R'),
QSE_SED_CMD_WRITE_FILE = QSE_T('w'), QSE_SED_CMD_WRITE_FILE = QSE_T('w'),
QSE_SED_CMD_WRITE_FILELN = QSE_T('W'), QSE_SED_CMD_WRITE_FILELN = QSE_T('W'),
/* s/regex/str/ - replace matching pattern with a new string */ QSE_SED_CMD_SUBSTITUTE = QSE_T('s'),
QSE_SED_CMD_S = QSE_T('s'), QSE_SED_CMD_TRANSLATE = QSE_T('y')
/* y/s/d/ - translate characters in s to characters in d */
QSE_SED_CMD_Y = QSE_T('y')
} type; } type;