From c45abff9abc9a3b3615a630b119b181c856abeea Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Mon, 21 Jan 2013 14:35:24 +0000 Subject: [PATCH] added str::normspc fixed a bug of mistaking a colon for a backquote in some contexts --- qse/doc/page/awk-lang.md | 79 ++++++++++++++++------------------------ qse/lib/awk/parse.c | 2 +- qse/mod/awk/dir.c | 6 ++- qse/mod/awk/str.c | 28 ++++++++++++-- qse/mod/awk/sys.c | 5 ++- qse/mod/awk/uci.c | 18 ++++++--- 6 files changed, 78 insertions(+), 60 deletions(-) diff --git a/qse/doc/page/awk-lang.md b/qse/doc/page/awk-lang.md index 7db3f742..8e540f73 100644 --- a/qse/doc/page/awk-lang.md +++ b/qse/doc/page/awk-lang.md @@ -126,7 +126,7 @@ and represents the value of 0. A string is enclosed in a pair of double quotes or single quotes. -A character in a string encosed in the double-quotes, when preceded with +A character in a string enclosed in the double-quotes, when preceded with a back-slash, changes the meaning. - \\a - alert @@ -149,16 +149,25 @@ In the octal sequence, you can specify up to 3 octal digits after \\; In the hexadecimal sequence, you can specify as many hexadecimal digits as possible after \\x. -If the number doesn't fit in the range that the default character type -can represent, the character generated from the sequence is undefined. +~~~~~{.awk} + BEGIN { + print "\xC720\xB2C8\xCF54\xB4DC \x7D71\x4E00\x78BC"; + } +~~~~~ -You can use \\u and \\U in a string to specify a character by a Unicode code -point if [Character Type](@ref installation) chosen for building is the -wide character type. +This program should print \em 유니코드 \em 統一碼 if the character type can +represent the numbers in the sequence. If the number doesn't fit in the range +that the current character type can represent, the character generated from +the sequence is undefined. - - \\uXXXX - X is a hexadecimal digit. - - \\UXXXXXXXX - X is a hexadecimal digit. 
+The \\u and \\U sequences, unlike the \\x sequence, limit the maximum number of +hexadecimal digits. They are available if the [Character Type](@ref installation) +chosen for building is the wide character type. + - \\uXXXX - X is a hexadecimal digit; up to 4 digits + - \\UXXXXXXXX - X is a hexadecimal digit; up to 8 digits + +The program above can be rewritten like this. ~~~~~{.awk} BEGIN { @@ -166,16 +175,25 @@ wide character type. } ~~~~~ -This program should print 유니코드 統一碼. +If \\x, \\u, \\U are not followed by a hexadecimal digit, *x*, *u*, *U* are +produced respectively. -There are no escaping sequences supported for a string enclosed in the single +There are no special sequences supported for a string enclosed in the single quotes. For that reason, you can't specify the single quote itself within -a single-quoted string. +a single-quoted string. The following program prints *awk* in double quotes. + +~~~~~{.awk} + BEGIN { + print '"awk"'; + } +~~~~~ ### Regular Expressions ### -A regular expression is enclosed in a pair of forward slashes. +A regular expression is enclosed in a pair of forward slashes. The special +sequences for a double-quoted string are all supported in a regular expression. +TBD. ### Note ### @@ -189,42 +207,9 @@ Let's take this as an example. Since 0x not followed by a digit is a valid token, and T is an identifier, it is the same expression as 0x concatenated with T (0x @@ T). -An AWK program can be composed of the following elements shown below. -Each language element requires the option in the second column to be on. - - - - - - - - -
Element Option
Comment
Global variable declaration
Pattern-action block #QSE_AWK_PABLOCK
User-defined function
\@include #QSE_AWK_INCLUDE
- -Single line comments begin with the '#' letter and end at the end of the -same line. The C style multi-line comments are supported as well. -Comments are ignored. - -- pattern-action-block := pattern action-block -- pattern := BEGIN | END | expression | expression-range -- expression-range := expression , expression - -A pattern in a pattern action block can be omitted. -The action part can be omitted if the pattern is not BEGIN nor END. - -A pattern-action block, and a user-defined function can have the following elements. - - - - - - - - - - -
Element Option
Local variable declaration
Statement
getline #QSE_AWK_RIO
print #QSE_AWK_RIO
nextofile #QSE_AWK_NEXTOFILE
reset #QSE_AWK_RESET
abort #QSE_AWK_ABORT
+Commands +-------- AWK has the following statement constructs. - if diff --git a/qse/lib/awk/parse.c b/qse/lib/awk/parse.c index 214acf31..901d8fa8 100644 --- a/qse/lib/awk/parse.c +++ b/qse/lib/awk/parse.c @@ -3729,7 +3729,7 @@ static qse_awk_nde_t* parse_concat (qse_awk_t* awk, const qse_awk_loc_t* xloc) /* unary operators */ MATCH(awk,TOK_PLUS) || MATCH(awk,TOK_MINUS) || MATCH(awk,TOK_LNOT) || MATCH(awk,TOK_BNOT) || - MATCH(awk,TOK_COLON) || + MATCH(awk,TOK_BQUOTE) || /* increment operators */ MATCH(awk,TOK_PLUSPLUS) || MATCH(awk,TOK_MINUSMINUS) || ((awk->opt.trait & QSE_AWK_TOLERANT) && diff --git a/qse/mod/awk/dir.c b/qse/mod/awk/dir.c index 9fb16f65..37c47a61 100644 --- a/qse/mod/awk/dir.c +++ b/qse/mod/awk/dir.c @@ -281,7 +281,11 @@ static int read_byid (qse_awk_rtx_t* rtx, dir_list_t* list, qse_long_t id, qse_a if (!tmp || qse_awk_rtx_setrefval (rtx, ref, tmp) <= -1) { list->errnum = awk_err_to_errnum (qse_awk_rtx_geterrnum (rtx)); - if (tmp) qse_awk_rtx_freeval (rtx, tmp); + if (tmp) + { + qse_awk_rtx_refupval (rtx, tmp); + qse_awk_rtx_refdownval (rtx, tmp); + } return -1; } diff --git a/qse/mod/awk/str.c b/qse/mod/awk/str.c index ae7dcfc7..5a0246ff 100644 --- a/qse/mod/awk/str.c +++ b/qse/mod/awk/str.c @@ -22,6 +22,28 @@ #include #include "../../lib/cmn/mem.h" +static int fnc_normspc (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) +{ + /* normalize spaces + * - trim leading and trailing spaces + * - replace a series of spaces to a single space + */ + qse_xstr_t path; + qse_awk_val_t* retv; + + path.ptr = qse_awk_rtx_valtostrdup ( + rtx, qse_awk_rtx_getarg(rtx, 0), &path.len); + if (path.ptr) + { + path.len = qse_strxpac (path.ptr, path.len); + retv = qse_awk_rtx_makestrval (rtx, path.ptr, path.len); + qse_awk_rtx_freemem (rtx, path.ptr); + if (retv) qse_awk_rtx_setretval (rtx, retv); + } + + return 0; +} + static int trim (qse_awk_rtx_t* rtx, int flags) { qse_xstr_t path; @@ -33,12 +55,9 @@ static int trim (qse_awk_rtx_t* rtx, int flags) if (path.ptr) 
{ npath = qse_strxtrmx (path.ptr, &path.len, flags); - retv = qse_awk_rtx_makestrval (rtx, npath, path.len); qse_awk_rtx_freemem (rtx, path.ptr); - if (retv == QSE_NULL) return -1; - - qse_awk_rtx_setretval (rtx, retv); + if (retv) qse_awk_rtx_setretval (rtx, retv); } return 0; @@ -68,6 +87,7 @@ static fnctab_t fnctab[] = { /* keep this table sorted for binary search in query(). */ { QSE_T("ltrim"), { { 1, 1, QSE_NULL }, fnc_ltrim, 0 } }, + { QSE_T("normspc"), { { 1, 1, QSE_NULL }, fnc_normspc, 0 } }, { QSE_T("rtrim"), { { 1, 1, QSE_NULL }, fnc_rtrim, 0 } }, { QSE_T("trim"), { { 1, 1, QSE_NULL }, fnc_trim, 0 } } }; diff --git a/qse/mod/awk/sys.c b/qse/mod/awk/sys.c index 7bfdb1f7..8d4b9729 100644 --- a/qse/mod/awk/sys.c +++ b/qse/mod/awk/sys.c @@ -568,7 +568,10 @@ static int fnc_getnwifcfg (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) if (tmp) { if (qse_awk_rtx_setrefval (rtx, qse_awk_rtx_getarg (rtx, 2), tmp) <= -1) - qse_awk_rtx_freeval (rtx, tmp); + { + qse_awk_rtx_refupval (rtx, tmp); + qse_awk_rtx_refdownval (rtx, tmp); + } else ret = 0; } } diff --git a/qse/mod/awk/uci.c b/qse/mod/awk/uci.c index f00017e6..49b1284a 100644 --- a/qse/mod/awk/uci.c +++ b/qse/mod/awk/uci.c @@ -473,7 +473,8 @@ static int getsection_byid ( { if (qse_awk_rtx_setrefval (rtx, ref, tmp) <= -1) { - qse_awk_rtx_freeval (rtx, tmp); + qse_awk_rtx_refupval (rtx, tmp); + qse_awk_rtx_refdownval (rtx, tmp); x = UCI_ERR_MEM; } } @@ -535,7 +536,8 @@ static int getoption_byid ( { if (qse_awk_rtx_setrefval (rtx, ref, map) <= -1) { - qse_awk_rtx_freeval (rtx, map); + qse_awk_rtx_refupval (rtx, map); + qse_awk_rtx_refdownval (rtx, map); map = QSE_NULL; x = UCI_ERR_MEM; } @@ -579,7 +581,8 @@ static int getoption_byid ( fld = qse_awk_rtx_makestrvalwithmbs (rtx, tmp->name); if (!fld) { - qse_awk_rtx_freeval (rtx, map); + qse_awk_rtx_refupval (rtx, map); + qse_awk_rtx_refdownval (rtx, map); map = QSE_NULL; x = UCI_ERR_MEM; break; @@ -600,8 +603,10 @@ static int getoption_byid ( if (kp == QSE_NULL 
|| qse_awk_rtx_setmapvalfld (rtx, map, kp, kl, fld) == QSE_NULL) { if (kp) QSE_MMGR_FREE (qse_awk_rtx_getmmgr(rtx), kp); - qse_awk_rtx_freeval (rtx, fld); - qse_awk_rtx_freeval (rtx, map); + qse_awk_rtx_refupval (rtx, fld); + qse_awk_rtx_refdownval (rtx, fld); + qse_awk_rtx_refupval (rtx, map); + qse_awk_rtx_refdownval (rtx, map); map = QSE_NULL; x = UCI_ERR_MEM; break; @@ -615,7 +620,8 @@ static int getoption_byid ( { if (qse_awk_rtx_setrefval (rtx, ref, map) <= -1) { - qse_awk_rtx_freeval (rtx, map); + qse_awk_rtx_refupval (rtx, map); + qse_awk_rtx_refdownval (rtx, map); map = QSE_NULL; x = UCI_ERR_MEM; }