added str::normspc

fixed a bug of mistaking a colon for a backquote in some contexts
This commit is contained in:
hyung-hwan 2013-01-21 14:35:24 +00:00
parent d671443442
commit c45abff9ab
6 changed files with 78 additions and 60 deletions

View File

@ -126,7 +126,7 @@ and represents the value of 0.
A string is enclosed in a pair of double quotes or single quotes. A string is enclosed in a pair of double quotes or single quotes.
A character in a string encosed in the double-quotes, when preceded with A character in a string enclosed in the double-quotes, when preceded with
a back-slash, changes the meaning. a back-slash, changes the meaning.
- \\a - alert - \\a - alert
@ -149,16 +149,25 @@ In the octal sequence, you can specify up to 3 octal digits after \\; In the
hexadecimal sequence, you can specify as many hexadecimal digits as possible hexadecimal sequence, you can specify as many hexadecimal digits as possible
after \\x. after \\x.
If the number doesn't fit in the range that the default character type ~~~~~{.awk}
can represent, the character generated from the sequence is undefined. BEGIN {
print "\xC720\xB2C8\xCF54\xB4DC \x7D71\x4E00\x78BC";
}
~~~~~
You can use \\u and \\U in a string to specify a character by a Unicode code This program should print \em 유니코드 \em 統一碼 if the character type can
point if [Character Type](@ref installation) chosen for building is the represent the numbers in the sequence. If the number doesn't fit in the range
wide character type. that the current character type can represent, the character generated from
the sequence is undefined.
- \\uXXXX - X is a hexadecimal digit. The \\u and \\U sequences, unlike the \\x sequence, limit the maximum number of
- \\UXXXXXXXX - X is a hexadecimal digit. hexadecimal digits. They are available if the [Character Type](@ref installation)
chosen for building is the wide character type.
- \\uXXXX - X is a hexadecimal digit. up to 4 digits
- \\UXXXXXXXX - X is a hexadecimal digit. up to 8 digits
The program above can be rewritten like this.
~~~~~{.awk} ~~~~~{.awk}
BEGIN { BEGIN {
@ -166,16 +175,25 @@ wide character type.
} }
~~~~~ ~~~~~
This program should print 유니코드 統一碼. If \\x, \\u, \\U are not followed by a hexadecimal digit, *x*, *u*, *U* are
produced respectively.
There are no escaping sequences supported for a string enclosed in the single There are no special sequences supported for a string enclosed in the single
quotes. For that reason, you can't specify the single quote itself within quotes. For that reason, you can't specify the single quote itself within
a single-quoted string. a single-quoted string. The following program prints *awk* in double quotes.
~~~~~{.awk}
BEGIN {
print '"awk"';
}
~~~~~
### Regular Expressions ### ### Regular Expressions ###
A regular expression is enclosed in a pair of forward slashes. A regular expression is enclosed in a pair of forward slashes. The special
sequences for a double-quoted string are all supported in a regular expression.
TBD.
### Note ### ### Note ###
@ -189,42 +207,9 @@ Let's take this as an example.
Since 0x not followed by a digit is a valid token, and T is an identifier, Since 0x not followed by a digit is a valid token, and T is an identifier,
it is the same expression as 0x concatenated with T (0x @@ T). it is the same expression as 0x concatenated with T (0x @@ T).
An AWK program can be composed of the following elements shown below.
Each language element requires the option in the second column to be on.
<table>
<tr><th>Element </th><th>Option </th></tr>
<tr><td>Comment </td><td> </td></tr>
<tr><td>Global variable declaration</td><td> </td></tr>
<tr><td>Pattern-action block </td><td>#QSE_AWK_PABLOCK </td></tr>
<tr><td>User-defined function </td><td> </td></tr>
<tr><td>\@include </td><td>#QSE_AWK_INCLUDE </td></tr>
</table>
Single line comments begin with the '#' letter and end at the end of the
same line. The C style multi-line comments are supported as well.
Comments are ignored.
- pattern-action-block := pattern action-block
- pattern := BEGIN | END | expression | expression-range
- expression-range := expression , expression
A pattern in a pattern action block can be omitted.
The action part can be omitted if the pattern is not BEGIN nor END.
A pattern-action block, and a user-defined function can have the following elements.
<table>
<tr><th>Element </th><th>Option </th></tr>
<tr><td>Local variable declaration</td><td> </td></tr>
<tr><td>Statement </td><td> </td></tr>
<tr><td>getline </td><td>#QSE_AWK_RIO </td></tr>
<tr><td>print </td><td>#QSE_AWK_RIO </td></tr>
<tr><td>nextofile </td><td>#QSE_AWK_NEXTOFILE </td></tr>
<tr><td>reset </td><td>#QSE_AWK_RESET </td></tr>
<tr><td>abort </td><td>#QSE_AWK_ABORT </td></tr>
</table>
Commands
--------
AWK has the following statement constructs. AWK has the following statement constructs.
- if - if

View File

@ -3729,7 +3729,7 @@ static qse_awk_nde_t* parse_concat (qse_awk_t* awk, const qse_awk_loc_t* xloc)
/* unary operators */ /* unary operators */
MATCH(awk,TOK_PLUS) || MATCH(awk,TOK_MINUS) || MATCH(awk,TOK_PLUS) || MATCH(awk,TOK_MINUS) ||
MATCH(awk,TOK_LNOT) || MATCH(awk,TOK_BNOT) || MATCH(awk,TOK_LNOT) || MATCH(awk,TOK_BNOT) ||
MATCH(awk,TOK_COLON) || MATCH(awk,TOK_BQUOTE) ||
/* increment operators */ /* increment operators */
MATCH(awk,TOK_PLUSPLUS) || MATCH(awk,TOK_MINUSMINUS) || MATCH(awk,TOK_PLUSPLUS) || MATCH(awk,TOK_MINUSMINUS) ||
((awk->opt.trait & QSE_AWK_TOLERANT) && ((awk->opt.trait & QSE_AWK_TOLERANT) &&

View File

@ -281,7 +281,11 @@ static int read_byid (qse_awk_rtx_t* rtx, dir_list_t* list, qse_long_t id, qse_a
if (!tmp || qse_awk_rtx_setrefval (rtx, ref, tmp) <= -1) if (!tmp || qse_awk_rtx_setrefval (rtx, ref, tmp) <= -1)
{ {
list->errnum = awk_err_to_errnum (qse_awk_rtx_geterrnum (rtx)); list->errnum = awk_err_to_errnum (qse_awk_rtx_geterrnum (rtx));
if (tmp) qse_awk_rtx_freeval (rtx, tmp); if (tmp)
{
qse_awk_rtx_refupval (rtx, tmp);
qse_awk_rtx_refdownval (rtx, tmp);
}
return -1; return -1;
} }

View File

@ -22,6 +22,28 @@
#include <qse/cmn/str.h> #include <qse/cmn/str.h>
#include "../../lib/cmn/mem.h" #include "../../lib/cmn/mem.h"
static int fnc_normspc (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{
	/* str::normspc(s) - normalize the spaces in the first argument.
	 *  - leading and trailing spaces are trimmed
	 *  - a series of spaces is replaced with a single space
	 *
	 * The argument is converted to a freshly duplicated string, which is
	 * compacted in place by qse_strxpac() and then turned into the
	 * return value.
	 *
	 * This function always returns 0. If the string duplication or the
	 * creation of the result value fails, no return value is set.
	 */
	qse_xstr_t str;
	qse_awk_val_t* res;

	str.ptr = qse_awk_rtx_valtostrdup (
		rtx, qse_awk_rtx_getarg(rtx, 0), &str.len);
	if (!str.ptr) return 0; /* nothing to normalize; leave the return value unset */

	/* qse_strxpac() yields the length of the compacted string */
	str.len = qse_strxpac (str.ptr, str.len);
	res = qse_awk_rtx_makestrval (rtx, str.ptr, str.len);

	/* the duplicate is released here, right after building the value */
	qse_awk_rtx_freemem (rtx, str.ptr);

	if (res) qse_awk_rtx_setretval (rtx, res);
	return 0;
}
static int trim (qse_awk_rtx_t* rtx, int flags) static int trim (qse_awk_rtx_t* rtx, int flags)
{ {
qse_xstr_t path; qse_xstr_t path;
@ -33,12 +55,9 @@ static int trim (qse_awk_rtx_t* rtx, int flags)
if (path.ptr) if (path.ptr)
{ {
npath = qse_strxtrmx (path.ptr, &path.len, flags); npath = qse_strxtrmx (path.ptr, &path.len, flags);
retv = qse_awk_rtx_makestrval (rtx, npath, path.len); retv = qse_awk_rtx_makestrval (rtx, npath, path.len);
qse_awk_rtx_freemem (rtx, path.ptr); qse_awk_rtx_freemem (rtx, path.ptr);
if (retv == QSE_NULL) return -1; if (retv) qse_awk_rtx_setretval (rtx, retv);
qse_awk_rtx_setretval (rtx, retv);
} }
return 0; return 0;
@ -68,6 +87,7 @@ static fnctab_t fnctab[] =
{ {
/* keep this table sorted for binary search in query(). */ /* keep this table sorted for binary search in query(). */
{ QSE_T("ltrim"), { { 1, 1, QSE_NULL }, fnc_ltrim, 0 } }, { QSE_T("ltrim"), { { 1, 1, QSE_NULL }, fnc_ltrim, 0 } },
{ QSE_T("normspc"), { { 1, 1, QSE_NULL }, fnc_normspc, 0 } },
{ QSE_T("rtrim"), { { 1, 1, QSE_NULL }, fnc_rtrim, 0 } }, { QSE_T("rtrim"), { { 1, 1, QSE_NULL }, fnc_rtrim, 0 } },
{ QSE_T("trim"), { { 1, 1, QSE_NULL }, fnc_trim, 0 } } { QSE_T("trim"), { { 1, 1, QSE_NULL }, fnc_trim, 0 } }
}; };

View File

@ -568,7 +568,10 @@ static int fnc_getnwifcfg (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (tmp) if (tmp)
{ {
if (qse_awk_rtx_setrefval (rtx, qse_awk_rtx_getarg (rtx, 2), tmp) <= -1) if (qse_awk_rtx_setrefval (rtx, qse_awk_rtx_getarg (rtx, 2), tmp) <= -1)
qse_awk_rtx_freeval (rtx, tmp); {
qse_awk_rtx_refupval (rtx, tmp);
qse_awk_rtx_refdownval (rtx, tmp);
}
else ret = 0; else ret = 0;
} }
} }

View File

@ -473,7 +473,8 @@ static int getsection_byid (
{ {
if (qse_awk_rtx_setrefval (rtx, ref, tmp) <= -1) if (qse_awk_rtx_setrefval (rtx, ref, tmp) <= -1)
{ {
qse_awk_rtx_freeval (rtx, tmp); qse_awk_rtx_refupval (rtx, tmp);
qse_awk_rtx_refdownval (rtx, tmp);
x = UCI_ERR_MEM; x = UCI_ERR_MEM;
} }
} }
@ -535,7 +536,8 @@ static int getoption_byid (
{ {
if (qse_awk_rtx_setrefval (rtx, ref, map) <= -1) if (qse_awk_rtx_setrefval (rtx, ref, map) <= -1)
{ {
qse_awk_rtx_freeval (rtx, map); qse_awk_rtx_refupval (rtx, map);
qse_awk_rtx_refdownval (rtx, map);
map = QSE_NULL; map = QSE_NULL;
x = UCI_ERR_MEM; x = UCI_ERR_MEM;
} }
@ -579,7 +581,8 @@ static int getoption_byid (
fld = qse_awk_rtx_makestrvalwithmbs (rtx, tmp->name); fld = qse_awk_rtx_makestrvalwithmbs (rtx, tmp->name);
if (!fld) if (!fld)
{ {
qse_awk_rtx_freeval (rtx, map); qse_awk_rtx_refupval (rtx, map);
qse_awk_rtx_refdownval (rtx, map);
map = QSE_NULL; map = QSE_NULL;
x = UCI_ERR_MEM; x = UCI_ERR_MEM;
break; break;
@ -600,8 +603,10 @@ static int getoption_byid (
if (kp == QSE_NULL || qse_awk_rtx_setmapvalfld (rtx, map, kp, kl, fld) == QSE_NULL) if (kp == QSE_NULL || qse_awk_rtx_setmapvalfld (rtx, map, kp, kl, fld) == QSE_NULL)
{ {
if (kp) QSE_MMGR_FREE (qse_awk_rtx_getmmgr(rtx), kp); if (kp) QSE_MMGR_FREE (qse_awk_rtx_getmmgr(rtx), kp);
qse_awk_rtx_freeval (rtx, fld); qse_awk_rtx_refupval (rtx, fld);
qse_awk_rtx_freeval (rtx, map); qse_awk_rtx_refdownval (rtx, fld);
qse_awk_rtx_refupval (rtx, map);
qse_awk_rtx_refdownval (rtx, map);
map = QSE_NULL; map = QSE_NULL;
x = UCI_ERR_MEM; x = UCI_ERR_MEM;
break; break;
@ -615,7 +620,8 @@ static int getoption_byid (
{ {
if (qse_awk_rtx_setrefval (rtx, ref, map) <= -1) if (qse_awk_rtx_setrefval (rtx, ref, map) <= -1)
{ {
qse_awk_rtx_freeval (rtx, map); qse_awk_rtx_refupval (rtx, map);
qse_awk_rtx_refdownval (rtx, map);
map = QSE_NULL; map = QSE_NULL;
x = UCI_ERR_MEM; x = UCI_ERR_MEM;
} }