revised some documentation
This commit is contained in:
parent
7f64932d19
commit
e957f7a1a1
@ -1,5 +1,5 @@
|
||||
QSEAWK Language {#awk-lang}
|
||||
===============
|
||||
================================================================================
|
||||
|
||||
QSEAWK implements the language described in the
|
||||
[The AWK Programming Language][awkbook] with extensions.
|
||||
@ -63,14 +63,24 @@ represents the value of 0.
|
||||
|
||||
A string is enclosed in a pair of double quotes or single quotes.
|
||||
|
||||
A character in a string enclosed in the double-quotes can be preceded with
|
||||
a back-slash to change the meaning of the character.
|
||||
A character in a string enclosed in the double-quotes, when preceded with
|
||||
a back-slash, changes the meaning.
|
||||
|
||||
\\
|
||||
\a
|
||||
\b
|
||||
\uXXXX
|
||||
\UXXXXXXXX
|
||||
- \\
|
||||
- \a
|
||||
- \b
|
||||
- \uXXXX
|
||||
- \UXXXXXXXX
|
||||
|
||||
You can use \\u and \\U in a string to specify a character by unicode if
|
||||
[Character Type](@ref installation) chosen for building is the wide character
|
||||
type.
|
||||
|
||||
BEGIN {
|
||||
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
|
||||
}
|
||||
|
||||
This program should print 유니코드 統一碼.
|
||||
|
||||
There are no escaping sequences supported for a string enclosed in the single
|
||||
quotes. For that reason, you can't specify the single quote itself within
|
||||
@ -99,7 +109,7 @@ Each language element requires the option in the second column to be on.
|
||||
<table>
|
||||
<tr><th>Element </th><th>Option </th></tr>
|
||||
<tr><td>Comment </td><td> </td></tr>
|
||||
<tr><td>Global variable declaration</td><td>#QSE_AWK_EXPLICIT </td></tr>
|
||||
<tr><td>Global variable declaration</td><td> </td></tr>
|
||||
<tr><td>Pattern-action block </td><td>#QSE_AWK_PABLOCK </td></tr>
|
||||
<tr><td>User-defined function </td><td> </td></tr>
|
||||
<tr><td>\@include </td><td>#QSE_AWK_INCLUDE </td></tr>
|
||||
@ -120,7 +130,7 @@ A pattern-action block, and a user-defined function can have the following eleme
|
||||
|
||||
<table>
|
||||
<tr><th>Element </th><th>Option </th></tr>
|
||||
<tr><td>Local variable declaration</td><td>#QSE_AWK_EXPLICIT </td></tr>
|
||||
<tr><td>Local variable declaration</td><td> </td></tr>
|
||||
<tr><td>Statement </td><td> </td></tr>
|
||||
<tr><td>getline </td><td>#QSE_AWK_RIO </td></tr>
|
||||
<tr><td>print </td><td>#QSE_AWK_RIO </td></tr>
|
||||
@ -218,116 +228,74 @@ BEGIN {
|
||||
The !== operator is a negated form of the === operator.
|
||||
|
||||
|
||||
@subsection awk_ext_vardecl VARIABLE DECLARATION
|
||||
### Variable Declaration ###
|
||||
|
||||
#QSE_AWK_EXPLICIT enables variable declaration. Variables declared are accessed
|
||||
directly bypassing the global named map that stores undeclared variables.
|
||||
The keyword @b global introduces a global variable and the keyword @b local
|
||||
introduces a local variable. Local variable declaration in a block must be
|
||||
located before an expression or a statement appears.
|
||||
|
||||
@code
|
||||
global g1, g2; #declares two global variables g1 and g2
|
||||
Variables declared are accessed directly bypassing the global named map
|
||||
that stores undeclared variables. The keyword \@global introduces a global
|
||||
variable and the keyword \@local introduces a local variable. Local variable
|
||||
declaration in a block must be located before an expression or a statement
|
||||
appears.
|
||||
|
||||
@global g1, g2; #declares two global variables g1 and g2
|
||||
BEGIN {
|
||||
local a1, a2, a3; # declares three local variables
|
||||
|
||||
@local a1, a2, a3; # declares three local variables
|
||||
g1 = 300; a1 = 200;
|
||||
|
||||
{
|
||||
local a1; # a1 here hides the a1 at the outer scope
|
||||
local g1; # g1 here hides the global g1
|
||||
@local a1; # a1 here hides the a1 at the outer scope
|
||||
@local g1; # g1 here hides the global g1
|
||||
a1 = 10; g1 = 5;
|
||||
print a1, g1; # it prints 10 and 5
|
||||
}
|
||||
|
||||
print a1, g1; # it prints 200 and 300
|
||||
}
|
||||
|
||||
@endcode
|
||||
|
||||
However, turning on #QSE_AWK_EXPLICIT does not disable named variables.
|
||||
To disable named variables, you must turn off #QSE_AWK_IMPLICIT.
|
||||
|
||||
@subsection awk_ext_include INCLUDE
|
||||
### \@include ###
|
||||
|
||||
The \@include directive inserts the contents of the object specified in the
|
||||
following string, typically a file name, as if they appeared in the source
|
||||
stream being processed. The directive can only be used at the outmost scope
|
||||
where global variable declarations, @b BEGIN, @b END, and/or pattern-action
|
||||
blocks appear. To use \@include, you must turn on #QSE_AWK_INCLUDE.
|
||||
where global variable declarations, *BEGIN*, *END*, and/or pattern-action
|
||||
blocks appear.
|
||||
|
||||
@code
|
||||
@include "abc.awk"
|
||||
BEGIN { func_in_abc (); }
|
||||
@endcode
|
||||
|
||||
A semicolon is optional after the included file name. The following is the
|
||||
same as the sample above.
|
||||
@code
|
||||
|
||||
@include "abc.awk";
|
||||
BEGIN { func_in_abc(); }
|
||||
@endcode
|
||||
|
||||
If #QSE_AWK_NEWLINE is off, the semicolon is required.
|
||||
|
||||
|
||||
@subsection awk_ext_funcall FUNCTION CALL
|
||||
|
||||
### Function Call ###
|
||||
|
||||
name(1);
|
||||
|
||||
If there is no space between 'name' and the left parenthesis, the
|
||||
name is treated as a function name.
|
||||
|
||||
name (1);
|
||||
|
||||
If there is a space, the name is treated as a function name if the
|
||||
name has been declared as the function or if #QSE_AWK_IMPLICIT is on,
|
||||
it may be 'name' concatenated with the expression in the parentheses.
|
||||
|
||||
The following is a valid program.
|
||||
@code
|
||||
@pragma implicit off
|
||||
|
||||
BEGIN { name (1); }
|
||||
function name(a) { print a; }'
|
||||
@endcode
|
||||
|
||||
However, in this program, the first 'name' becomes a named global variable,
|
||||
so the function declaration with 'name' triggers the variable redefinition
|
||||
error.
|
||||
@pragma implicit on
|
||||
|
||||
BEGIN { name (1); }
|
||||
function name(a) { print a; }'
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_print EXTENDED PRINT/PRINTF
|
||||
When #QSE_AWK_TOLERANT is on, print and printf are treated as if
|
||||
they are function calls. In this mode, they return a negative number
|
||||
on failure and a zero on success and any I/O failure doesn't abort
|
||||
a running program.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
a = print "hello, world" > "/dev/null";
|
||||
print a;
|
||||
a = print ("hello, world") > "/dev/null";
|
||||
print a;
|
||||
}
|
||||
@endcode
|
||||
|
||||
Since print and printf are like function calls, you can use them
|
||||
in any context where a normal expression is allowed. For example,
|
||||
printf is used as a conditional expression in an 'if' statement
|
||||
in the sample code below.
|
||||
@code
|
||||
BEGIN {
|
||||
if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
|
||||
print "FAILURE";
|
||||
else
|
||||
print "SUCCESS";
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_exprgroup GROUPED EXPRESSION
|
||||
### GROUPED EXPRESSION ###
|
||||
When #QSE_AWK_TOLERANT is on, you can use a grouped expression without
|
||||
the 'in' operator. A grouped expression is a parentheses-enclosed list
|
||||
of expressions separated with a comma. Each expression in the group is
|
||||
@ -344,12 +312,315 @@ BEGIN {
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_rwpipe TWO-WAY PIPE
|
||||
|
||||
The two-way pipe indicated by @b || is supported, in addition to the one-way
|
||||
pipe indicated by @b |. Turn on #QSE_AWK_RWPIPE to enable the two-way pipe.
|
||||
### RETURN ###
|
||||
The return statement is valid in pattern-action blocks as well as in functions.
|
||||
The execution of a calling block is aborted once the return statement is executed.
|
||||
|
||||
@code
|
||||
$ qseawk 'BEGIN { return 20; }' ; echo $?
|
||||
20
|
||||
@endcode
|
||||
|
||||
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
|
||||
@code
|
||||
function getarray() {
|
||||
@local a;
|
||||
a["one"] = 1;
|
||||
a["two"] = 2;
|
||||
a["three"] = 3;
|
||||
return a;
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
@local x;
|
||||
x = getarray();
|
||||
for (i in x) print i, x[i];
|
||||
}
|
||||
@endcode
|
||||
|
||||
|
||||
### RESET ###
|
||||
The reset statement resets an array variable back to the initial state.
|
||||
After that, the array variable can also be used as a scalar variable again.
|
||||
You must have #QSE_AWK_RESET on to be able to use this
|
||||
statement.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
a[1] = 20;
|
||||
reset a;
|
||||
a = 20; # this is legal
|
||||
print a;
|
||||
}
|
||||
@endcode
|
||||
|
||||
### ABORT ###
|
||||
The abort statement is similar to the exit statement except that
|
||||
it skips executing the END block. You must have #QSE_AWK_ABORT on to be
|
||||
able to use this statement.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
print "--- BEGIN ---";
|
||||
abort 10;
|
||||
}
|
||||
END {
|
||||
print "--- END ---"; # this must not be printed
|
||||
}
|
||||
@endcode
|
||||
|
||||
### EXTENDED FUNCTIONS ###
|
||||
index() and match() can accept the third parameter indicating the position
|
||||
where the search begins. A negative value indicates a position from the back.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
xstr = "abcdefabcdefabcdef";
|
||||
xsub = "abc";
|
||||
xlen = length(xsub);
|
||||
|
||||
i = 1;
|
||||
while ((i = index(xstr, xsub, i)) > 0)
|
||||
{
|
||||
print i, substr(xstr, i, xlen);
|
||||
i += xlen;
|
||||
}
|
||||
}
|
||||
@endcode
|
||||
|
||||
### EXTENDED FS ###
|
||||
|
||||
If the value for FS begins with a question mark followed by 4
|
||||
additional letters, QSEAWK can split a record with quoted fields
|
||||
delimited by a single-letter separator.
|
||||
|
||||
The 4 additional letters are composed of a field separator,
|
||||
an escaper, an opening quote, and a closing quote.
|
||||
|
||||
@code
|
||||
$ cat x.awk
|
||||
BEGIN { FS="?:\\[]"; }
|
||||
{
|
||||
for (i = 1; i <= NF; i++)
|
||||
print "$" i ": " $i;
|
||||
print "---------------";
|
||||
}
|
||||
@endcode
|
||||
|
||||
The value of FS above means the following.
|
||||
- : is a field separator.
|
||||
- a backslash is an escaper.
|
||||
- a left bracket is an opening quote.
|
||||
- a right bracket is a closing quote.
|
||||
|
||||
See the following output.
|
||||
@code
|
||||
$ cat x.dat
|
||||
[fx1]:[fx2]:[f\[x\]3]
|
||||
abc:def:[a b c]
|
||||
$ qseawk -f x.awk x.dat
|
||||
$1: fx1
|
||||
$2: fx2
|
||||
$3: f[x]3
|
||||
---------------
|
||||
$1: abc
|
||||
$2: def
|
||||
$3: a b c
|
||||
---------------
|
||||
@endcode
|
||||
|
||||
|
||||
## Built-in I/O ##
|
||||
|
||||
QSEAWK comes with built-in I/O commands and functions in addition to the
|
||||
implicit input streams for pattern-action blocks. The built-in I/O facility
|
||||
is available only if QSEAWK is set with #QSE_AWK_RIO.
|
||||
|
||||
### getline ###
|
||||
|
||||
The *getline* command has multiple forms of usage. It can be used with or
|
||||
without a variable name and can also be associated with a pipe or a file
|
||||
redirection. The default association is the console when no pipe and file
|
||||
redirection is specified. In principle, it reads a record from the associated
|
||||
input stream and updates $0 or a variable with the record. If it managed to
|
||||
perform this successfully, it returns 1; if it detected EOF, it returns 0; it
|
||||
returns -1 on failure.
|
||||
|
||||
*getline* without a following variable reads a record from an associated
|
||||
input stream, updates $0 with the value and increments *FNR*, *NR*. Updating
|
||||
$0 also causes changes in *NF* and fields from $1 to $NF.
|
||||
|
||||
The sample below reads records from the console and prints them.
|
||||
|
||||
BEGIN {
|
||||
while (getline > 0) print $0;
|
||||
}
|
||||
|
||||
It is equivalent to
|
||||
|
||||
{ print $0 }
|
||||
|
||||
but performs the task in the *BEGIN* block.
|
||||
|
||||
*getline* with a variable reads a record from an associated input stream
|
||||
and updates the variable with the value. It updates *FNR* and *NR*, too.
|
||||
|
||||
BEGIN {
|
||||
while (getline line > 0) print line;
|
||||
}
|
||||
|
||||
You can change the stream association to a pipe or a file. If *getline* or
|
||||
*getline variable* is followed by an input redirection operator(<) and
|
||||
an expression, the evaluation result of the expression becomes the name of
|
||||
the file to read records from. The file is opened at the first occurrence
|
||||
and can be closed with the *close* function.
|
||||
|
||||
BEGIN {
|
||||
filename = "/etc/passwd";
|
||||
while ((getline line < filename) > 0) print line;
|
||||
close (filename);
|
||||
}
|
||||
|
||||
When *getline* or *getline variable* is preceded with an expression and a pipe
|
||||
operator(|), the evaluation result of the expression becomes the name of
|
||||
the external command to execute. The command is executed at the first occurrence
|
||||
and can be terminated with the *close* function. The example below reads
|
||||
the output of the *ls -laF* command and prints it to the console.
|
||||
|
||||
BEGIN {
|
||||
procname = "ls -laF";
|
||||
while ((procname | getline line) > 0) print line;
|
||||
close (procname);
|
||||
}
|
||||
|
||||
The two-way pipe operator(||) can also be used to read records from an
|
||||
external command. There are no visible changes to the end-user in case
|
||||
of the example above if you switch the operator.
|
||||
|
||||
BEGIN {
|
||||
procname = "ls -laF";
|
||||
while ((procname || getline line) > 0) print line;
|
||||
close (procname);
|
||||
}
|
||||
|
||||
The *getline* command acts like a function in that it returns a value.
|
||||
But you can't place an empty parentheses when no variable name is specified
|
||||
nor can you parenthesize the optional variable name. For example, *getline(a)*
|
||||
is different from *getline a* and means the concatenation of the return value
|
||||
of *getline* and the variable *a*. Besides, it is not clear if
|
||||
|
||||
getline a < b
|
||||
|
||||
is
|
||||
|
||||
(getline a) < b
|
||||
|
||||
or
|
||||
|
||||
(getline) (a < b)
|
||||
|
||||
For this reason, you are advised to parenthesize *getline* and its related
|
||||
components to avoid confusion whenever necessary. The example reading into
|
||||
the variable *line* can be made clearer with parenthesization.
|
||||
|
||||
BEGIN {
|
||||
while ((getline line) > 0) print line;
|
||||
}
|
||||
|
||||
### print ###
|
||||
**TODO**
|
||||
|
||||
### printf ###
|
||||
|
||||
When #QSE_AWK_TOLERANT is on, print and printf are treated as if
|
||||
they are function calls. In this mode, they return a negative number
|
||||
on failure and a zero on success and any I/O failure doesn't abort
|
||||
a running program.
|
||||
|
||||
BEGIN {
|
||||
a = print "hello, world" > "/dev/null";
|
||||
print a;
|
||||
a = print ("hello, world") > "/dev/null";
|
||||
print a;
|
||||
}
|
||||
|
||||
Since print and printf are like function calls, you can use them
|
||||
in any context where a normal expression is allowed. For example,
|
||||
printf is used as a conditional expression in an 'if' statement
|
||||
in the sample code below.
|
||||
|
||||
BEGIN {
|
||||
if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
|
||||
print "FAILURE";
|
||||
else
|
||||
print "SUCCESS";
|
||||
}
|
||||
|
||||
### close (io-name, what) ###
|
||||
|
||||
The *close* function closes a stream indicated by the name *io-name*. It takes
|
||||
an optional parameter *what* indicating whether input or output should be
|
||||
closed.
|
||||
|
||||
If *io-name* is a file, it closes the file handle associated;
|
||||
If *io-name* is a command, it may kill the running process from the command,
|
||||
reclaims other system resources, and closes the pipe handles;
|
||||
If *io-name* is a network stream, it tears down connections to the network
|
||||
peer and closes the socket handles.
|
||||
|
||||
The optional parameter *what*, which must be *r* or *w* when used, is useful
|
||||
when *io-name* is a command invoked for the two-way operator. The value of
|
||||
*r* causes the function to close the read-end of the pipe and the value of
|
||||
*w* causes the function to close the write-end of the pipe.
|
||||
|
||||
The function returns 0 on success and -1 on failure.
|
||||
|
||||
### setioattr (io-name, attr-name, attr-value) ###
|
||||
|
||||
The *setioattr* function changes the I/O attribute of the name *attr-name* to
|
||||
the value *attr-value* for a stream identified by *io-name*. It returns 0 on
|
||||
success and -1 on failure.
|
||||
|
||||
- *io-name* is a source or target name used in *getline*, *print*, *printf*
|
||||
combined with |, ||, >, <, >>.
|
||||
- *attr-name* is one of *codepage*, *ctimeout*, *atimeout*, *rtimeout*,
|
||||
*wtimeout*.
|
||||
- *attr-value* varies depending on *attr-name*.
|
||||
+ codepage: *cp949*, *cp950*, *utf8*
|
||||
+ ctimeout, atimeout, rtimeout, wtimeout: the number of seconds. effective
|
||||
on socket based streams only. you may use a floating-point number for
|
||||
finer resolution than a second. a negative value turns off timeout.
|
||||
|
||||
See this sample that prints the contents of a document encoded in cp949.
|
||||
|
||||
BEGIN {
|
||||
setioattr ("README.TXT", "codepage", "cp949");
|
||||
while ((getline x < "README.TXT") > 0) print x;
|
||||
}
|
||||
|
||||
### getioattr (io-name, attr-name, attr-value) ###
|
||||
|
||||
The getioattr() function retrieves the current attribute value of the attribute
|
||||
named *attr-name* for the stream identified by *io-name*. The value retrieved
|
||||
is set to the variable referenced by *attr-value*. See *setioattr* for
|
||||
description on *io-name* and *attr-name*. It returns 0 on success and -1 on
|
||||
failure.
|
||||
|
||||
BEGIN {
|
||||
setioattr ("README.TXT", "codepage", "cp949");
|
||||
if (getioattr ("README.TXT", "codepage", codepage) <= -1)
|
||||
print "codepage unknown";
|
||||
else print "codepage: " codepage;
|
||||
}
|
||||
|
||||
### Two-way Pipe ###
|
||||
|
||||
The two-way pipe is indicated by the two-way pipe operator(||) and QSEAWK
|
||||
must be set with #QSE_AWK_RWPIPE to be able to use the two-way pipe.
|
||||
|
||||
The example redirects the output of *print* to the external *sort* command
|
||||
and reads back the output.
|
||||
|
||||
BEGIN {
|
||||
print "15" || "sort";
|
||||
print "14" || "sort";
|
||||
@ -359,9 +630,8 @@ BEGIN {
|
||||
# close the input side of the pipe as 'sort' starts emitting result
|
||||
# once the input is closed.
|
||||
close ("sort", "r");
|
||||
while (("sort" || getline x) > 0) print "xx:", x;
|
||||
while (("sort" || getline x) > 0) print x;
|
||||
}
|
||||
@endcode
|
||||
|
||||
This two-way pipe can create a TCP or UDP connection if the pipe command
|
||||
string is prefixed with one of the following:
|
||||
@ -371,7 +641,8 @@ string is prefixed with one of the followings:
|
||||
- tcpd:// - binds a TCP socket to a specified IP address/port and waits for the first connection.
|
||||
- udpd:// - binds a UDP socket to a specified IP address/port and waits for the first sender.
|
||||
|
||||
@code
|
||||
See this example.
|
||||
|
||||
BEGIN {
|
||||
# it binds a TCP socket to the IPv6 address :: and the port number
|
||||
# 9999 and waits for the first coming connection. It repeats writing
|
||||
@ -384,33 +655,23 @@ BEGIN {
|
||||
}
|
||||
while(1);
|
||||
}
|
||||
@endcode
|
||||
|
||||
You can specify TCP or UDP timeouts for connection, accepting, reading, and
|
||||
writing with setioattr (pipe-name, timeout-name, timeout-value). timeout-name
|
||||
should be one of "ctimeout", "atimeout", "rtimeout", and "wtimeout".
|
||||
timeout-value is a number specifying the actual timeout in milliseconds.
|
||||
A negative value indicates no timeout.
|
||||
|
||||
You can call getioattr (pipe-name, timeout-name) to get the current
|
||||
timeout-value set.
|
||||
You can manipulate TCP or UDP timeouts for connection, accepting, reading, and
|
||||
writing with the *setioattr* function and the *getioattr* function.
|
||||
|
||||
See the example below.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3000);
|
||||
setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5000);
|
||||
setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3);
|
||||
setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5);
|
||||
print "hello world" || "tcp://127.0.0.1:9999";
|
||||
"tcp://127.0.0.1:9999" || getline x;
|
||||
print x;
|
||||
}
|
||||
@endcode
|
||||
|
||||
Here is a more interesting example adopting Michael Sanders'
|
||||
AWK web server, modified for QSEAWK.
|
||||
Here is an interesting example adopting Michael Sanders' AWK web server,
|
||||
modified for QSEAWK.
|
||||
|
||||
@code
|
||||
#
|
||||
# Michael Sanders' AWK web server for QSEAWK.
|
||||
# Orginal code in http://awk.info/?tools/server
|
||||
@ -476,166 +737,21 @@ function RunApp(app) {
|
||||
if (app == "xload" ) {system("xload&"); return}
|
||||
if (app == "exit") {x = 0}
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_return RETURN
|
||||
The return statement is valid in pattern-action blocks as well as in functions.
|
||||
The execution of a calling block is aborted once the return statement is executed.
|
||||
### I/O Character Encoding ###
|
||||
|
||||
@code
|
||||
$ qseawk 'BEGIN { return 20; }' ; echo $?
|
||||
20
|
||||
@endcode
|
||||
|
||||
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
|
||||
@code
|
||||
function getarray() {
|
||||
local a;
|
||||
a["one"] = 1;
|
||||
a["two"] = 2;
|
||||
a["three"] = 3;
|
||||
return a;
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
local x;
|
||||
|
||||
x = getarray();
|
||||
for (i in x) print i, x[i];
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_reset RESET
|
||||
The reset statement resets an array variable back to the initial state.
|
||||
After that, the array variable can also be used as a scalar variable again.
|
||||
You must have #QSE_AWK_RESET on to be able to use this
|
||||
statement.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
a[1] = 20;
|
||||
reset a;
|
||||
a = 20; # this is legal
|
||||
print a;
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_abort ABORT
|
||||
The abort statement is similar to the exit statement except that
|
||||
it skips executing the END block. You must have #QSE_AWK_ABORT on to be
|
||||
able to use this statement.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
print "--- BEGIN ---";
|
||||
abort 10;
|
||||
}
|
||||
END {
|
||||
print "--- END ---"; # this must not be printed
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_comment COMMENT
|
||||
You can use the C-style comment as well as the pound comment.
|
||||
|
||||
@subsection awk_ext_fnc EXTENDED FUNCTIONS
|
||||
index() and match() can accept the third parameter indicating the position
|
||||
where the search begins. A negative value indicates a position from the back.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
xstr = "abcdefabcdefabcdef";
|
||||
xsub = "abc";
|
||||
xlen = length(xsub);
|
||||
|
||||
i = 1;
|
||||
while ((i = index(xstr, xsub, i)) > 0)
|
||||
{
|
||||
print i, substr(xstr, i, xlen);
|
||||
i += xlen;
|
||||
}
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_fs EXTENDED FS
|
||||
|
||||
If the value for FS begins with a question mark followed by 4
|
||||
additional letters, QSEAWK can split a record with quoted fields
|
||||
delimited by a single-letter separator.
|
||||
|
||||
The 4 additional letters are composed of a field separator,
|
||||
an escaper, an opening quote, and a closing quote.
|
||||
|
||||
@code
|
||||
$ cat x.awk
|
||||
BEGIN { FS="?:\\[]"; }
|
||||
{
|
||||
for (i = 1; i <= NF; i++)
|
||||
print "$" i ": " $i;
|
||||
print "---------------";
|
||||
}
|
||||
@endcode
|
||||
|
||||
The value of FS above means the following.
|
||||
- : is a field separator.
|
||||
- a backslash is an escaper.
|
||||
- a left bracket is an opening quote.
|
||||
- a right bracket is a closing quote.
|
||||
|
||||
See the following output.
|
||||
@code
|
||||
$ cat x.dat
|
||||
[fx1]:[fx2]:[f\[x\]3]
|
||||
abc:def:[a b c]
|
||||
$ qseawk -f x.awk x.dat
|
||||
$1: fx1
|
||||
$2: fx2
|
||||
$3: f[x]3
|
||||
---------------
|
||||
$1: abc
|
||||
$2: def
|
||||
$3: a b c
|
||||
---------------
|
||||
@endcode
|
||||
|
||||
|
||||
|
||||
@subsection awk_ext_binnum BINARY NUMBER
|
||||
Use 0b to begin a binary number sequence.
|
||||
|
||||
@code
|
||||
$ qseawk 'BEGIN { printf ("%b %o %d %x\n", 0b1101, 0b1101, 0b1101, 0b1101); }'
|
||||
1101 15 13 d
|
||||
@endcode
|
||||
|
||||
|
||||
|
||||
@subsection awk_ext_unicode UNICODE ESCAPE SEQUENCE
|
||||
|
||||
If QSE is compiled for #QSE_CHAR_IS_WCHAR, you can use \\u and \\U in a
|
||||
string to specify a character by unicode.
|
||||
|
||||
@code
|
||||
$ qseawk 'BEGIN { print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC"; }'
|
||||
유니코드 統一碼
|
||||
@endcode
|
||||
|
||||
|
||||
@subsection awk_ext_ioenc I/O ENCODING
|
||||
You can call setioattr() to set the character encoding of a stream resource
|
||||
like a pipe or a file. See qse_findcmgr() for a list of supported encoding names.
|
||||
You can change the character encoding of a stream. See qse_findcmgr()
|
||||
for a list of supported encoding names.
|
||||
|
||||
Let's say you run this simple echoing script on a WIN32 platform that has
|
||||
the active code page of 949 and is reachable at the IP address 192.168.2.8.
|
||||
|
||||
@code
|
||||
C:\> chcp
|
||||
Active code page: 949
|
||||
C:\> type s.awk
|
||||
BEGIN {
|
||||
sock = "tcpd://0.0.0.0:9999";
|
||||
setioattr (sock, "codepage", "cp949"); # this is not needed since the active
|
||||
# code page is already 949.
|
||||
setioattr (sock, "codepage", "cp949");
|
||||
do {
|
||||
if ((sock || getline x) <= 0) break;
|
||||
print "PEER: " x;
|
||||
@ -643,14 +759,12 @@ BEGIN {
|
||||
}
|
||||
while(1);
|
||||
}
|
||||
C:\> qseawk --rwpipe=on -f r.awk
|
||||
C:\> qseawk -f r.awk
|
||||
PEER: 안녕
|
||||
PEER: ?好!
|
||||
@endcode
|
||||
|
||||
Now you run the following script on a UTF-8 console of a Linux box.
|
||||
|
||||
@code
|
||||
$ echo $LANG
|
||||
en_US.UTF-8
|
||||
$ cat c.awk
|
||||
@ -672,94 +786,8 @@ $ qseawk --rwpipe=on -f c.awk
|
||||
PEER: 안녕
|
||||
> 你好!
|
||||
PEER: ?好!
|
||||
@endcode
|
||||
|
||||
Note that 你 has been converted to a question mark since the letter is
|
||||
not supported by cp949.
|
||||
|
||||
## Built-in I/O ##
|
||||
|
||||
QSEAWK comes with built-in I/O commands and functions in addition to the
|
||||
implicit input streams for pattern-action blocks. The built-in I/O facility
|
||||
is available only if QSEAWK is set with #QSE_AWK_RIO.
|
||||
|
||||
### getline ###
|
||||
|
||||
The *getline* command has multiple forms of usage. It can be used with or
|
||||
without a variable name and can also be associated with a pipe or a file
|
||||
redirection. Basically, it reads a record from an input stream associated
|
||||
and stores it.
|
||||
|
||||
*getline* without a following variable reads a record from an associated
|
||||
input stream and updates $0 with the value. It also updates *NF*, *FNR*, *NR*.
|
||||
The sample below reads records from the console and prints them.
|
||||
|
||||
BEGIN {
|
||||
while (getline > 0) print $0;
|
||||
}
|
||||
|
||||
It is equivalent to
|
||||
|
||||
{ print $0 }
|
||||
|
||||
but performs the task in the *BEGIN* block.
|
||||
|
||||
*getline* with a variable reads a record from an associated input stream
|
||||
and updates the variable with the value. It updates *FNR* and *NR*, too.
|
||||
|
||||
BEGIN {
|
||||
while (getline line > 0) print line;
|
||||
}
|
||||
|
||||
*getline* is associated with the console by default. you can change it
|
||||
to a file or a pipe by using |, ||, <.
|
||||
|
||||
The *getline* command acts like a function in that it returns a value: 1 on
|
||||
success, 0 on EOF, -1 on error. But you can't place an empty parentheses
|
||||
when no variable name is specified nor can you parenthesize the optional
|
||||
variable name. For example, *getline(a)* is different from *getline a* and
|
||||
means the concatenation of the return value of *getline* and the variable *a*.
|
||||
|
||||
### print ###
|
||||
|
||||
### printf ###
|
||||
|
||||
### setioattr (io-name, attr-name, attr-value) ###
|
||||
|
||||
The *setioattr* function changes the I/O attribute of the name *attr-name* to
|
||||
the value *attr-value* for a stream identified by *io-name*. It returns 0 on
|
||||
success and -1 on failure.
|
||||
|
||||
- *io-name* is a source or target name used in *getline*, *print*, *printf*
|
||||
combined with |, ||, >, <, >>.
|
||||
- *attr-name* is one of *codepage*, *ctimeout*, *atimeout*, *rtimeout*,
|
||||
*wtimeout*.
|
||||
- *attr-value* varies depending on *attr-name*.
|
||||
+ codepage: *cp949*, *cp950*, *utf8*
|
||||
+ ctimeout, atimeout, rtimeout, wtimeout: the number of seconds. effective
|
||||
on socket based streams only. you may use a floating-point number for
|
||||
finer resolution than a second. a negative value turns off timeout.
|
||||
|
||||
See this sample that prints the contents of a document encoded in cp949.
|
||||
|
||||
BEGIN {
|
||||
setioattr ("README.TXT", "codepage", "cp949");
|
||||
while ((getline x < "README.TXT") > 0) print x;
|
||||
}
|
||||
|
||||
### getioattr (io-name, attr-name, attr-value) ###
|
||||
|
||||
The getioattr() function retrieves the current attribute value of the attribute
|
||||
named *attr-name* for the stream identified by *io-name*. The value retrieved
|
||||
is set to the variable referenced by *attr-value*. See *setioattr* for
|
||||
description on *io-name* and *attr-name*. It returns 0 on success and -1 on
|
||||
failure.
|
||||
|
||||
BEGIN {
|
||||
setioattr ("README.TXT", "codepage", "cp949");
|
||||
if (getioattr ("README.TXT", "codepage", codepage) <= -1)
|
||||
print "codepage unknown";
|
||||
else print "codepage: " codepage;
|
||||
}
|
||||
|
||||
[awkbook]: http://cm.bell-labs.com/cm/cs/awkbook/
|
||||
|
@ -1,7 +1,8 @@
|
||||
Installation {#installation}
|
||||
============
|
||||
================================================================================
|
||||
|
||||
## Source Package ##
|
||||
Source Package
|
||||
--------------
|
||||
|
||||
You can download the source package from
|
||||
|
||||
@ -14,7 +15,8 @@ repository by executing the following command:
|
||||
|
||||
svn checkout http://qse.googlecode.com/svn/trunk/qse/
|
||||
|
||||
## Building on Unix/Linux ##
|
||||
Building on Unix/Linux
|
||||
----------------------
|
||||
|
||||
The project uses the standard autoconf/automake generated script files for
|
||||
building. If you work on the systems where these scripts can run, you can
|
||||
@ -26,7 +28,8 @@ follow the standard procedures of configuring and making the project.
|
||||
|
||||
You can use this method of building for MinGW or Cygwin on Windows.
|
||||
|
||||
## Cross-compiling for WIN32 ##
|
||||
Cross-compiling for WIN32
|
||||
-------------------------
|
||||
|
||||
While the autoconf/automake scripts may not support your native compilers,
|
||||
you can cross-compile it for WIN32/WIN64 with a cross-compiler. Get a
|
||||
@ -48,7 +51,8 @@ With MINGW-W64, you may run *configure* as shown below:
|
||||
The actual host and target names may vary depending on the cross-compiler
|
||||
installed.
|
||||
|
||||
## Native Makefiles ##
|
||||
Native Makefiles
|
||||
----------------
|
||||
|
||||
The project provides makefiles for some selected compilers and platforms.
|
||||
The makefiles were generated with bakefile (www.bakefile.org) and can be
|
||||
@ -66,7 +70,8 @@ the wide character type, you can execute this:
|
||||
cd bld\os2-watcom
|
||||
wmake BUILD=release CHAR=wchar
|
||||
|
||||
## Build Options ##
|
||||
Build Options
|
||||
-------------
|
||||
|
||||
The configure script and the native makefiles provide some options that you
|
||||
can use to change the build environment. The options presented here can be
|
||||
|
@ -1,5 +1,6 @@
|
||||
QSE {#mainpage}
|
||||
===================
|
||||
================================================================================
|
||||
|
||||
@image html qse-logo.png
|
||||
|
||||
The QSE library implements AWK, SED, and Unix commands in an embeddable form
|
||||
@ -19,7 +20,7 @@ Chung, Hyung-Hwan <hyunghwan.chung@gmail.com>
|
||||
|
||||
See the subpages for more information.
|
||||
|
||||
- @subpage installation
|
||||
- @ref installation
|
||||
- @subpage mem "Memory Management"
|
||||
- @subpage cenc "Character Encoding"
|
||||
- @subpage io "I/O Handling"
|
||||
|
@ -130,7 +130,7 @@ QSE_EXPORT int qse_ismbsdrivecurpath (
|
||||
* @endcode
|
||||
*
|
||||
* If #QSE_CANONPATH_EMPTYSINGLEDOT is clear in the @a flags, a single dot
|
||||
* is produced if the input @path resolves to the current directory logically.
|
||||
* is produced if the input @a path resolves to the current directory logically.
|
||||
* For example, dir/.. is canonicalized to a single period; If it is set,
|
||||
* an empty string is produced. Even a single period as an input produces
|
||||
* an empty string if it is set.
|
||||
@ -204,7 +204,7 @@ QSE_EXPORT int qse_iswcsdrivecurpath (
|
||||
* @endcode
|
||||
*
|
||||
* If #QSE_CANONPATH_EMPTYSINGLEDOT is clear in the @a flags, a single dot
|
||||
* is produced if the input @path resolves to the current directory logically.
|
||||
* is produced if the input @a path resolves to the current directory logically.
|
||||
* For example, dir/.. is canonicalized to a single period; If it is set,
|
||||
* an empty string is produced. Even a single period as an input produces
|
||||
* an empty string if it is set.
|
||||
|
@ -1132,7 +1132,7 @@ int qse_awk_rtx_closeio (
|
||||
qse_awk_rio_impl_t handler;
|
||||
qse_awk_rio_rwcmode_t rwcmode = QSE_AWK_RIO_CLOSE_FULL;
|
||||
|
||||
if (opt != QSE_NULL)
|
||||
if (opt)
|
||||
{
|
||||
if (opt[0] == QSE_T('r'))
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user