revised some documentation
This commit is contained in:
parent
7f64932d19
commit
e957f7a1a1
@ -1,5 +1,5 @@
|
|||||||
QSEAWK Language {#awk-lang}
|
QSEAWK Language {#awk-lang}
|
||||||
===============
|
================================================================================
|
||||||
|
|
||||||
QSEAWK implements the language described in the
|
QSEAWK implements the language described in the
|
||||||
[The AWK Programming Language][awkbook] with extensions.
|
[The AWK Programming Language][awkbook] with extensions.
|
||||||
@ -63,14 +63,24 @@ represents the value of 0.
|
|||||||
|
|
||||||
A string is enclosed in a pair of double quotes or single quotes.
|
A string is enclosed in a pair of double quotes or single quotes.
|
||||||
|
|
||||||
A character in a string encosed in the double-quotes can be preceeded with
|
A character in a string enclosed in the double-quotes, when preceded with
|
||||||
a back-slash to change the meaning of the character.
|
a back-slash, changes the meaning.
|
||||||
|
|
||||||
\\
|
- \\
|
||||||
\a
|
- \a
|
||||||
\b
|
- \b
|
||||||
\uXXXX
|
- \uXXXX
|
||||||
\UXXXXXXXX
|
- \UXXXXXXXX
|
||||||
|
|
||||||
|
You can use \\u and \\U in a string to specify a character by unicode if
|
||||||
|
[Character Type](@ref installation) chosen for building is the wide character
|
||||||
|
type.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC";
|
||||||
|
}
|
||||||
|
|
||||||
|
This program should print 유니코드 統一碼.
|
||||||
|
|
||||||
There are no escaping sequences supported for a string enclosed in the single
|
There are no escaping sequences supported for a string enclosed in the single
|
||||||
quotes. For that reason, you can't specify the single quote itself within
|
quotes. For that reason, you can't specify the single quote itself within
|
||||||
@ -99,7 +109,7 @@ Each language element requires the option in the second column to be on.
|
|||||||
<table>
|
<table>
|
||||||
<tr><th>Element </th><th>Option </th></tr>
|
<tr><th>Element </th><th>Option </th></tr>
|
||||||
<tr><td>Comment </td><td> </td></tr>
|
<tr><td>Comment </td><td> </td></tr>
|
||||||
<tr><td>Global variable declaration</td><td>#QSE_AWK_EXPLICIT </td></tr>
|
<tr><td>Global variable declaration</td><td> </td></tr>
|
||||||
<tr><td>Pattern-action block </td><td>#QSE_AWK_PABLOCK </td></tr>
|
<tr><td>Pattern-action block </td><td>#QSE_AWK_PABLOCK </td></tr>
|
||||||
<tr><td>User-defined function </td><td> </td></tr>
|
<tr><td>User-defined function </td><td> </td></tr>
|
||||||
<tr><td>\@include </td><td>#QSE_AWK_INCLUDE </td></tr>
|
<tr><td>\@include </td><td>#QSE_AWK_INCLUDE </td></tr>
|
||||||
@ -120,7 +130,7 @@ A pattern-action block, and a user-defined function can have the following eleme
|
|||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr><th>Element </th><th>Option </th></tr>
|
<tr><th>Element </th><th>Option </th></tr>
|
||||||
<tr><td>Local variable declaration</td><td>#QSE_AWK_EXPLICIT </td></tr>
|
<tr><td>Local variable declaration</td><td> </td></tr>
|
||||||
<tr><td>Statement </td><td> </td></tr>
|
<tr><td>Statement </td><td> </td></tr>
|
||||||
<tr><td>getline </td><td>#QSE_AWK_RIO </td></tr>
|
<tr><td>getline </td><td>#QSE_AWK_RIO </td></tr>
|
||||||
<tr><td>print </td><td>#QSE_AWK_RIO </td></tr>
|
<tr><td>print </td><td>#QSE_AWK_RIO </td></tr>
|
||||||
@ -218,116 +228,74 @@ BEGIN {
|
|||||||
The !== operator is a negated form of the === operator.
|
The !== operator is a negated form of the === operator.
|
||||||
|
|
||||||
|
|
||||||
@subsection awk_ext_vardecl VARIABLE DECLARATION
|
### Variable Declaration ###
|
||||||
|
|
||||||
#QSE_AWK_EXPLICIT enables variable declaration. Variables declared are accessed
|
Variables declared are accessed directly bypassing the global named map
|
||||||
directly bypassing the global named map that stores undeclared variables.
|
that stores undeclared variables. The keyword \@global introduces a global
|
||||||
The keyword @b global introduces a global variable and the keyword @b local
|
variable and the keyword \@local introduces a local variable. Local variable
|
||||||
introduces local variable. Local variable declaraion in a block must be
|
declaration in a block must be located before an expression or a statement
|
||||||
located before an expression or a statement appears.
|
appears.
|
||||||
|
|
||||||
@code
|
|
||||||
global g1, g2; #declares two global variables g1 and g2
|
|
||||||
|
|
||||||
BEGIN {
|
|
||||||
local a1, a2, a3; # declares three local variables
|
|
||||||
|
|
||||||
|
@global g1, g2; #declares two global variables g1 and g2
|
||||||
|
BEGIN {
|
||||||
|
@local a1, a2, a3; # declares three local variables
|
||||||
g1 = 300; a1 = 200;
|
g1 = 300; a1 = 200;
|
||||||
|
|
||||||
{
|
{
|
||||||
local a1; # a1 here hides the a1 at the outer scope
|
@local a1; # a1 here hides the a1 at the outer scope
|
||||||
local g1; # g1 here hides the global g1
|
@local g1; # g1 here hides the global g1
|
||||||
a1 = 10; g1 = 5;
|
a1 = 10; g1 = 5;
|
||||||
print a1, g1; # it prints 10 and 5
|
print a1, g1; # it prints 10 and 5
|
||||||
}
|
}
|
||||||
|
|
||||||
print a1, g1; # it prints 200 and 300
|
print a1, g1; # it prints 200 and 300
|
||||||
}
|
}
|
||||||
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
However, turning on #QSE_AWK_EXPLICIT does not disable named variables.
|
|
||||||
To disable named variables, you must turn off #QSE_AWK_IMPLICIT.
|
To disable named variables, you must turn off #QSE_AWK_IMPLICIT.
|
||||||
|
|
||||||
@subsection awk_ext_include INCLUDE
|
### \@include ###
|
||||||
|
|
||||||
The \@include directive inserts the contents of the object specified in the
|
The \@include directive inserts the contents of the object specified in the
|
||||||
following string, typically a file name, as if they appeared in the source
|
following string, typically a file name, as if they appeared in the source
|
||||||
stream being processed. The directive can only be used at the outermost scope
|
stream being processed. The directive can only be used at the outermost scope
|
||||||
where global variable declarations, @b BEGIN, @b END, and/or pattern-action
|
where global variable declarations, *BEGIN*, *END*, and/or pattern-action
|
||||||
blocks appear. To use \@include, you must turn on #QSE_AWK_INCLUDE.
|
blocks appear.
|
||||||
|
|
||||||
@code
|
@include "abc.awk"
|
||||||
@include "abc.awk"
|
BEGIN { func_in_abc (); }
|
||||||
BEGIN { func_in_abc (); }
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
A semicolon is optional after the included file name. The following is the
|
A semicolon is optional after the included file name. The following is the
|
||||||
same as the sample above.
|
same as the sample above.
|
||||||
@code
|
|
||||||
@include "abc.awk";
|
@include "abc.awk";
|
||||||
BEGIN { func_in_abc(); }
|
BEGIN { func_in_abc(); }
|
||||||
@endcode
|
|
||||||
|
|
||||||
If #QSE_AWK_NEWLINE is off, the semicolon is required.
|
If #QSE_AWK_NEWLINE is off, the semicolon is required.
|
||||||
|
|
||||||
|
### Function Call ###
|
||||||
|
|
||||||
@subsection awk_ext_funcall FUNCTIONC CALL
|
name(1);
|
||||||
|
|
||||||
|
|
||||||
name(1);
|
|
||||||
If there is no space between 'name' and the left parenthesis, the
|
If there is no space between 'name' and the left parenthesis, the
|
||||||
name is treated as a function name.
|
name is treated as a function name.
|
||||||
|
|
||||||
name (1);
|
name (1);
|
||||||
|
|
||||||
If there is a space, the name is treated as a function name if the
|
If there is a space, the name is treated as a function name if the
|
||||||
name has been declared as the function or if #QSE_AWK_IMPLICIT is on,
|
name has been declared as the function or if #QSE_AWK_IMPLICIT is on,
|
||||||
it may be 'name' concatenated with the expression in the parentheses.
|
it may be 'name' concatenated with the expression in the parentheses.
|
||||||
|
|
||||||
The following is a valid program.
|
The following is a valid program.
|
||||||
@code
|
|
||||||
@pragma implicit off
|
BEGIN { name (1); }
|
||||||
BEGIN { name (1); }
|
function name(a) { print a; }
|
||||||
function name(a) { print a; }'
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
However, in this program, the first 'name' becomes a named global variable,
|
However, in this program, the first 'name' becomes a named global variable,
|
||||||
so the function declaration with 'name' triggers the variable redefinition
|
so the function declaration with 'name' triggers the variable redefinition
|
||||||
error.
|
error.
|
||||||
@pragma implicit on
|
|
||||||
BEGIN { name (1); }
|
|
||||||
function name(a) { print a; }'
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
@subsection awk_ext_print EXTENDED PRINT/PRINTF
|
BEGIN { name (1); }
|
||||||
When #QSE_AWK_TOLERANT is on, print and printf are treated as if
|
function name(a) { print a; }
|
||||||
they are function calls. In this mode, they return a negative number
|
|
||||||
on failure and a zero on success and any I/O failure doesn't abort
|
|
||||||
a running program.
|
|
||||||
|
|
||||||
@code
|
### GROUPED EXPRESSION ###
|
||||||
BEGIN {
|
|
||||||
a = print "hello, world" > "/dev/null";
|
|
||||||
print a;
|
|
||||||
a = print ("hello, world") > "/dev/null";
|
|
||||||
print a;
|
|
||||||
}
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
Since print and printf are like function calls, you can use them
|
|
||||||
in any context where a normal expression is allowed. For example,
|
|
||||||
printf is used as a conditional expression in an 'if' statement
|
|
||||||
in the sample code below.
|
|
||||||
@code
|
|
||||||
BEGIN {
|
|
||||||
if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
|
|
||||||
print "FAILURE";
|
|
||||||
else
|
|
||||||
print "SUCCESS";
|
|
||||||
}
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
@subsection awk_ext_exprgroup GROUPED EXPRESSION
|
|
||||||
When #QSE_AWK_TOLERANT is on, you can use a grouped expression without
|
When #QSE_AWK_TOLERANT is on, you can use a grouped expression without
|
||||||
the 'in' operator. A grouped expression is a parentheses-enclosed list
|
the 'in' operator. A grouped expression is a parentheses-enclosed list
|
||||||
of expressions separated with a comma. Each expression in the group is
|
of expressions separated with a comma. Each expression in the group is
|
||||||
@ -344,153 +312,19 @@ BEGIN {
|
|||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
@subsection awk_ext_rwpipe TWO-WAY PIPE
|
### RETURN ###
|
||||||
|
|
||||||
The two-way pipe indicated by @b || is supproted, in addition to the one-way
|
|
||||||
pipe indicated by @b |. Turn on #QSE_AWK_RWPIPE to enable the two-way pipe.
|
|
||||||
|
|
||||||
@code
|
|
||||||
BEGIN {
|
|
||||||
print "15" || "sort";
|
|
||||||
print "14" || "sort";
|
|
||||||
print "13" || "sort";
|
|
||||||
print "12" || "sort";
|
|
||||||
print "11" || "sort";
|
|
||||||
# close the input side of the pipe as 'sort' starts emitting result
|
|
||||||
# once the input is closed.
|
|
||||||
close ("sort", "r");
|
|
||||||
while (("sort" || getline x) > 0) print "xx:", x;
|
|
||||||
}
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
This two-way pipe can create a TCP or UDP connection if the pipe command
|
|
||||||
string is prefixed with one of the followings:
|
|
||||||
|
|
||||||
- tcp:// - establishes a TCP connection to a specified IP address/port.
|
|
||||||
- udp:// - establishes a TCP connection to a specified IP address/port.
|
|
||||||
- tcpd:// - binds a TCP socket to a specified IP address/port and waits for the first connection.
|
|
||||||
- udpd:// - binds a TCP socket to a specified IP address/port and waits for the first sender.
|
|
||||||
|
|
||||||
@code
|
|
||||||
BEGIN {
|
|
||||||
# it binds a TCP socket to the IPv6 address :: and the port number
|
|
||||||
# 9999 and waits for the first coming connection. It repeats writing
|
|
||||||
# "hello world" to the first connected peer and reading a line from
|
|
||||||
# it until the session is torn down.
|
|
||||||
do {
|
|
||||||
print "hello world" || "tcpd://[::]:9999";
|
|
||||||
if (("tcpd://[::]:9999" || getline x) <= 0) break;
|
|
||||||
print x;
|
|
||||||
}
|
|
||||||
while(1);
|
|
||||||
}
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
You can specify TCP or UDP timeouts for connection, accepting, reading, and
|
|
||||||
writing with setioattr (pipe-name, timeout-name, timeout-value). timeout-name
|
|
||||||
should be one of "ctimeout", "atimeout", "rtimeout", and "wtimeout".
|
|
||||||
timeout-value is a number specifying the actual timeout in milliseconds.
|
|
||||||
A negative value indicates no timeout.
|
|
||||||
|
|
||||||
You can call getioattr (pipe-name, timeout-name) to get the current
|
|
||||||
timeout-value set.
|
|
||||||
|
|
||||||
See the example below.
|
|
||||||
|
|
||||||
@code
|
|
||||||
BEGIN {
|
|
||||||
setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3000);
|
|
||||||
setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5000);
|
|
||||||
print "hello world" || "tcp://127.0.0.1:9999";
|
|
||||||
"tcp://127.0.0.1:9999" || getline x;
|
|
||||||
print x;
|
|
||||||
}
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
Here is a more interesting example adopting Michael Sanders'
|
|
||||||
AWK web server, modified for QSEAWK.
|
|
||||||
|
|
||||||
@code
|
|
||||||
#
|
|
||||||
# Michael Sanders' AWK web server for QSEAWK.
|
|
||||||
# Orginal code in http://awk.info/?tools/server
|
|
||||||
#
|
|
||||||
# qseawk --tolerant=on --rwpipe=on webserver.awk
|
|
||||||
#
|
|
||||||
BEGIN {
|
|
||||||
x = 1 # script exits if x < 1
|
|
||||||
port = 8080 # port number
|
|
||||||
host = "tcpd://0.0.0.0:" port # host string
|
|
||||||
url = "http://localhost:" port # server url
|
|
||||||
status = 200 # 200 == OK
|
|
||||||
reason = "OK" # server response
|
|
||||||
RS = ORS = "\r\n" # header line terminators
|
|
||||||
doc = Setup() # html document
|
|
||||||
len = length(doc) + length(ORS) # length of document
|
|
||||||
while (x) {
|
|
||||||
if ($1 == "GET") RunApp(substr($2, 2))
|
|
||||||
if (! x) break
|
|
||||||
print "HTTP/1.0", status, reason || host
|
|
||||||
print "Connection: Close" || host
|
|
||||||
print "Pragma: no-cache" || host
|
|
||||||
print "Content-length:", len || host
|
|
||||||
print ORS doc || host
|
|
||||||
close(host) # close client connection
|
|
||||||
host || getline # wait for new client request
|
|
||||||
}
|
|
||||||
# server terminated...
|
|
||||||
doc = Bye()
|
|
||||||
len = length(doc) + length(ORS)
|
|
||||||
print "HTTP/1.0", status, reason || host
|
|
||||||
print "Connection: Close" || host
|
|
||||||
print "Pragma: no-cache" || host
|
|
||||||
print "Content-length:", len || host
|
|
||||||
print ORS doc || host
|
|
||||||
close(host)
|
|
||||||
}
|
|
||||||
|
|
||||||
function Setup() {
|
|
||||||
tmp = "<html>\
|
|
||||||
<head><title>Simple gawk server</title></head>\
|
|
||||||
<body>\
|
|
||||||
<p><a href=" url "/xterm>xterm</a>\
|
|
||||||
<p><a href=" url "/xcalc>xcalc</a>\
|
|
||||||
<p><a href=" url "/xload>xload</a>\
|
|
||||||
<p><a href=" url "/exit>terminate script</a>\
|
|
||||||
</body>\
|
|
||||||
</html>"
|
|
||||||
return tmp
|
|
||||||
}
|
|
||||||
|
|
||||||
function Bye() {
|
|
||||||
tmp = "<html>\
|
|
||||||
<head><title>Simple gawk server</title></head>\
|
|
||||||
<body><p>Script Terminated...</body>\
|
|
||||||
</html>"
|
|
||||||
return tmp
|
|
||||||
}
|
|
||||||
|
|
||||||
function RunApp(app) {
|
|
||||||
if (app == "xterm") {system("xterm&"); return}
|
|
||||||
if (app == "xcalc" ) {system("xcalc&"); return}
|
|
||||||
if (app == "xload" ) {system("xload&"); return}
|
|
||||||
if (app == "exit") {x = 0}
|
|
||||||
}
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
@subsection awk_ext_return RETURN
|
|
||||||
The return statement is valid in pattern-action blocks as well as in functions.
|
The return statement is valid in pattern-action blocks as well as in functions.
|
||||||
The execution of a calling block is aborted once the return statement is executed.
|
The execution of a calling block is aborted once the return statement is executed.
|
||||||
|
|
||||||
@code
|
@code
|
||||||
$ qseawk 'BEGIN { return 20; }' ; echo $?
|
$ qseawk 'BEGIN { return 20; }' ; echo $?
|
||||||
20
|
20
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
|
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
|
||||||
@code
|
@code
|
||||||
function getarray() {
|
function getarray() {
|
||||||
local a;
|
@local a;
|
||||||
a["one"] = 1;
|
a["one"] = 1;
|
||||||
a["two"] = 2;
|
a["two"] = 2;
|
||||||
a["three"] = 3;
|
a["three"] = 3;
|
||||||
@ -498,14 +332,14 @@ function getarray() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
BEGIN {
|
BEGIN {
|
||||||
local x;
|
@local x;
|
||||||
|
|
||||||
x = getarray();
|
x = getarray();
|
||||||
for (i in x) print i, x[i];
|
for (i in x) print i, x[i];
|
||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
@subsection awk_ext_reset RESET
|
|
||||||
|
### RESET ###
|
||||||
The reset statement resets an array variable back to the initial state.
|
The reset statement resets an array variable back to the initial state.
|
||||||
After that, the array variable can also be used as a scalar variable again.
|
After that, the array variable can also be used as a scalar variable again.
|
||||||
You must have #QSE_AWK_RESET on to be able to use this
|
You must have #QSE_AWK_RESET on to be able to use this
|
||||||
@ -520,7 +354,7 @@ BEGIN {
|
|||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
@subsection awk_ext_abort ABORT
|
### ABORT ###
|
||||||
The abort statement is similar to the exit statement except that
|
The abort statement is similar to the exit statement except that
|
||||||
it skips executing the END block. You must have #QSE_AWK_ABORT on to be
|
it skips executing the END block. You must have #QSE_AWK_ABORT on to be
|
||||||
able to use this statement.
|
able to use this statement.
|
||||||
@ -535,10 +369,7 @@ END {
|
|||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
@subsection awk_ext_comment COMMENT
|
### EXTENDED FUNCTIONS ###
|
||||||
You can use the C-style comment as well as the pound comment.
|
|
||||||
|
|
||||||
@subsection awk_ext_fnc EXTENDED FUNCTIONS
|
|
||||||
index() and match() can accept the third parameter indicating the position
|
index() and match() can accept the third parameter indicating the position
|
||||||
where the search begins. A negative value indicates a position from the back.
|
where the search begins. A negative value indicates a position from the back.
|
||||||
|
|
||||||
@ -557,7 +388,7 @@ BEGIN {
|
|||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
@subsection awk_ext_fs EXTENDED FS
|
### EXTENDED FS ###
|
||||||
|
|
||||||
If the value for FS begins with a question mark followed by 4
|
If the value for FS begins with a question mark followed by 4
|
||||||
additional letters, QSEAWK can split a record with quoted fields
|
additional letters, QSEAWK can split a record with quoted fields
|
||||||
@ -599,84 +430,6 @@ $3: a b c
|
|||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@subsection awk_ext_binnum BINARY NUMBER
|
|
||||||
Use 0b to begin a binary number sequence.
|
|
||||||
|
|
||||||
@code
|
|
||||||
$ qseawk 'BEGIN { printf ("%b %o %d %x\n", 0b1101, 0b1101, 0b1101, 0b1101); }'
|
|
||||||
1101 15 13 d
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@subsection awk_ext_unicode UNICODE ESCAPE SEQUENCE
|
|
||||||
|
|
||||||
If QSE is compiled for #QSE_CHAR_IS_WCHAR, you can use \\u and \\U in a
|
|
||||||
string to specify a character by unicode.
|
|
||||||
|
|
||||||
@code
|
|
||||||
$ qseawk 'BEGIN { print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC"; }'
|
|
||||||
유니코드 統一碼
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
|
|
||||||
@subsection awk_ext_ioenc I/O ENCODING
|
|
||||||
You can call setioattr() to set the character encoding of a stream resource
|
|
||||||
like a pipe or a file. See qse_findcmgr() for a list of supported encoding names.
|
|
||||||
|
|
||||||
Let's say you run this simple echoing script on a WIN32 platform that has
|
|
||||||
the active code page of 949 and is reachable at the IP address 192.168.2.8.
|
|
||||||
|
|
||||||
@code
|
|
||||||
C:\> chcp
|
|
||||||
Active code page: 949
|
|
||||||
C:\> type s.awk
|
|
||||||
BEGIN {
|
|
||||||
sock = "tcpd://0.0.0.0:9999";
|
|
||||||
setioattr (sock, "codepage", "cp949"); # this is not needed since the active
|
|
||||||
# code page is already 949.
|
|
||||||
do {
|
|
||||||
if ((sock || getline x) <= 0) break;
|
|
||||||
print "PEER: " x;
|
|
||||||
print x || sock;
|
|
||||||
}
|
|
||||||
while(1);
|
|
||||||
}
|
|
||||||
C:\> qseawk --rwpipe=on -f r.awk
|
|
||||||
PEER: 안녕
|
|
||||||
PEER: ?好!
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
Now you run the following script on a UTF-8 console of a Linux box.
|
|
||||||
|
|
||||||
@code
|
|
||||||
$ echo $LANG
|
|
||||||
en_US.UTF-8
|
|
||||||
$ cat c.awk
|
|
||||||
BEGIN {
|
|
||||||
peer = "tcp://192.168.2.8:9999";
|
|
||||||
setioattr (peer, "codepage", "cp949");
|
|
||||||
do
|
|
||||||
{
|
|
||||||
printf "> ";
|
|
||||||
if ((getline x) <= 0) break;
|
|
||||||
print x || peer;
|
|
||||||
if ((peer || getline line) <= -1) break;
|
|
||||||
print "PEER: " line;
|
|
||||||
}
|
|
||||||
while (1);
|
|
||||||
}
|
|
||||||
$ qseawk --rwpipe=on -f c.awk
|
|
||||||
> 안녕
|
|
||||||
PEER: 안녕
|
|
||||||
> 你好!
|
|
||||||
PEER: ?好!
|
|
||||||
@endcode
|
|
||||||
|
|
||||||
Note that 你 has been converted to a question mark since the letter is
|
|
||||||
not supported by cp949.
|
|
||||||
|
|
||||||
## Built-in I/O ##
|
## Built-in I/O ##
|
||||||
|
|
||||||
QSEAWK comes with built-in I/O commands and functions in addition to the
|
QSEAWK comes with built-in I/O commands and functions in addition to the
|
||||||
@ -687,11 +440,16 @@ is available only if QSEAWK is set with #QSE_AWK_RIO.
|
|||||||
|
|
||||||
The *getline* command has multiple forms of usage. It can be used with or
|
The *getline* command has multiple forms of usage. It can be used with or
|
||||||
without a variable name and can also be associated with a pipe or a file
|
without a variable name and can also be associated with a pipe or a file
|
||||||
redirection. Basically, it reads a record from an input stream associated
|
redirection. The default association is the console when no pipe and file
|
||||||
and stores it.
|
redirection is specified. In principle, it reads a record from the associated
|
||||||
|
input stream and updates $0 or a variable with the record. If it managed to
|
||||||
|
perform this successfully, it returns 1; if it detected EOF, it returns 0; it
|
||||||
|
returns -1 on failure.
|
||||||
|
|
||||||
*getline* without a following variable reads a record from an associated
|
*getline* without a following variable reads a record from an associated
|
||||||
input stream and updates $0 with the value. It also updates *NF*, *FNR*, *NR*.
|
input stream, updates $0 with the value and increments *FNR*, *NR*. Updating
|
||||||
|
$0 also causes changes in *NF* and fields from $1 to $NF.
|
||||||
|
|
||||||
The sample below reads records from the console and prints them.
|
The sample below reads records from the console and prints them.
|
||||||
|
|
||||||
BEGIN {
|
BEGIN {
|
||||||
@ -711,19 +469,112 @@ and updates the variable with the value. It updates *FNR* and *NR*, too.
|
|||||||
while (getline line > 0) print line;
|
while (getline line > 0) print line;
|
||||||
}
|
}
|
||||||
|
|
||||||
*getline* is associated with the console by default. you can change it
|
You can change the stream association to a pipe or a file. If *getline* or
|
||||||
to a file or a pipe by using |, ||, <.
|
*getline variable* is followed by an input redirection operator (<) and
|
||||||
|
an expression, the evaluation result of the expression becomes the name of
|
||||||
|
the file to read records from. The file is opened at the first occurrence
|
||||||
|
and can be closed with the *close* function.
|
||||||
|
|
||||||
The *getline* command acts like a function in that it returns a value: 1 on
|
BEGIN {
|
||||||
success, 0 on EOF, -1 on error. But you can't place an empty parentheses
|
filename = "/etc/passwd";
|
||||||
when no variable name is specified nor can you parenthesize the optional
|
while ((getline line < filename) > 0) print line;
|
||||||
variable name. For example, *getline(a)* is different from *getline a* and
|
close (filename);
|
||||||
means the concatenation of the return value of *getline* and the variable *a*.
|
}
|
||||||
|
|
||||||
|
When *getline* or *getline variable* is preceded with an expression and a pipe
|
||||||
|
operator(|), the evaluation result of the expression becomes the name of
|
||||||
|
the external command to execute. The command is executed at the first occurrence
|
||||||
|
and can be terminated with the *close* function. The example below reads
|
||||||
|
the output of the *ls -laF* command and prints it to the console.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
procname = "ls -laF";
|
||||||
|
while ((procname | getline line) > 0) print line;
|
||||||
|
close (procname);
|
||||||
|
}
|
||||||
|
|
||||||
|
The two-way pipe operator(||) can also be used to read records from an
|
||||||
|
external command. There are no visible changes to the end-user in case
|
||||||
|
of the example above if you switch the operator.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
procname = "ls -laF";
|
||||||
|
while ((procname || getline line) > 0) print line;
|
||||||
|
close (procname);
|
||||||
|
}
|
||||||
|
|
||||||
|
The *getline* command acts like a function in that it returns a value.
|
||||||
|
But you can't place empty parentheses when no variable name is specified
|
||||||
|
nor can you parenthesize the optional variable name. For example, *getline(a)*
|
||||||
|
is different from *getline a* and means the concatenation of the return value
|
||||||
|
of *getline* and the variable *a*. Besides, it is not clear if
|
||||||
|
|
||||||
|
getline a < b
|
||||||
|
|
||||||
|
is
|
||||||
|
|
||||||
|
(getline a) < b
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
(getline) (a < b)
|
||||||
|
|
||||||
|
For this reason, you are advised to parenthesize *getline* and its related
|
||||||
|
components to avoid confusion whenever necessary. The example reading into
|
||||||
|
the variable *line* can be made clearer with parenthesization.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
while ((getline line) > 0) print line;
|
||||||
|
}
|
||||||
|
|
||||||
### print ###
|
### print ###
|
||||||
|
**TODO**
|
||||||
|
|
||||||
### printf ###
|
### printf ###
|
||||||
|
|
||||||
|
When #QSE_AWK_TOLERANT is on, print and printf are treated as if
|
||||||
|
they are function calls. In this mode, they return a negative number
|
||||||
|
on failure and a zero on success and any I/O failure doesn't abort
|
||||||
|
a running program.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
a = print "hello, world" > "/dev/null";
|
||||||
|
print a;
|
||||||
|
a = print ("hello, world") > "/dev/null";
|
||||||
|
print a;
|
||||||
|
}
|
||||||
|
|
||||||
|
Since print and printf are like function calls, you can use them
|
||||||
|
in any context where a normal expression is allowed. For example,
|
||||||
|
printf is used as a conditional expression in an 'if' statement
|
||||||
|
in the sample code below.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
|
||||||
|
print "FAILURE";
|
||||||
|
else
|
||||||
|
print "SUCCESS";
|
||||||
|
}
|
||||||
|
|
||||||
|
### close (io-name, what) ###
|
||||||
|
|
||||||
|
The *close* function closes a stream indicated by the name *io-name*. It takes
|
||||||
|
an optional parameter *what* indicating whether input or output should be
|
||||||
|
closed.
|
||||||
|
|
||||||
|
If *io-name* is a file, it closes the file handle associated;
|
||||||
|
If *io-name* is a command, it may kill the running process from the command,
|
||||||
|
reclaims other system resources, and closes the pipe handles;
|
||||||
|
If *io-name* is a network stream, it tears down connections to the network
|
||||||
|
peer and closes the socket handles.
|
||||||
|
|
||||||
|
The optional parameter *what*, when used, must be one of *r* or *w*; it is useful
|
||||||
|
when *io-name* is a command invoked for the two-way operator. The value of
|
||||||
|
*r* causes the function to close the read-end of the pipe and the value of
|
||||||
|
*w* causes the function to close the write-end of the pipe.
|
||||||
|
|
||||||
|
The function returns 0 on success and -1 on failure.
|
||||||
|
|
||||||
### setioattr (io-name, attr-name, attr-value) ###
|
### setioattr (io-name, attr-name, attr-value) ###
|
||||||
|
|
||||||
The *setioattr* function changes the I/O attribute of the name *attr-name* to
|
The *setioattr* function changes the I/O attribute of the name *attr-name* to
|
||||||
@ -762,4 +613,181 @@ failure.
|
|||||||
else print "codepage: " codepage;
|
else print "codepage: " codepage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
### Two-way Pipe ###
|
||||||
|
|
||||||
|
The two-way pipe is indicated by the two-way pipe operator(||) and QSEAWK
|
||||||
|
must be set with #QSE_AWK_RWPIPE to be able to use the two-way pipe.
|
||||||
|
|
||||||
|
The example redirects the output of *print* to the external *sort* command
|
||||||
|
and reads back the output.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
print "15" || "sort";
|
||||||
|
print "14" || "sort";
|
||||||
|
print "13" || "sort";
|
||||||
|
print "12" || "sort";
|
||||||
|
print "11" || "sort";
|
||||||
|
# close the input side of the pipe as 'sort' starts emitting result
|
||||||
|
# once the input is closed.
|
||||||
|
close ("sort", "r");
|
||||||
|
while (("sort" || getline x) > 0) print x;
|
||||||
|
}
|
||||||
|
|
||||||
|
This two-way pipe can create a TCP or UDP connection if the pipe command
|
||||||
|
string is prefixed with one of the following:
|
||||||
|
|
||||||
|
- tcp:// - establishes a TCP connection to a specified IP address/port.
|
||||||
|
- udp:// - establishes a UDP connection to a specified IP address/port.
|
||||||
|
- tcpd:// - binds a TCP socket to a specified IP address/port and waits for the first connection.
|
||||||
|
- udpd:// - binds a UDP socket to a specified IP address/port and waits for the first sender.
|
||||||
|
|
||||||
|
See this example.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
# it binds a TCP socket to the IPv6 address :: and the port number
|
||||||
|
# 9999 and waits for the first coming connection. It repeats writing
|
||||||
|
# "hello world" to the first connected peer and reading a line from
|
||||||
|
# it until the session is torn down.
|
||||||
|
do {
|
||||||
|
print "hello world" || "tcpd://[::]:9999";
|
||||||
|
if (("tcpd://[::]:9999" || getline x) <= 0) break;
|
||||||
|
print x;
|
||||||
|
}
|
||||||
|
while(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
You can manipulate TCP or UDP timeouts for connection, accepting, reading, and
|
||||||
|
writing with the *setioattr* function and the *getioattr* function.
|
||||||
|
|
||||||
|
See the example below.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3);
|
||||||
|
setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5);
|
||||||
|
print "hello world" || "tcp://127.0.0.1:9999";
|
||||||
|
"tcp://127.0.0.1:9999" || getline x;
|
||||||
|
print x;
|
||||||
|
}
|
||||||
|
|
||||||
|
Here is an interesting example adopting Michael Sanders' AWK web server,
|
||||||
|
modified for QSEAWK.
|
||||||
|
|
||||||
|
#
|
||||||
|
# Michael Sanders' AWK web server for QSEAWK.
|
||||||
|
# Original code in http://awk.info/?tools/server
|
||||||
|
#
|
||||||
|
# qseawk --tolerant=on --rwpipe=on webserver.awk
|
||||||
|
#
|
||||||
|
BEGIN {
|
||||||
|
x = 1 # script exits if x < 1
|
||||||
|
port = 8080 # port number
|
||||||
|
host = "tcpd://0.0.0.0:" port # host string
|
||||||
|
url = "http://localhost:" port # server url
|
||||||
|
status = 200 # 200 == OK
|
||||||
|
reason = "OK" # server response
|
||||||
|
RS = ORS = "\r\n" # header line terminators
|
||||||
|
doc = Setup() # html document
|
||||||
|
len = length(doc) + length(ORS) # length of document
|
||||||
|
while (x) {
|
||||||
|
if ($1 == "GET") RunApp(substr($2, 2))
|
||||||
|
if (! x) break
|
||||||
|
print "HTTP/1.0", status, reason || host
|
||||||
|
print "Connection: Close" || host
|
||||||
|
print "Pragma: no-cache" || host
|
||||||
|
print "Content-length:", len || host
|
||||||
|
print ORS doc || host
|
||||||
|
close(host) # close client connection
|
||||||
|
host || getline # wait for new client request
|
||||||
|
}
|
||||||
|
# server terminated...
|
||||||
|
doc = Bye()
|
||||||
|
len = length(doc) + length(ORS)
|
||||||
|
print "HTTP/1.0", status, reason || host
|
||||||
|
print "Connection: Close" || host
|
||||||
|
print "Pragma: no-cache" || host
|
||||||
|
print "Content-length:", len || host
|
||||||
|
print ORS doc || host
|
||||||
|
close(host)
|
||||||
|
}
|
||||||
|
|
||||||
|
function Setup() {
|
||||||
|
tmp = "<html>\
|
||||||
|
<head><title>Simple gawk server</title></head>\
|
||||||
|
<body>\
|
||||||
|
<p><a href=" url "/xterm>xterm</a>\
|
||||||
|
<p><a href=" url "/xcalc>xcalc</a>\
|
||||||
|
<p><a href=" url "/xload>xload</a>\
|
||||||
|
<p><a href=" url "/exit>terminate script</a>\
|
||||||
|
</body>\
|
||||||
|
</html>"
|
||||||
|
return tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
function Bye() {
|
||||||
|
tmp = "<html>\
|
||||||
|
<head><title>Simple gawk server</title></head>\
|
||||||
|
<body><p>Script Terminated...</body>\
|
||||||
|
</html>"
|
||||||
|
return tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
function RunApp(app) {
|
||||||
|
if (app == "xterm") {system("xterm&"); return}
|
||||||
|
if (app == "xcalc" ) {system("xcalc&"); return}
|
||||||
|
if (app == "xload" ) {system("xload&"); return}
|
||||||
|
if (app == "exit") {x = 0}
|
||||||
|
}
|
||||||
|
|
||||||
|
### I/O Character Encoding ###
|
||||||
|
|
||||||
|
You can change the character encoding of a stream. See qse_findcmgr()
|
||||||
|
for a list of supported encoding names.
|
||||||
|
|
||||||
|
Let's say you run this simple echoing script on a WIN32 platform that has
|
||||||
|
the active code page of 949 and is reachable at the IP address 192.168.2.8.
|
||||||
|
|
||||||
|
C:\> chcp
|
||||||
|
Active code page: 949
|
||||||
|
C:\> type s.awk
|
||||||
|
BEGIN {
|
||||||
|
sock = "tcpd://0.0.0.0:9999";
|
||||||
|
setioattr (sock, "codepage", "cp949");
|
||||||
|
do {
|
||||||
|
if ((sock || getline x) <= 0) break;
|
||||||
|
print "PEER: " x;
|
||||||
|
print x || sock;
|
||||||
|
}
|
||||||
|
while(1);
|
||||||
|
}
|
||||||
|
C:\> qseawk -f s.awk
|
||||||
|
PEER: 안녕
|
||||||
|
PEER: ?好!
|
||||||
|
|
||||||
|
Now you run the following script on a UTF-8 console of a Linux box.
|
||||||
|
|
||||||
|
$ echo $LANG
|
||||||
|
en_US.UTF-8
|
||||||
|
$ cat c.awk
|
||||||
|
BEGIN {
|
||||||
|
peer = "tcp://192.168.2.8:9999";
|
||||||
|
setioattr (peer, "codepage", "cp949");
|
||||||
|
do
|
||||||
|
{
|
||||||
|
printf "> ";
|
||||||
|
if ((getline x) <= 0) break;
|
||||||
|
print x || peer;
|
||||||
|
if ((peer || getline line) <= -1) break;
|
||||||
|
print "PEER: " line;
|
||||||
|
}
|
||||||
|
while (1);
|
||||||
|
}
|
||||||
|
$ qseawk --rwpipe=on -f c.awk
|
||||||
|
> 안녕
|
||||||
|
PEER: 안녕
|
||||||
|
> 你好!
|
||||||
|
PEER: ?好!
|
||||||
|
|
||||||
|
Note that 你 has been converted to a question mark since the letter is
|
||||||
|
not supported by cp949.
|
||||||
|
|
||||||
[awkbook]: http://cm.bell-labs.com/cm/cs/awkbook/
|
[awkbook]: http://cm.bell-labs.com/cm/cs/awkbook/
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
Installation {#installation}
|
Installation {#installation}
|
||||||
============
|
================================================================================
|
||||||
|
|
||||||
## Source Package ##
|
Source Package
|
||||||
|
--------------
|
||||||
|
|
||||||
You can download the source package from
|
You can download the source package from
|
||||||
|
|
||||||
@ -14,7 +15,8 @@ repository by executing the following command:
|
|||||||
|
|
||||||
svn checkout http://qse.googlecode.com/svn/trunk/qse/
|
svn checkout http://qse.googlecode.com/svn/trunk/qse/
|
||||||
|
|
||||||
## Building on Unix/Linux ##
|
Building on Unix/Linux
|
||||||
|
----------------------
|
||||||
|
|
||||||
The project uses the standard autoconf/automake generated script files for
|
The project uses the standard autoconf/automake generated script files for
|
||||||
building. If you work on the systems where these scripts can run, you can
|
building. If you work on the systems where these scripts can run, you can
|
||||||
@ -26,7 +28,8 @@ follow the standard procedures of configuring and making the project.
|
|||||||
|
|
||||||
You can use this method of building for MinGW or Cygwin on Windows.
|
You can use this method of building for MinGW or Cygwin on Windows.
|
||||||
|
|
||||||
## Cross-compiling for WIN32 ##
|
Cross-compiling for WIN32
|
||||||
|
-------------------------
|
||||||
|
|
||||||
While the autoconf/automake scripts may not support your native compilers,
|
While the autoconf/automake scripts may not support your native compilers,
|
||||||
you can cross-compile it for WIN32/WIN64 with a cross-compiler. Get a
|
you can cross-compile it for WIN32/WIN64 with a cross-compiler. Get a
|
||||||
@ -48,7 +51,8 @@ With MINGW-W64, you may run *configure* as shown below:
|
|||||||
The actual host and target names may vary depending on the cross-compiler
|
The actual host and target names may vary depending on the cross-compiler
|
||||||
installed.
|
installed.
|
||||||
|
|
||||||
## Native Makefiles ##
|
Native Makefiles
|
||||||
|
----------------
|
||||||
|
|
||||||
The project provides makefiles for some selected compilers and platforms.
|
The project provides makefiles for some selected compilers and platforms.
|
||||||
The makefiles were generated with bakefile (www.bakefile.org) and can be
|
The makefiles were generated with bakefile (www.bakefile.org) and can be
|
||||||
@ -66,7 +70,8 @@ the wide character type, you can execute this:
|
|||||||
cd bld\os2-watcom
|
cd bld\os2-watcom
|
||||||
wmake BUILD=release CHAR=wchar
|
wmake BUILD=release CHAR=wchar
|
||||||
|
|
||||||
## Build Options ##
|
Build Options
|
||||||
|
-------------
|
||||||
|
|
||||||
The configure script and the native makefiles provide some options that you
|
The configure script and the native makefiles provide some options that you
|
||||||
can use to change the build environment. The options presented here can be
|
can use to change the build environment. The options presented here can be
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
QSE {#mainpage}
|
QSE {#mainpage}
|
||||||
===================
|
================================================================================
|
||||||
|
|
||||||
@image html qse-logo.png
|
@image html qse-logo.png
|
||||||
|
|
||||||
The QSE library implements AWK, SED, and Unix commands in an embeddable form
|
The QSE library implements AWK, SED, and Unix commands in an embeddable form
|
||||||
@ -19,7 +20,7 @@ Chung, Hyung-Hwan <hyunghwan.chung@gmail.com>
|
|||||||
|
|
||||||
See the subpages for more information.
|
See the subpages for more information.
|
||||||
|
|
||||||
- @subpage installation
|
- @ref installation
|
||||||
- @subpage mem "Memory Management"
|
- @subpage mem "Memory Management"
|
||||||
- @subpage cenc "Character Encoding"
|
- @subpage cenc "Character Encoding"
|
||||||
- @subpage io "I/O Handling"
|
- @subpage io "I/O Handling"
|
||||||
|
@ -130,7 +130,7 @@ QSE_EXPORT int qse_ismbsdrivecurpath (
|
|||||||
* @endcode
|
* @endcode
|
||||||
*
|
*
|
||||||
* If #QSE_CANONPATH_EMPTYSINGLEDOT is clear in the @a flags, a single dot
|
* If #QSE_CANONPATH_EMPTYSINGLEDOT is clear in the @a flags, a single dot
|
||||||
* is produced if the input @path resolves to the current directory logically.
|
* is produced if the input @a path resolves to the current directory logically.
|
||||||
* For example, dir/.. is canonicalized to a single period; If it is set,
|
* For example, dir/.. is canonicalized to a single period; If it is set,
|
||||||
* an empty string is produced. Even a single period as an input produces
|
* an empty string is produced. Even a single period as an input produces
|
||||||
* an empty string if it is set.
|
* an empty string if it is set.
|
||||||
@ -204,7 +204,7 @@ QSE_EXPORT int qse_iswcsdrivecurpath (
|
|||||||
* @endcode
|
* @endcode
|
||||||
*
|
*
|
||||||
* If #QSE_CANONPATH_EMPTYSINGLEDOT is clear in the @a flags, a single dot
|
* If #QSE_CANONPATH_EMPTYSINGLEDOT is clear in the @a flags, a single dot
|
||||||
* is produced if the input @path resolves to the current directory logically.
|
* is produced if the input @a path resolves to the current directory logically.
|
||||||
* For example, dir/.. is canonicalized to a single period; If it is set,
|
* For example, dir/.. is canonicalized to a single period; If it is set,
|
||||||
* an empty string is produced. Even a single period as an input produces
|
* an empty string is produced. Even a single period as an input produces
|
||||||
* an empty string if it is set.
|
* an empty string if it is set.
|
||||||
|
@ -1132,7 +1132,7 @@ int qse_awk_rtx_closeio (
|
|||||||
qse_awk_rio_impl_t handler;
|
qse_awk_rio_impl_t handler;
|
||||||
qse_awk_rio_rwcmode_t rwcmode = QSE_AWK_RIO_CLOSE_FULL;
|
qse_awk_rio_rwcmode_t rwcmode = QSE_AWK_RIO_CLOSE_FULL;
|
||||||
|
|
||||||
if (opt != QSE_NULL)
|
if (opt)
|
||||||
{
|
{
|
||||||
if (opt[0] == QSE_T('r'))
|
if (opt[0] == QSE_T('r'))
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user