256 lines
6.9 KiB
Plaintext
256 lines
6.9 KiB
Plaintext
/** @defgroup awk AWK
|
|
|
|
@section awk_intro INTRODUCTION
|
|
|
|
QSEAWK is an embeddable AWK interpreter and is a part of the @ref qse_intro
|
|
"QSE" library. The interpreter implements the language described in the book
|
|
<a class="el" href="http://cm.bell-labs.com/cm/cs/awkbook/">
|
|
The AWK Programming Language</a>
|
|
with @ref awk_ext "extensions". Its design focuses on building a flexible
|
|
and robust embedding API with minimal platform dependency. An embedding
|
|
application is capable of
|
|
- adding new global variables and functions.
|
|
- getting and setting the value of a global variable.
|
|
- calling a function with or without parameters and getting its return value.
|
|
- customizing I/O handlers for file, pipe, console I/O.
|
|
- creating multiple interpreters independent of each other.
|
|
- running a single script with different I/O streams independently.
|
|
- changing language features by setting options.
|
|
- and more
|
|
|
|
Embedding an interpreter typically involves the following steps.
|
|
|
|
- open a new interpreter
|
|
- parse in a source script
|
|
- open a new runtime context
|
|
- execute BEGIN,pattern-action,END blocks or call a function
|
|
- close the runtime context
|
|
- close the interpreter
|
|
|
|
The code example below demonstrates the steps in C. It executes the one liner
|
|
<b>BEGIN { print "hello, world" }</b>.
|
|
|
|
@code
|
|
/* cc -o hello hello.c -lqseawk -lqsecmn -lm */
|
|
#include <qse/awk/std.h>
|
|
#include <qse/cmn/stdio.h>
|
|
|
|
#define FAIL(msg) do { qse_printf(QSE_T("ERR: %s\n"),msg); goto oops; } while(0)
|
|
|
|
int main ()
|
|
{
|
|
qse_awk_t* awk = QSE_NULL;
|
|
qse_awk_rtx_t* rtx = QSE_NULL;
|
|
qse_awk_val_t* retv;
|
|
qse_awk_parsestd_in_t psin;
|
|
int ret = -1;
|
|
|
|
awk = qse_awk_openstd (0); /* open a new interpreter */
|
|
if (!awk) FAIL ("cannot open awk");
|
|
|
|
/* parse the hello world script from a string */
|
|
psin.type = QSE_AWK_PARSESTD_CP;
|
|
psin.u.cp = QSE_T("BEGIN { print \"hello, world\" }");
|
|
if (qse_awk_parsestd (awk, &psin, QSE_NULL) <= -1)
|
|
FAIL (qse_awk_geterrmsg(awk));
|
|
|
|
rtx = qse_awk_rtx_openstd ( /* open a runtime context */
|
|
awk, 0, /* no extension */
|
|
QSE_T("hello"), /* ARGV[0] */
|
|
QSE_NULL, /* stdin */
|
|
QSE_NULL /* stdout */
|
|
);
|
|
if (!rtx) FAIL (qse_awk_geterrmsg(awk));
|
|
|
|
/* execute BEGIN,pattern-action,END blocks */
|
|
retv = qse_awk_rtx_loop (rtx);
|
|
if (!retv) FAIL (qse_awk_rtx_geterrmsg(rtx));
|
|
|
|
qse_awk_rtx_refdownval (rtx, retv); /* destroy the return value */
|
|
ret = 0;
|
|
|
|
oops:
|
|
if (rtx) qse_awk_rtx_close (rtx); /* close the runtime context */
|
|
if (awk) qse_awk_close (awk); /* close the interpreter */
|
|
return ret;
|
|
}
|
|
@endcode
|
|
|
|
Things can get simpler when you use the C++ API. Note that the C++ API supports
|
|
just a single runtime context for each interpreter.
|
|
|
|
@code
|
|
/* c++ -o hello hello.cpp -lqseawkxx -lqseawk -lqsecmnxx -lqsecmn -lm */
|
|
#include <qse/awk/StdAwk.hpp>
|
|
#include <iostream>
|
|
|
|
#ifdef QSE_CHAR_IS_MCHAR
|
|
# define xcout std::cout
|
|
#else
|
|
# define xcout std::wcout
|
|
#endif
|
|
|
|
struct MyAwk: public QSE::StdAwk { ~MyAwk () { QSE::StdAwk::close (); } };
|
|
|
|
#define FAIL(awk) do { \
|
|
xcout << QSE_T("ERR: ") << awk.getErrorMessage() << std::endl; \
|
|
return -1; \
|
|
} while (0)
|
|
|
|
int main (int argc, char* argv[])
|
|
{
|
|
MyAwk awk;
|
|
|
|
// open a new interpreter
|
|
if (awk.open () <= -1) FAIL (awk);
|
|
|
|
// set ARGV[0]
|
|
if (awk.addArgument (QSE_T("hello")) <= -1) FAIL (awk);
|
|
|
|
// parse the source script string
|
|
MyAwk::SourceString in(QSE_T("BEGIN { print \"hello, world\" }"));
|
|
if (awk.parse (in, MyAwk::Source::NONE) == QSE_NULL) FAIL (awk);
|
|
|
|
// execute BEGIN, pattern-action, END blocks.
|
|
MyAwk::Value r;
|
|
if (awk.loop (&r) <= -1) FAIL (awk);
|
|
|
|
return 0;
|
|
}
|
|
@endcode
|
|
|
|
@section awk_ext EXTENSIONS
|
|
Some language extensions are implemented and they can be enabled by setting the
|
|
corresponding options.
|
|
|
|
@subsection awk_ext_vardecl VARIABLE DECLARATION
|
|
|
|
#QSE_AWK_EXPLICIT enables variable declaration. Variables declared are accessed
|
|
directly bypassing the global named map that stores undeclared variables.
|
|
The keyword @b global introduces a global variable and the keyword @b local
|
|
introduces a local variable. Local variable declaration in a block must be
|
|
located before an expression or a statement appears.
|
|
|
|
@code
|
|
global g1, g2; #declares two global variables g1 and g2
|
|
|
|
BEGIN {
|
|
local a1, a2, a3; # declares three local variables
|
|
|
|
g1 = 300; a1 = 200;
|
|
|
|
{
|
|
local a1; # a1 here hides the a1 at the outer scope
|
|
local g1; # g1 here hides the global g1
|
|
a1 = 10; g1 = 5;
|
|
print a1, g1; # it prints 10 and 5
|
|
}
|
|
|
|
print a1, g1; # it prints 200 and 300
|
|
}
|
|
|
|
@endcode
|
|
|
|
However, turning on #QSE_AWK_EXPLICIT does not disable named variables.
|
|
To disable named variables, you must turn off #QSE_AWK_IMPLICIT.
|
|
|
|
@subsection awk_ext_include INCLUDE
|
|
|
|
The \@include directive inserts the contents of the object specified in the
|
|
following string, typically a file name, as if they appeared in the source
|
|
stream being processed. The directive can only be used at the outermost scope
|
|
where global variable declarations, @b BEGIN, @b END, and/or pattern-action
|
|
blocks appear. To use \@include, you must turn on #QSE_AWK_INCLUDE.
|
|
|
|
@code
|
|
@include "abc.awk"
|
|
BEGIN { func_in_abc (); }
|
|
@endcode
|
|
|
|
@subsection awk_ext_rwpipe TWO-WAY PIPE
|
|
|
|
The two-way pipe indicated by @b || is supported, in addition to the one-way
|
|
pipe indicated by @b |. Turn on #QSE_AWK_RWPIPE to enable the two-way pipe.
|
|
|
|
@code
|
|
BEGIN {
|
|
print "15" || "sort";
|
|
print "14" || "sort";
|
|
print "13" || "sort";
|
|
print "12" || "sort";
|
|
print "11" || "sort";
|
|
# close the input side of the pipe as 'sort' starts emitting result
|
|
# once the input is closed.
|
|
close ("sort", "r");
|
|
while (("sort" || getline x) > 0) print "xx:", x;
|
|
}
|
|
@endcode
|
|
|
|
@subsection awk_ext_return RETURN
|
|
The return statement is valid in BEGIN blocks, END blocks, and pattern-action
|
|
blocks as well as in functions. The execution of a calling block is aborted
|
|
once the return statement is executed.
|
|
|
|
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
|
|
@code
|
|
function getarray() {
|
|
local a;
|
|
a["one"] = 1;
|
|
a["two"] = 2;
|
|
a["three"] = 3;
|
|
return a;
|
|
}
|
|
|
|
BEGIN {
|
|
local x;
|
|
|
|
x = getarray();
|
|
for (i in x) print i, x[i];
|
|
}
|
|
@endcode
|
|
|
|
@subsection awk_ext_comment COMMENT
|
|
You can use the C-style comment as well as the pound comment.
|
|
|
|
@subsection awk_ext_fnc EXTENDED FUNCTIONS
|
|
index() and match() can accept the third parameter indicating the position
|
|
where the search begins. A negative value indicates a position from the back.
|
|
|
|
@code
|
|
BEGIN {
|
|
xstr = "abcdefabcdefabcdef";
|
|
xsub = "abc";
|
|
xlen = length(xsub);
|
|
|
|
i = 1;
|
|
while ((i = index(xstr, xsub, i)) > 0)
|
|
{
|
|
print i, substr(xstr, i, xlen);
|
|
i += xlen;
|
|
}
|
|
}
|
|
@endcode
|
|
|
|
@subsection awk_ext_fs EXTENDED FS
|
|
@code
|
|
BEGIN { FS="?:\\\"\""; }
|
|
@endcode
|
|
|
|
|
|
@subsection awk_ext_binnum BINARY NUMBER
|
|
Use 0b to begin a binary number sequence.
|
|
|
|
@code
|
|
BEGIN { print 0b1101; }
|
|
@endcode
|
|
|
|
@subsection awk_ext_unicode UNICODE ESCAPE SEQUENCE
|
|
|
|
If QSE is compiled for #QSE_CHAR_IS_WCHAR, you can use \\u and \\U in a
|
|
string to specify a character by its Unicode code point.
|
|
|
|
@code
|
|
BEGIN { print "string=>[\uB313\U0000B313]"; }
|
|
@endcode
|
|
*/
|