updated README.md
This commit is contained in:
parent
c45114458e
commit
c8e805b159
200
hawk/README.md
200
hawk/README.md
@ -1,26 +1,137 @@
|
||||
# Hawk
|
||||
|
||||
- [Language](#language)
|
||||
- [Basic Modules](#basic-modules)
|
||||
- [Embedding Guide](#embedding-guide)
|
||||
- [Language](#language)
|
||||
- [Basic Modules](#basic-modules)
|
||||
- [Embedding Guide](#embedding-guide)
|
||||
|
||||
## Language <a name="language"></a>
|
||||
|
||||
Hawk implements most of the AWK programming language elements with extensions.
|
||||
|
||||
### Program Structure
|
||||
|
||||
A Hawk program is composed of the following elements at the top level.
|
||||
|
||||
- pattern-action block pair
|
||||
- BEGIN action block pair
|
||||
- END action block pair
|
||||
- action block without a pattern
|
||||
- pattern without an action block
|
||||
- user-defined function
|
||||
- @global variable declaration
|
||||
- @include directive
|
||||
- @pragma directive
|
||||
|
||||
However, none of the above is mandatory. Hawk accepts an empty program.
|
||||
|
||||
### Pattern-Action Block Pair
|
||||
|
||||
A pattern-action pair is composed of a pattern and an action block as shown below:
|
||||
|
||||
pattern {
|
||||
statement
|
||||
statement
|
||||
...
|
||||
}
|
||||
|
||||
A pattern can be one of the following when specified:
|
||||
|
||||
- expression
|
||||
- first-expression, last-expression
|
||||
- *BEGIN*
|
||||
- *END*
|
||||
|
||||
An action block is a series of statements enclosed in a curly bracket pair. The *BEGIN* and *END* patterns require an action block while normal patterns don't. When no action block is specified for a normal pattern, it is treated
|
||||
as if `{ print $0; }` is specified.
|
||||
|
||||
Hawk executes the action block for the *BEGIN* pattern when it starts executing a program. No start-up action is taken if no *BEGIN* pattern-action pair is specified. If a normal pattern-action pair and/or the *END*
|
||||
pattern-action is specified, it reads the standard input stream. For each input line it reads, it checks if a normal pattern expression evaluates to true. For each pattern that evaluates to true, it executes the action block specified for
|
||||
the pattern. When it reaches the end of the input stream, it executes the action block for the *END* pattern.
|
||||
|
||||
Hawk allows zero or more *BEGIN* patterns. When multiple *BEGIN* patterns are specified, it executes their action blocks in their appearance order in the program. The same applies to the *END* patterns and their action blocks. It
|
||||
doesn't read the standard input stream for programs composed of BEGIN blocks only whereas it reads the stream as long as there is an action block for the END pattern or a normal pattern. It evaluates an empty pattern to true.
|
||||
As a result, the action block for an empty pattern is executed for all input lines read.
|
||||
|
||||
You can compose a pattern range by putting 2 patterns separated by a comma. The pattern range evaluates to true once the first expression evaluates to true until the last expression evaluates to true.
|
||||
|
||||
The following code snippet is a valid Hawk program that prints the string *hello, world* to the console and exits.
|
||||
|
||||
|
||||
BEGIN {
|
||||
print "hello, world";
|
||||
}
|
||||
|
||||
This program prints "hello, world" followed by "hello, all" to the console.
|
||||
|
||||
BEGIN {
|
||||
print "hello, world";
|
||||
}
|
||||
BEGIN {
|
||||
print "hello, all";
|
||||
}
|
||||
|
||||
For the following text input,
|
||||
|
||||
abcdefgahijklmn
|
||||
1234567890
|
||||
opqrstuvwxyzabc
|
||||
9876543210
|
||||
|
||||
this program
|
||||
|
||||
BEGIN { mr=0; my_nr=0; }
|
||||
/abc/ { print "[" $0 "]"; mr++; }
|
||||
{ my_nr++; }
|
||||
END {
|
||||
print "total records: " NR;
|
||||
print "total records selfcounted: " my_nr;
|
||||
print "matching records: " mr;
|
||||
}
|
||||
|
||||
produces the output text like this:
|
||||
|
||||
[abcdefgahijklmn]
|
||||
[opqrstuvwxyzabc]
|
||||
total records: 4
|
||||
total records selfcounted: 4
|
||||
matching records: 2
|
||||
|
||||
See the table for the order of execution indicated by the number and the result
|
||||
of pattern evaluation enclosed in parenthesis. The action block is executed if
|
||||
the evaluation result is true.
|
||||
|
||||
| | START-UP | abcdefgahijklmn | 1234567890 | opqrstuvwxyzabc | 9876543210 | SHUTDOWN |
|
||||
|-------------------------------------|----------|-----------------|------------|-----------------|------------|----------|
|
||||
| `BEGIN { mr = 0; my_nr=0; }` | 1(true) | | | | | |
|
||||
| `/abc/ { print "[" $0 "]"; mr++; }` | | 2(true) | 4(false) | 6(true) | 8(false) | |
|
||||
| `{ my_nr++; }` | | 3(true) | 5(true) | 7(true) | 9(true) | |
|
||||
| `END { print ... }` | | | | | | 10(true) |
|
||||
|
||||
For the same input, this program shows how to use a ranged pattern.
|
||||
|
||||
/abc/,/stu/ { print "[" $0 "]"; }
|
||||
|
||||
It produces the output text like this:
|
||||
|
||||
[abcdefgahijklmn]
|
||||
[1234567890]
|
||||
[opqrstuvwxyzabc]
|
||||
|
||||
The regular expression `/abc/` matches the first input line and `/stu/` matches the
|
||||
third input line. So the range is true between the first input line and the
|
||||
third input line inclusive.
|
||||
|
||||
### Entry Point
|
||||
|
||||
You may change the entry point of your script by setting a function name with @pragma entry.
|
||||
The typical execution begins with the BEGIN block, goes through pattern-action blocks, and reaches the END block. If you would like to use a function as an entry point, you may set a function name with @pragma entry.
|
||||
|
||||
```
|
||||
@pragma entry main
|
||||
@pragma entry main
|
||||
|
||||
function main ()
|
||||
{
|
||||
function main ()
|
||||
{
|
||||
print "hello, world";
|
||||
}
|
||||
```
|
||||
}
|
||||
|
||||
|
||||
### Value
|
||||
|
||||
@ -41,24 +152,20 @@ function main ()
|
||||
|
||||
In AWK, the caller can pass an uninitialized variable as a function parameter and get a changed value if the called function sets it to an array.
|
||||
|
||||
```
|
||||
function q(a) {a[1]=20; a[2]=30;}
|
||||
BEGIN { q(x); for (i in x) print i, x[i]; }
|
||||
```
|
||||
|
||||
function q(a) {a[1]=20; a[2]=30;}
|
||||
BEGIN { q(x); for (i in x) print i, x[i]; }
|
||||
|
||||
In Hawk, you can prefix the parameter name with & to indicate call-by-reference for the same effect.
|
||||
|
||||
```
|
||||
function q(&a) {a[1]=20; a[2]=30;}
|
||||
BEGIN { q(x); for (i in x) print i, x[i]; }
|
||||
```
|
||||
function q(&a) {a[1]=20; a[2]=30;}
|
||||
BEGIN { q(x); for (i in x) print i, x[i]; }
|
||||
|
||||
|
||||
Alternatively, you may form an array before passing it to a function.
|
||||
|
||||
```
|
||||
function q(a) {a[1]=20; a[2]=30;}
|
||||
BEGIN { x[3]=99; q(x); for (i in x) print i, x[i]; }'
|
||||
```
|
||||
function q(a) {a[1]=20; a[2]=30;}
|
||||
BEGIN { x[3]=99; q(x); for (i in x) print i, x[i]; }'
|
||||
|
||||
|
||||
## Basic Modules <a name="basic-modules"></a>
|
||||
@ -71,13 +178,22 @@ BEGIN { x[3]=99; q(x); for (i in x) print i, x[i]; }'
|
||||
|
||||
## Embedding Guide <a name="embedding-guide"></a>
|
||||
|
||||
To use hawk in your program, do the following:
|
||||
|
||||
```
|
||||
#include <hawk-std.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
- create a hawk instance
|
||||
- parse a source script
|
||||
- create a runtime context
|
||||
- trigger execution on the runtime context
|
||||
- destroy the runtime context
|
||||
- destroy the hawk instance
|
||||
|
||||
static const hawk_bch_t* src =
|
||||
The following sample illustrates the basic steps highlighted above.
|
||||
|
||||
#include <hawk-std.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
static const hawk_bch_t* src =
|
||||
"BEGIN {"
|
||||
" for (i=2;i<=9;i++)"
|
||||
" {"
|
||||
@ -87,36 +203,36 @@ static const hawk_bch_t* src =
|
||||
" }"
|
||||
"}";
|
||||
|
||||
int main ()
|
||||
{
|
||||
int main ()
|
||||
{
|
||||
hawk_t* hawk = HAWK_NULL;
|
||||
hawk_rtx_t* rtx = HAWK_NULL;
|
||||
hawk_val_t* retv;
|
||||
|
||||
hawk_parsestd_t psin[2];
|
||||
|
||||
int ret;
|
||||
|
||||
hawk = hawk_openstd(0, HAWK_NULL);
|
||||
hawk = hawk_openstd(0, HAWK_NULL); /* create a hawk instance */
|
||||
if (!hawk)
|
||||
{
|
||||
fprintf (stderr, "ERROR: cannot open hawk\n");
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* set up source script file to read in */
|
||||
memset (&psin, 0, HAWK_SIZEOF(psin));
|
||||
psin[0].type = HAWK_PARSESTD_BCS;
|
||||
psin[0].type = HAWK_PARSESTD_BCS; /* specify the first script path */
|
||||
psin[0].u.bcs.ptr = (hawk_bch_t*)src;
|
||||
psin[0].u.bcs.len = hawk_count_bcstr(src);
|
||||
psin[1].type = HAWK_PARSESTD_NULL;
|
||||
psin[1].type = HAWK_PARSESTD_NULL; /* indicate the no more script to read */
|
||||
|
||||
ret = hawk_parsestd(hawk, psin, HAWK_NULL);
|
||||
ret = hawk_parsestd(hawk, psin, HAWK_NULL); /* parse the script */
|
||||
if (ret <= -1)
|
||||
{
|
||||
hawk_logbfmt (hawk, HAWK_LOG_STDERR, "ERROR(parse): %js\n", hawk_geterrmsg(hawk));
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* create a runtime context needed for execution */
|
||||
rtx = hawk_rtx_openstd (
|
||||
hawk,
|
||||
0,
|
||||
@ -131,6 +247,7 @@ int main ()
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* execute the BEGIN/pattern-action/END blocks */
|
||||
retv = hawk_rtx_loop(rtx);
|
||||
if (!retv)
|
||||
{
|
||||
@ -138,12 +255,15 @@ int main ()
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* lowered the reference count of the returned value */
|
||||
hawk_rtx_refdownval (rtx, retv);
|
||||
ret = 0;
|
||||
|
||||
oops:
|
||||
if (rtx) hawk_rtx_close (rtx);
|
||||
if (hawk) hawk_close (hawk);
|
||||
oops:
|
||||
if (rtx) hawk_rtx_close (rtx); /* destroy the runtime context */
|
||||
if (hawk) hawk_close (hawk); /* destroy the hawk instance */
|
||||
return -1;
|
||||
}
|
||||
```
|
||||
}
|
||||
|
||||
|
||||
If you prefer C++, you may use the Hawk/HawkStd wrapper classes to simplify the task. The C++ classes are inferior to the C equivalents in that they don't allow creation of multiple runtime contexts over a single hawk instance.
|
||||
|
@ -17,31 +17,31 @@ int main ()
|
||||
hawk_t* hawk = HAWK_NULL;
|
||||
hawk_rtx_t* rtx = HAWK_NULL;
|
||||
hawk_val_t* retv;
|
||||
|
||||
hawk_parsestd_t psin[2];
|
||||
|
||||
int ret;
|
||||
|
||||
hawk = hawk_openstd(0, HAWK_NULL);
|
||||
hawk = hawk_openstd(0, HAWK_NULL); /* create a hawk instance */
|
||||
if (!hawk)
|
||||
{
|
||||
fprintf (stderr, "ERROR: cannot open hawk\n");
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* set up source script file to read in */
|
||||
memset (&psin, 0, HAWK_SIZEOF(psin));
|
||||
psin[0].type = HAWK_PARSESTD_BCS;
|
||||
psin[0].type = HAWK_PARSESTD_BCS; /* specify the first script path */
|
||||
psin[0].u.bcs.ptr = (hawk_bch_t*)src;
|
||||
psin[0].u.bcs.len = hawk_count_bcstr(src);
|
||||
psin[1].type = HAWK_PARSESTD_NULL;
|
||||
psin[1].type = HAWK_PARSESTD_NULL; /* indicate the no more script to read */
|
||||
|
||||
ret = hawk_parsestd(hawk, psin, HAWK_NULL);
|
||||
ret = hawk_parsestd(hawk, psin, HAWK_NULL); /* parse the script */
|
||||
if (ret <= -1)
|
||||
{
|
||||
hawk_logbfmt (hawk, HAWK_LOG_STDERR, "ERROR(parse): %js\n", hawk_geterrmsg(hawk));
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* create a runtime context needed for execution */
|
||||
rtx = hawk_rtx_openstd (
|
||||
hawk,
|
||||
0,
|
||||
@ -56,6 +56,7 @@ int main ()
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* execute the BEGIN/pattern-action/END blocks */
|
||||
retv = hawk_rtx_loop(rtx);
|
||||
if (!retv)
|
||||
{
|
||||
@ -63,12 +64,12 @@ int main ()
|
||||
ret = -1; goto oops;
|
||||
}
|
||||
|
||||
/* lowered the reference count of the returned value */
|
||||
hawk_rtx_refdownval (rtx, retv);
|
||||
ret = 0;
|
||||
|
||||
oops:
|
||||
if (rtx) hawk_rtx_close (rtx);
|
||||
if (hawk) hawk_close (hawk);
|
||||
if (rtx) hawk_rtx_close (rtx); /* destroy the runtime context */
|
||||
if (hawk) hawk_close (hawk); /* destroy the hawk instance */
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user