updated README.md
This commit is contained in:
parent
c45114458e
commit
c8e805b159
200
hawk/README.md
200
hawk/README.md
@ -1,26 +1,137 @@
|
|||||||
# Hawk
|
# Hawk
|
||||||
|
|
||||||
- [Language](#language)
|
- [Language](#language)
|
||||||
- [Basic Modules](#basic-modules)
|
- [Basic Modules](#basic-modules)
|
||||||
- [Embedding Guide](#embedding-guide)
|
- [Embedding Guide](#embedding-guide)
|
||||||
|
|
||||||
## Language <a name="language"></a>
|
## Language <a name="language"></a>
|
||||||
|
|
||||||
Hawk implements most of the AWK programming language elements with extensions.
|
Hawk implements most of the AWK programming language elements with extensions.
|
||||||
|
|
||||||
|
### Program Structure
|
||||||
|
|
||||||
|
A Hawk program is composed of the following elements at the top level.
|
||||||
|
|
||||||
|
- pattern-action block pair
|
||||||
|
- BEGIN action block pair
|
||||||
|
- END action block pair
|
||||||
|
- action block without a pattern
|
||||||
|
- pattern without an action block
|
||||||
|
- user-defined function
|
||||||
|
- @global variable declaration
|
||||||
|
- @include directive
|
||||||
|
- @pragma directive
|
||||||
|
|
||||||
|
However, none of the above is mandatory. Hawk accepts an empty program.
|
||||||
|
|
||||||
|
### Pattern-Action Block Pair
|
||||||
|
|
||||||
|
A pattern-action pair is composed of a pattern and an action block as shown below:
|
||||||
|
|
||||||
|
pattern {
|
||||||
|
statement
|
||||||
|
statement
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
A pattern can be one of the following when specified:
|
||||||
|
|
||||||
|
- expression
|
||||||
|
- first-expression, last-expression
|
||||||
|
- *BEGIN*
|
||||||
|
- *END*
|
||||||
|
|
||||||
|
An action block is a series of statements enclosed in a pair of curly braces. The *BEGIN* and *END* patterns require an action block while normal patterns don't. When no action block is specified for a normal pattern, it is treated
|
||||||
|
as if `{ print $0; }` is specified.
|
||||||
|
|
||||||
|
Hawk executes the action block for the *BEGIN* pattern when it starts executing a program. No start-up action is taken if no *BEGIN* pattern-action pair is specified. If a normal pattern-action pair and/or the *END*
|
||||||
|
pattern-action is specified, it reads the standard input stream. For each input line it reads, it checks if a normal pattern expression evaluates to true. For each pattern that evaluates to true, it executes the action block specified for
|
||||||
|
the pattern. When it reaches the end of the input stream, it executes the action block for the *END* pattern.
|
||||||
|
|
||||||
|
Hawk allows zero or more *BEGIN* patterns. When multiple *BEGIN* patterns are specified, it executes their action blocks in the order in which they appear in the program. The same applies to the *END* patterns and their action blocks. It
|
||||||
|
doesn't read the standard input stream for programs composed of BEGIN blocks only, whereas it reads the stream as long as there is an action block for the END pattern or a normal pattern. It evaluates an empty pattern to true.
|
||||||
|
As a result, the action block for an empty pattern is executed for all input lines read.
|
||||||
|
|
||||||
|
You can compose a pattern range by writing two patterns separated by a comma. The pattern range evaluates to true starting from the input line on which the first expression evaluates to true, up to and including the input line on which the last expression evaluates to true.
|
||||||
|
|
||||||
|
The following code snippet is a valid Hawk program that prints the string *hello, world* to the console and exits.
|
||||||
|
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
print "hello, world";
|
||||||
|
}
|
||||||
|
|
||||||
|
This program prints "hello, world" followed by "hello, all" to the console.
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
print "hello, world";
|
||||||
|
}
|
||||||
|
BEGIN {
|
||||||
|
print "hello, all";
|
||||||
|
}
|
||||||
|
|
||||||
|
For the following text input,
|
||||||
|
|
||||||
|
abcdefgahijklmn
|
||||||
|
1234567890
|
||||||
|
opqrstuvwxyzabc
|
||||||
|
9876543210
|
||||||
|
|
||||||
|
this program
|
||||||
|
|
||||||
|
BEGIN { mr=0; my_nr=0; }
|
||||||
|
/abc/ { print "[" $0 "]"; mr++; }
|
||||||
|
{ my_nr++; }
|
||||||
|
END {
|
||||||
|
print "total records: " NR;
|
||||||
|
print "total records selfcounted: " my_nr;
|
||||||
|
print "matching records: " mr;
|
||||||
|
}
|
||||||
|
|
||||||
|
produces the output text like this:
|
||||||
|
|
||||||
|
[abcdefgahijklmn]
|
||||||
|
[opqrstuvwxyzabc]
|
||||||
|
total records: 4
|
||||||
|
total records selfcounted: 4
|
||||||
|
matching records: 2
|
||||||
|
|
||||||
|
See the table for the order of execution indicated by the number and the result
|
||||||
|
of pattern evaluation enclosed in parentheses. The action block is executed if
|
||||||
|
the evaluation result is true.
|
||||||
|
|
||||||
|
| | START-UP | abcdefgahijklmn | 1234567890 | opqrstuvwxyzabc | 9876543210 | SHUTDOWN |
|
||||||
|
|-------------------------------------|----------|-----------------|------------|-----------------|------------|----------|
|
||||||
|
| `BEGIN { mr = 0; my_nr=0; }` | 1(true) | | | | | |
|
||||||
|
| `/abc/ { print "[" $0 "]"; mr++; }` | | 2(true) | 4(false) | 6(true) | 8(false) | |
|
||||||
|
| `{ my_nr++; }` | | 3(true) | 5(true) | 7(true) | 9(true) | |
|
||||||
|
| `END { print ... }` | | | | | | 10(true) |
|
||||||
|
|
||||||
|
For the same input, this program shows how to use a ranged pattern.
|
||||||
|
|
||||||
|
/abc/,/stu/ { print "[" $0 "]"; }
|
||||||
|
|
||||||
|
It produces the output text like this:
|
||||||
|
|
||||||
|
[abcdefgahijklmn]
|
||||||
|
[1234567890]
|
||||||
|
[opqrstuvwxyzabc]
|
||||||
|
|
||||||
|
The regular expression `/abc/` matches the first input line and `/stu/` matches the
|
||||||
|
third input line. So the range is true between the first input line and the
|
||||||
|
third input line inclusive.
|
||||||
|
|
||||||
### Entry Point
|
### Entry Point
|
||||||
|
|
||||||
You may change the entry point of your script by setting a function name with @pragma entry.
|
The typical execution begins with the BEGIN block, goes through pattern-action blocks, and reaches the END block. If you would like to use a function as an entry point, you may set a function name with @pragma entry.
|
||||||
|
|
||||||
```
|
@pragma entry main
|
||||||
@pragma entry main
|
|
||||||
|
|
||||||
function main ()
|
function main ()
|
||||||
{
|
{
|
||||||
print "hello, world";
|
print "hello, world";
|
||||||
}
|
}
|
||||||
```
|
|
||||||
|
|
||||||
### Value
|
### Value
|
||||||
|
|
||||||
@ -41,24 +152,20 @@ function main ()
|
|||||||
|
|
||||||
In AWK, the caller can pass an uninitialized variable as a function parameter and get a changed value if the called function sets it to an array.
|
In AWK, the caller can pass an uninitialized variable as a function parameter and get a changed value if the called function sets it to an array.
|
||||||
|
|
||||||
```
|
|
||||||
function q(a) {a[1]=20; a[2]=30;}
|
function q(a) {a[1]=20; a[2]=30;}
|
||||||
BEGIN { q(x); for (i in x) print i, x[i]; }
|
BEGIN { q(x); for (i in x) print i, x[i]; }
|
||||||
```
|
|
||||||
|
|
||||||
In Hawk, you can prefix the parameter name with & to indicate call-by-reference for the same effect.
|
In Hawk, you can prefix the parameter name with & to indicate call-by-reference for the same effect.
|
||||||
|
|
||||||
```
|
function q(&a) {a[1]=20; a[2]=30;}
|
||||||
function q(&a) {a[1]=20; a[2]=30;}
|
BEGIN { q(x); for (i in x) print i, x[i]; }
|
||||||
BEGIN { q(x); for (i in x) print i, x[i]; }
|
|
||||||
```
|
|
||||||
|
|
||||||
Alternatively, you may form an array before passing it to a function.
|
Alternatively, you may form an array before passing it to a function.
|
||||||
|
|
||||||
```
|
function q(a) {a[1]=20; a[2]=30;}
|
||||||
function q(a) {a[1]=20; a[2]=30;}
|
BEGIN { x[3]=99; q(x); for (i in x) print i, x[i]; }'
|
||||||
BEGIN { x[3]=99; q(x); for (i in x) print i, x[i]; }'
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## Basic Modules <a name="basic-modules"></a>
|
## Basic Modules <a name="basic-modules"></a>
|
||||||
@ -71,13 +178,22 @@ BEGIN { x[3]=99; q(x); for (i in x) print i, x[i]; }'
|
|||||||
|
|
||||||
## Embedding Guide <a name="embedding-guide"></a>
|
## Embedding Guide <a name="embedding-guide"></a>
|
||||||
|
|
||||||
|
To use hawk in your program, do the following:
|
||||||
|
|
||||||
```
|
- create a hawk instance
|
||||||
#include <hawk-std.h>
|
- parse a source script
|
||||||
#include <stdio.h>
|
- create a runtime context
|
||||||
#include <string.h>
|
- trigger execution on the runtime context
|
||||||
|
- destroy the runtime context
|
||||||
|
- destroy the hawk instance
|
||||||
|
|
||||||
static const hawk_bch_t* src =
|
The following sample illustrates the basic steps highlighted above.
|
||||||
|
|
||||||
|
#include <hawk-std.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
static const hawk_bch_t* src =
|
||||||
"BEGIN {"
|
"BEGIN {"
|
||||||
" for (i=2;i<=9;i++)"
|
" for (i=2;i<=9;i++)"
|
||||||
" {"
|
" {"
|
||||||
@ -87,36 +203,36 @@ static const hawk_bch_t* src =
|
|||||||
" }"
|
" }"
|
||||||
"}";
|
"}";
|
||||||
|
|
||||||
int main ()
|
int main ()
|
||||||
{
|
{
|
||||||
hawk_t* hawk = HAWK_NULL;
|
hawk_t* hawk = HAWK_NULL;
|
||||||
hawk_rtx_t* rtx = HAWK_NULL;
|
hawk_rtx_t* rtx = HAWK_NULL;
|
||||||
hawk_val_t* retv;
|
hawk_val_t* retv;
|
||||||
|
|
||||||
hawk_parsestd_t psin[2];
|
hawk_parsestd_t psin[2];
|
||||||
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
hawk = hawk_openstd(0, HAWK_NULL);
|
hawk = hawk_openstd(0, HAWK_NULL); /* create a hawk instance */
|
||||||
if (!hawk)
|
if (!hawk)
|
||||||
{
|
{
|
||||||
fprintf (stderr, "ERROR: cannot open hawk\n");
|
fprintf (stderr, "ERROR: cannot open hawk\n");
|
||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set up source script file to read in */
|
||||||
memset (&psin, 0, HAWK_SIZEOF(psin));
|
memset (&psin, 0, HAWK_SIZEOF(psin));
|
||||||
psin[0].type = HAWK_PARSESTD_BCS;
|
psin[0].type = HAWK_PARSESTD_BCS; /* specify the first script path */
|
||||||
psin[0].u.bcs.ptr = (hawk_bch_t*)src;
|
psin[0].u.bcs.ptr = (hawk_bch_t*)src;
|
||||||
psin[0].u.bcs.len = hawk_count_bcstr(src);
|
psin[0].u.bcs.len = hawk_count_bcstr(src);
|
||||||
psin[1].type = HAWK_PARSESTD_NULL;
|
psin[1].type = HAWK_PARSESTD_NULL; /* indicate the no more script to read */
|
||||||
|
|
||||||
ret = hawk_parsestd(hawk, psin, HAWK_NULL);
|
ret = hawk_parsestd(hawk, psin, HAWK_NULL); /* parse the script */
|
||||||
if (ret <= -1)
|
if (ret <= -1)
|
||||||
{
|
{
|
||||||
hawk_logbfmt (hawk, HAWK_LOG_STDERR, "ERROR(parse): %js\n", hawk_geterrmsg(hawk));
|
hawk_logbfmt (hawk, HAWK_LOG_STDERR, "ERROR(parse): %js\n", hawk_geterrmsg(hawk));
|
||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* create a runtime context needed for execution */
|
||||||
rtx = hawk_rtx_openstd (
|
rtx = hawk_rtx_openstd (
|
||||||
hawk,
|
hawk,
|
||||||
0,
|
0,
|
||||||
@ -131,6 +247,7 @@ int main ()
|
|||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* execute the BEGIN/pattern-action/END blocks */
|
||||||
retv = hawk_rtx_loop(rtx);
|
retv = hawk_rtx_loop(rtx);
|
||||||
if (!retv)
|
if (!retv)
|
||||||
{
|
{
|
||||||
@ -138,12 +255,15 @@ int main ()
|
|||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* lower the reference count of the returned value */
|
||||||
hawk_rtx_refdownval (rtx, retv);
|
hawk_rtx_refdownval (rtx, retv);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
oops:
|
oops:
|
||||||
if (rtx) hawk_rtx_close (rtx);
|
if (rtx) hawk_rtx_close (rtx); /* destroy the runtime context */
|
||||||
if (hawk) hawk_close (hawk);
|
if (hawk) hawk_close (hawk); /* destroy the hawk instance */
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
```
|
|
||||||
|
|
||||||
|
If you prefer C++, you may use the Hawk/HawkStd wrapper classes to simplify the task. The C++ classes are inferior to the C equivalents in that they don't allow creation of multiple runtime contexts over a single hawk instance.
|
||||||
|
@ -17,31 +17,31 @@ int main ()
|
|||||||
hawk_t* hawk = HAWK_NULL;
|
hawk_t* hawk = HAWK_NULL;
|
||||||
hawk_rtx_t* rtx = HAWK_NULL;
|
hawk_rtx_t* rtx = HAWK_NULL;
|
||||||
hawk_val_t* retv;
|
hawk_val_t* retv;
|
||||||
|
|
||||||
hawk_parsestd_t psin[2];
|
hawk_parsestd_t psin[2];
|
||||||
|
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
hawk = hawk_openstd(0, HAWK_NULL);
|
hawk = hawk_openstd(0, HAWK_NULL); /* create a hawk instance */
|
||||||
if (!hawk)
|
if (!hawk)
|
||||||
{
|
{
|
||||||
fprintf (stderr, "ERROR: cannot open hawk\n");
|
fprintf (stderr, "ERROR: cannot open hawk\n");
|
||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set up source script file to read in */
|
||||||
memset (&psin, 0, HAWK_SIZEOF(psin));
|
memset (&psin, 0, HAWK_SIZEOF(psin));
|
||||||
psin[0].type = HAWK_PARSESTD_BCS;
|
psin[0].type = HAWK_PARSESTD_BCS; /* specify the first script path */
|
||||||
psin[0].u.bcs.ptr = (hawk_bch_t*)src;
|
psin[0].u.bcs.ptr = (hawk_bch_t*)src;
|
||||||
psin[0].u.bcs.len = hawk_count_bcstr(src);
|
psin[0].u.bcs.len = hawk_count_bcstr(src);
|
||||||
psin[1].type = HAWK_PARSESTD_NULL;
|
psin[1].type = HAWK_PARSESTD_NULL; /* indicate the no more script to read */
|
||||||
|
|
||||||
ret = hawk_parsestd(hawk, psin, HAWK_NULL);
|
ret = hawk_parsestd(hawk, psin, HAWK_NULL); /* parse the script */
|
||||||
if (ret <= -1)
|
if (ret <= -1)
|
||||||
{
|
{
|
||||||
hawk_logbfmt (hawk, HAWK_LOG_STDERR, "ERROR(parse): %js\n", hawk_geterrmsg(hawk));
|
hawk_logbfmt (hawk, HAWK_LOG_STDERR, "ERROR(parse): %js\n", hawk_geterrmsg(hawk));
|
||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* create a runtime context needed for execution */
|
||||||
rtx = hawk_rtx_openstd (
|
rtx = hawk_rtx_openstd (
|
||||||
hawk,
|
hawk,
|
||||||
0,
|
0,
|
||||||
@ -56,6 +56,7 @@ int main ()
|
|||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* execute the BEGIN/pattern-action/END blocks */
|
||||||
retv = hawk_rtx_loop(rtx);
|
retv = hawk_rtx_loop(rtx);
|
||||||
if (!retv)
|
if (!retv)
|
||||||
{
|
{
|
||||||
@ -63,12 +64,12 @@ int main ()
|
|||||||
ret = -1; goto oops;
|
ret = -1; goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* lower the reference count of the returned value */
|
||||||
hawk_rtx_refdownval (rtx, retv);
|
hawk_rtx_refdownval (rtx, retv);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
oops:
|
oops:
|
||||||
if (rtx) hawk_rtx_close (rtx);
|
if (rtx) hawk_rtx_close (rtx); /* destroy the runtime context */
|
||||||
if (hawk) hawk_close (hawk);
|
if (hawk) hawk_close (hawk); /* destroy the hawk instance */
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user