changed awk to convert a map to the size of the map in a scalar context.

allowed @include inside a normal block
This commit is contained in:
hyung-hwan 2014-10-22 01:34:37 +00:00
parent 003c637c0f
commit 4de030f8a3
4 changed files with 182 additions and 95 deletions

View File

@ -14,24 +14,27 @@ QSEAWK reads an AWK program, recognizes various tokens contained while skipping
comments and whitespaces that don't constitute a token, analyses syntax, and comments and whitespaces that don't constitute a token, analyses syntax, and
transforms them to an internal form for execution. transforms them to an internal form for execution.
Program Structure Program Structure
---------------------- --------------------------
A QSEAWK program can be composed of the following elements at the top level. A QSEAWK program is composed of the following elements at the top level.
- pattern-action block pair - pattern-action block pair
- BEGIN action block pair
- END action block pair
- action block without a pattern - action block without a pattern
- pattern without an action block - pattern without an action block
- user-defined function - user-defined function
- \@global variable definition - \@global variable declaration
- \@include statement - \@include directive
However, none of the above is mandatory. QSEAWK accepts an empty program. However, none of the above is mandatory. QSEAWK accepts an empty program.
A typical pattern-action pair is composed of a pattern and an action block Pattern-Action Block Pairs
as shown below: --------------------------------
A pattern-action pair is composed of a pattern and an action block as
shown below:
~~~~~{.awk} ~~~~~{.awk}
pattern { pattern {
@ -162,8 +165,53 @@ third input line inclusive.
> how to change the entry point programmatically. > how to change the entry point programmatically.
Comments
--------------
\@include
----------------
The \@include directive inserts the contents of the file specified in the
following string as if they appeared in the source stream being processed.
~~~~~{.awk}
@include "abc.awk"
BEGIN { func_in_abc (); }
~~~~~
A semicolon is optional after the included file name. The following is the
same as the sample above.
~~~~~{.awk}
@include "abc.awk";
BEGIN { func_in_abc(); }
~~~~~
The directive can be used inside a block.
~~~~~{.awk}
BEGIN {
@include "abc.awk";
print var_in_abc;
}
~~~~~
> ### Note ###
> If #QSE_AWK_NEWLINE is off, a semicolon is required after the string.
>
> See #qse_awk_sio_t for customizing file inclusion handling.
Tokens
------------
When QSEAWK parses a program, it classifies a series of input characters
into meaningful tokens. It can extract the smallest meaningful unit through
this tokenization process.
### Comments ###
A comment is a part of the program text excluded during tokenization. You can
put descriptive text about the program in a comment.
A single-line comment is introduced by a hash character #, and is terminated at A single-line comment is introduced by a hash character #, and is terminated at
the end of the same line. Additionally, it supports a C-style multi-line comment the end of the same line. Additionally, it supports a C-style multi-line comment
@ -178,16 +226,6 @@ string literals and regular expressions.
*/ */
~~~~~ ~~~~~
Tokens
------------
When QSEAWK parses a program, it classifies a series of input characters
into meaningful tokens. It can extract the smallest meaningful unit through
this tokenization process.
### Reserved Words ### ### Reserved Words ###
The following words are reserved and cannot be used as a variable name, The following words are reserved and cannot be used as a variable name,
@ -374,8 +412,8 @@ converted to an integer 32 and the division is performed producing a
floating-point number. floating-point number.
You can create a hashed map by assigning a value to a variable indexed with You can create a hashed map by assigning a value to a variable indexed with
a subscript placed within square brackets. You can't convert a hashed map to a subscript placed within square brackets. A hashed map is converted to its size
a scalar type implicitly and explicitly. in the numeric context and "#MAP" in the string context.
A regular expression may or may not be viewed as a data type. You can't assign A regular expression may or may not be viewed as a data type. You can't assign
a compiled regular expression into a variable. A regular expression not placed a compiled regular expression into a variable. A regular expression not placed
@ -390,12 +428,18 @@ an empty string, and a nil value evaluate to false. All other values evaluate
to true. to true.
> ### Note ### > ### Note ###
> The automatic conversion of a hashed map to a number or a string is
> disallowed if #QSE_AWK_FLEXMAP is off. This program ends up with an
> error when it is off.
> ~~~~~{.awk}
> BEGIN { a[1]=1; a[30]=2; a[4]=4; print ("a=" a); }
> ~~~~~
>
> See #qse_awk_fnc_spec_t and #qse_awk_fnc_arg_t to define a function that > See #qse_awk_fnc_spec_t and #qse_awk_fnc_arg_t to define a function that
> can accept a regular expression. > can accept a regular expression.
User-defined Variables User-defined Variables
---------------------------- ----------------------------
@ -453,9 +497,8 @@ A local variable can shade a global variable. See the sample below:
> ### Note ### > ### Note ###
> The QSEAWK engine allows the mixture of named variables and declared variables. > The QSEAWK engine allows the mixture of named variables and declared variables.
> However, the mixture may lead to confusion. Use #QSE_AWK_IMPLICIT and > However, the mixture may lead to confusion. Use #QSE_AWK_IMPLICIT to allow
> #QSE_AWK_EXPLICIT to control what to allow and disallow when configuring > named variables when configuring the engine.
> the engine.
Built-in Variables Built-in Variables
@ -547,7 +590,7 @@ function name (arg1, arg2, ..., argn) {
The caller must invoke a function with the same or fewer arguments The caller must invoke a function with the same or fewer arguments
than the definition. When a function is called with fewer than the definition. When a function is called with fewer
arguments, the redundant arguments are initialized to a nil value. arguments, the redundant parameters are initialized to a nil value.
You can't define multiple functions with the same name. The function name You can't define multiple functions with the same name. The function name
can't conflict with named variables and global variables. can't conflict with named variables and global variables.
@ -555,6 +598,11 @@ can't conflict with named variables and global variables.
Built-in Functions Built-in Functions
------------------------- -------------------------
- index
- length
TBD.
Function Calls Function Calls
----------------------- -----------------------
@ -658,7 +706,7 @@ The !== operator is a negated form of the === operator.
----------------------------- -----------------------------
When #QSE_AWK_TOLERANT is on, you can use a grouped expression without When #QSE_AWK_TOLERANT is on, you can use a grouped expression without
the 'in' operator. A grouped expression is a parentheses-enclosed list the **in** operator. A grouped expression is a parentheses-enclosed list
of expressions separated with a comma. Each expression in the group is of expressions separated with a comma. Each expression in the group is
evaluated in the appearing order. The evaluation result of the last evaluated in the appearing order. The evaluation result of the last
expression in the group is returned as that of the group. expression in the group is returned as that of the group.
@ -761,29 +809,7 @@ able to use this statement.
~~~~~ ~~~~~
### \@include ###
The \@include directive inserts the contents of the object specified in the
following string, typically a file name, as if they appeared in the source
stream being processed. The directive can only be used at the outmost scope
where global variable declarations, *BEGIN*, *END*, and/or pattern-action
blocks appear.
~~~~~{.awk}
@include "abc.awk"
BEGIN { func_in_abc (); }
~~~~~
A semicolon is optional after the included file name. The following is the
same as the sample above.
~~~~~{.awk}
@include "abc.awk";
BEGIN { func_in_abc(); }
~~~~~
> ### Note ###
> If #QSE_AWK_NEWLINE is off, the semicolon is required.

View File

@ -512,6 +512,11 @@ struct qse_awk_sio_lxc_t
typedef struct qse_awk_sio_lxc_t qse_awk_sio_lxc_t; typedef struct qse_awk_sio_lxc_t qse_awk_sio_lxc_t;
typedef struct qse_awk_sio_arg_t qse_awk_sio_arg_t; typedef struct qse_awk_sio_arg_t qse_awk_sio_arg_t;
/**
* The qse_awk_sio_arg_t type defines a structure to describe the source
* stream.
*/
struct qse_awk_sio_arg_t struct qse_awk_sio_arg_t
{ {
/** /**
@ -699,18 +704,20 @@ typedef struct qse_awk_prm_t qse_awk_prm_t;
* \code * \code
* qse_ssize_t in ( * qse_ssize_t in (
* qse_awk_t* awk, qse_awk_sio_cmd_t cmd, * qse_awk_t* awk, qse_awk_sio_cmd_t cmd,
* qse_awk_sio_arg_t* arg,
* qse_char_t* buf, qse_size_t size) * qse_char_t* buf, qse_size_t size)
* { * {
* if (cmd == QSE_AWK_SIO_OPEN) open input stream; * if (cmd == QSE_AWK_SIO_OPEN) open input stream of arg->name;
* else if (cmd == QSE_AWK_SIO_CLOSE) close input stream; * else if (cmd == QSE_AWK_SIO_CLOSE) close input stream;
* else read input stream and fill buf up to size characters; * else read input stream and fill buf up to size characters;
* } * }
* *
* qse_ssize_t out ( * qse_ssize_t out (
* qse_awk_t* awk, qse_awk_sio_cmd_t cmd, * qse_awk_t* awk, qse_awk_sio_cmd_t cmd,
* qse_awk_sio_arg_t* arg,
* qse_char_t* data, qse_size_t size) * qse_char_t* data, qse_size_t size)
* { * {
* if (cmd == QSE_AWK_SIO_OPEN) open_output_stream; * if (cmd == QSE_AWK_SIO_OPEN) open_output_stream of arg->name;
* else if (cmd == QSE_AWK_SIO_CLOSE) close_output_stream; * else if (cmd == QSE_AWK_SIO_CLOSE) close_output_stream;
* else write data of size characters to output stream; * else write data of size characters to output stream;
* } * }

View File

@ -1461,8 +1461,29 @@ static qse_awk_nde_t* parse_block (
if (get_token(awk) <= -1) return QSE_NULL; if (get_token(awk) <= -1) return QSE_NULL;
} }
if (!MATCH(awk,TOK_XLOCAL)) break; if (MATCH(awk,TOK_XINCLUDE))
{
/* @include ... */
if (awk->opt.depth.s.incl > 0 &&
awk->parse.depth.incl >= awk->opt.depth.s.incl)
{
SETERR_LOC (awk, QSE_AWK_EINCLTD, &awk->ptok.loc);
return QSE_NULL;
}
if (get_token(awk) <= -1) return QSE_NULL;
if (!MATCH(awk,TOK_STR))
{
SETERR_LOC (awk, QSE_AWK_EINCLSTR, &awk->ptok.loc);
return QSE_NULL;
}
if (begin_include (awk) <= -1) return QSE_NULL;
}
else if (MATCH(awk,TOK_XLOCAL))
{
/* @local ... */
if (get_token(awk) <= -1) if (get_token(awk) <= -1)
{ {
qse_lda_delete ( qse_lda_delete (
@ -1479,6 +1500,8 @@ static qse_awk_nde_t* parse_block (
return QSE_NULL; return QSE_NULL;
} }
} }
else break;
}
/* block body */ /* block body */
head = QSE_NULL; curr = QSE_NULL; head = QSE_NULL; curr = QSE_NULL;
@ -1517,6 +1540,27 @@ static qse_awk_nde_t* parse_block (
break; break;
} }
if (MATCH(awk,TOK_XINCLUDE))
{
if (awk->opt.depth.s.incl > 0 &&
awk->parse.depth.incl >= awk->opt.depth.s.incl)
{
SETERR_LOC (awk, QSE_AWK_EINCLTD, &awk->ptok.loc);
return QSE_NULL;
}
if (get_token(awk) <= -1) return QSE_NULL;
if (!MATCH(awk,TOK_STR))
{
SETERR_LOC (awk, QSE_AWK_EINCLSTR, &awk->ptok.loc);
return QSE_NULL;
}
if (begin_include (awk) <= -1) return QSE_NULL;
}
else
{
/* parse an actual statement in a block */ /* parse an actual statement in a block */
{ {
qse_awk_loc_t sloc = awk->tok.loc; qse_awk_loc_t sloc = awk->tok.loc;
@ -1550,6 +1594,7 @@ static qse_awk_nde_t* parse_block (
else curr->next = nde; else curr->next = nde;
curr = nde; curr = nde;
} }
}
block = (qse_awk_nde_blk_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*block)); block = (qse_awk_nde_blk_t*) qse_awk_callocmem (awk, QSE_SIZEOF(*block));
if (block == QSE_NULL) if (block == QSE_NULL)

View File

@ -1605,6 +1605,15 @@ int qse_awk_rtx_valtonum (
); );
} }
case QSE_AWK_VAL_MAP:
{
if (rtx->awk->opt.trait & QSE_AWK_FLEXMAP)
{
*l = QSE_HTB_SIZE(((qse_awk_val_map_t*)v)->map);
return 0; /* long */
}
}
case QSE_AWK_VAL_REF: case QSE_AWK_VAL_REF:
{ {
return val_ref_to_num (rtx, (qse_awk_val_ref_t*)v, l, r); return val_ref_to_num (rtx, (qse_awk_val_ref_t*)v, l, r);