Enhanced the tokenizer to recognize octal escape notation in regular expressions wherever it does not conflict with a backreference
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@ -15,7 +15,7 @@ The library is stable, portable, and designed for projects that need a scripting
|
||||
- [Pragmas](#pragmas)
|
||||
- [@pragma entry](#pragma-entry)
|
||||
- [@pragma implicit](#pragma-implicit)
|
||||
- [@pragma sriprecspc](#pragma-sriprecspc)
|
||||
- [@pragma striprecspc](#pragma-striprecspc)
|
||||
- [@include and @include\_once](#include-and-include_once)
|
||||
- [Comments](#comments)
|
||||
- [Reserved Words](#reserved-words)
|
||||
@ -364,7 +364,7 @@ This feature can be beneficial for catching potential variable misspellings or u
|
||||
|
||||
If you don't want to enforce variable declarations, you can simply omit the `@pragma implicit off` directive or specify `@pragma implicit on`, and Hawk will behave like traditional awk, allowing implicit variable declarations.
|
||||
|
||||
### @pragma sriprecspc
|
||||
### @pragma striprecspc
|
||||
|
||||
The `@pragma striprecspc` directive in Hawk controls how the interpreter handles leading and trailing blank fields in input records when using a regular expression as the field separator (FS).
|
||||
|
||||
|
20
lib/parse.c
20
lib/parse.c
@ -6558,8 +6558,18 @@ static int get_string (
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (digit_count == 1 && end_char == HAWK_T('/'))
|
||||
{
|
||||
/* inside a regular expression, it's likely a backreference */
|
||||
hawk_ooch_t oc = c_acc + HAWK_T('0');
|
||||
ADD_TOKEN_CHAR(hawk, tok, esc_char);
|
||||
ADD_TOKEN_CHAR(hawk, tok, oc);
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_TOKEN_UINT32(hawk, tok, c_acc);
|
||||
}
|
||||
escaped = 0;
|
||||
}
|
||||
}
|
||||
@ -6600,7 +6610,6 @@ static int get_string (
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if (digit_count == 0)
|
||||
{
|
||||
hawk_ooch_t ec;
|
||||
@ -6619,7 +6628,7 @@ static int get_string (
|
||||
else ADD_TOKEN_UINT32(hawk, tok, c_acc);
|
||||
|
||||
escaped = 0;
|
||||
/* carray on to handle the current character */
|
||||
/* carry on to handle the current character */
|
||||
}
|
||||
}
|
||||
else if (escaped == 99)
|
||||
@ -6671,10 +6680,11 @@ static int get_string (
|
||||
else if (c == HAWK_T('b')) c = HAWK_T('\b');
|
||||
else if (c == HAWK_T('v')) c = HAWK_T('\v');
|
||||
else if (c == HAWK_T('a')) c = HAWK_T('\a');
|
||||
else if (c >= HAWK_T('0') && c <= HAWK_T('7') && end_char != HAWK_T('/'))
|
||||
else if (c >= HAWK_T('0') && c <= HAWK_T('7'))
|
||||
{
|
||||
/* i don't support the octal notation for a regular expression.
|
||||
* it conflicts with the backreference notation between \1 and \7 inclusive. */
|
||||
/* treat it as an octal notation first and
|
||||
* check if it's a backreference between \1 and \7 inclusive
|
||||
* in the `if (escaped == 3)` block. */
|
||||
escaped = 3;
|
||||
digit_count = 1;
|
||||
c_acc = c - HAWK_T('0');
|
||||
|
@ -557,10 +557,11 @@ function main()
|
||||
|
||||
|
||||
{
|
||||
## back reference in a regular expression
|
||||
tap_ensure (("a2b" ~ /(a)\12b/), 0, @SCRIPTNAME, @SCRIPTLINE);
|
||||
tap_ensure (("aa2b" ~ /(a)\12b/), 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
tap_ensure (("aaa2b" ~ /(a)\12b/), 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
## back reference in a regular expression - use the character class
|
||||
## notation to avoid escaping - \1[2]
|
||||
tap_ensure (("a2b" ~ /(a)\1[2]b/), 0, @SCRIPTNAME, @SCRIPTLINE);
|
||||
tap_ensure (("aa2b" ~ /(a)\1[2]b/), 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
tap_ensure (("aaa2b" ~ /(a)\1[2]b/), 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user