enhanced a special form FS to affect record reading
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
21
README.md
21
README.md
@ -909,7 +909,7 @@ In this example:
|
||||
| CONVFMT | |
|
||||
| FILENAME | |
|
||||
| FNR | File Number of Records; it resets to 1 for each new input file |
|
||||
| FS | Field Separator, specifies the character(s) that separate fields (columns) in an input record. The default is whitespace. If `FS` is a string that begins with a question mark(`?`) and 3 characters, the 3 characters define special quoting characters in this order: escaper, left quote and right quote. |
|
||||
| FS | Field Separator, specifies the character(s) that separate fields (columns) in an input record. The default is whitespace. |
|
||||
| IGNORECASE | |
|
||||
| NF | Number of Fields (columns) in the current input record |
|
||||
| NR | Number of Records processed so far |
|
||||
@ -926,6 +926,25 @@ In this example:
|
||||
| STRIPSTRSPC | |
|
||||
| SUBSEP | |
|
||||
|
||||
If `FS` is a string beginning with a question mark (`?`) followed by exactly four characters, those four characters define the field separator and the special quoting characters, in this order:
|
||||
- Separator
|
||||
- Escaper
|
||||
- Left quote
|
||||
- Right quote
|
||||
|
||||
When the escaper, left quote, and right quote are all the same character (for example, `?,"""`), write that character twice inside a quoted field to represent a single literal occurrence of it.
|
||||
|
||||
In this specific case (`FS` is in the quoting form and the escaper, left quote, and right quote are identical), records may span multiple lines if `RS` is unset or set to `@nil`. This allows fields enclosed in quotes to contain embedded newlines.
|
||||
|
||||
```sh
|
||||
$ echo -e 'the tiger, "pounced on\n""me"""' | hawk -v FS='?,"""' '{ for (i = 0; i <= NF; i++) print i, "[" $i "]"; }'
|
||||
0 [the tiger, "pounced on
|
||||
""me"""]
|
||||
1 [the tiger]
|
||||
2 [pounced on
|
||||
"me"]
|
||||
```
|
||||
|
||||
## Pipes
|
||||
|
||||
```awk
|
||||
|
@ -43,7 +43,7 @@ char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, ch
|
||||
|
||||
while (p < end)
|
||||
{
|
||||
char c = *p;
|
||||
char_t c = *p;
|
||||
|
||||
if (escaped)
|
||||
{
|
||||
|
39
lib/rio.c
39
lib/rio.c
@ -370,10 +370,9 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
||||
hawk_rio_arg_t* p;
|
||||
hawk_rio_impl_t handler;
|
||||
int ret;
|
||||
#if 0
|
||||
int esc_lq_rq;
|
||||
int quoted;
|
||||
hawk_ooch_t esc, lq, rq;
|
||||
#endif
|
||||
|
||||
hawk_val_t* rs;
|
||||
hawk_oocs_t rrs;
|
||||
@ -418,18 +417,18 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* RS set to @nil, FS set to a special string starting with ?, followed by separator, esc, lq, rq */
|
||||
esc_lq_rq = 0;
|
||||
quoted = 0;
|
||||
if (ffs.len == 5 && ffs.ptr[0] == '?' && !rrs.ptr)
|
||||
{
|
||||
esc_lq_rq = 1;
|
||||
esc_lq_rq += (esc == lq && esc == rq);
|
||||
esc = ffs.ptr[2];
|
||||
lq = ffs.ptr[3];
|
||||
rq = ffs.ptr[4];
|
||||
|
||||
esc_lq_rq = 1;
|
||||
esc_lq_rq += (esc == lq && esc == rq);
|
||||
}
|
||||
#endif
|
||||
|
||||
ret = 1;
|
||||
|
||||
@ -511,13 +510,6 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
||||
p->in.pos = 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (esc_lq_rq == 2)
|
||||
{
|
||||
/* TODO: */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (rrs.ptr == HAWK_NULL)
|
||||
{
|
||||
hawk_oow_t start_pos = p->in.pos;
|
||||
@ -529,6 +521,27 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
||||
c = p->in.u.buf[p->in.pos++];
|
||||
end_pos = p->in.pos;
|
||||
|
||||
if (esc_lq_rq == 2)
|
||||
{
|
||||
/* if FS is something like [?,"""] and RS is @nil,
|
||||
* it supports multi-line quoted values. */
|
||||
if (quoted == 2)
|
||||
{
|
||||
quoted = (c == rq);
|
||||
/* no continue here as c could be a new line */
|
||||
}
|
||||
else if (quoted == 1)
|
||||
{
|
||||
if (c == rq) quoted = 2;
|
||||
continue;
|
||||
}
|
||||
else if (c == lq)
|
||||
{
|
||||
quoted = 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: handle different line terminators */
|
||||
/* separate by a new line */
|
||||
if (c == HAWK_T('\n'))
|
||||
|
Reference in New Issue
Block a user