enhanced a special form FS to affect record reading
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
21
README.md
21
README.md
@ -909,7 +909,7 @@ In this example:
|
|||||||
| CONVFMT | |
|
| CONVFMT | |
|
||||||
| FILENAME | |
|
| FILENAME | |
|
||||||
| FNR | File Number of Records; it resets to 1 for each new input file |
|
| FNR | File Number of Records; it resets to 1 for each new input file |
|
||||||
| FS | Field Separator, specifies the character(s) that separate fields (columns) in an input record. The default is whitespace. If `FS` is a string that begins with a question mark(`?`) and 3 characters, the 3 characters define special quoting characters in this order: escaper, left quote and right quote. |
|
| FS | Field Separator, specifies the character(s) that separate fields (columns) in an input record. The default is whitespace. |
|
||||||
| IGNORECASE | |
|
| IGNORECASE | |
|
||||||
| NF | Number of Fields (columns) in the current input record |
|
| NF | Number of Fields (columns) in the current input record |
|
||||||
| NR | Number of Records processed so far |
|
| NR | Number of Records processed so far |
|
||||||
@ -926,6 +926,25 @@ In this example:
|
|||||||
| STRIPSTRSPC | |
|
| STRIPSTRSPC | |
|
||||||
| SUBSPEP | |
|
| SUBSPEP | |
|
||||||
|
|
||||||
|
If `FS` is a string beginning with a question mark (`?`) followed by four characters, those characters define special quoting behavior in this order:
|
||||||
|
- Separator
|
||||||
|
- Escaper
|
||||||
|
- Left quote
|
||||||
|
- Right quote
|
||||||
|
|
||||||
|
When the escaper, left quote, and right quote are all the same character (for example, `?,"""`), a literal occurrence of that character inside a quoted field must be written twice.
|
||||||
|
|
||||||
|
In this specific case - when `FS` is in quoting form and the escaper, left quote, and right quote are identical - if `RS` is unset or set to `@nil`, then records may span multiple lines. This allows fields enclosed in quotes to contain embedded newlines.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ echo -e 'the tiger, "pounced on\n""me"""' | hawk -v FS='?,"""' '{ for (i = 0; i <= NF; i++) print i, "[" $i "]"; }'
|
||||||
|
0 [the tiger, "pounced on
|
||||||
|
""me"""]
|
||||||
|
1 [the tiger]
|
||||||
|
2 [pounced on
|
||||||
|
"me"]
|
||||||
|
```
|
||||||
|
|
||||||
## Pipes
|
## Pipes
|
||||||
|
|
||||||
```awk
|
```awk
|
||||||
|
@ -43,7 +43,7 @@ char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, ch
|
|||||||
|
|
||||||
while (p < end)
|
while (p < end)
|
||||||
{
|
{
|
||||||
char c = *p;
|
char_t c = *p;
|
||||||
|
|
||||||
if (escaped)
|
if (escaped)
|
||||||
{
|
{
|
||||||
|
39
lib/rio.c
39
lib/rio.c
@ -370,10 +370,9 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
|||||||
hawk_rio_arg_t* p;
|
hawk_rio_arg_t* p;
|
||||||
hawk_rio_impl_t handler;
|
hawk_rio_impl_t handler;
|
||||||
int ret;
|
int ret;
|
||||||
#if 0
|
|
||||||
int esc_lq_rq;
|
int esc_lq_rq;
|
||||||
|
int quoted;
|
||||||
hawk_ooch_t esc, lq, rq;
|
hawk_ooch_t esc, lq, rq;
|
||||||
#endif
|
|
||||||
|
|
||||||
hawk_val_t* rs;
|
hawk_val_t* rs;
|
||||||
hawk_oocs_t rrs;
|
hawk_oocs_t rrs;
|
||||||
@ -418,18 +417,18 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* RS set to @nil, FS set to a special string starting with ?, followed by separator, esc, lq, rq */
|
/* RS set to @nil, FS set to a special string starting with ?, followed by separator, esc, lq, rq */
|
||||||
esc_lq_rq = 0;
|
esc_lq_rq = 0;
|
||||||
|
quoted = 0;
|
||||||
if (ffs.len == 5 && ffs.ptr[0] == '?' && !rrs.ptr)
|
if (ffs.len == 5 && ffs.ptr[0] == '?' && !rrs.ptr)
|
||||||
{
|
{
|
||||||
esc_lq_rq = 1;
|
|
||||||
esc_lq_rq += (esc == lq && esc == rq);
|
|
||||||
esc = ffs.ptr[2];
|
esc = ffs.ptr[2];
|
||||||
lq = ffs.ptr[3];
|
lq = ffs.ptr[3];
|
||||||
rq = ffs.ptr[4];
|
rq = ffs.ptr[4];
|
||||||
|
|
||||||
|
esc_lq_rq = 1;
|
||||||
|
esc_lq_rq += (esc == lq && esc == rq);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
|
||||||
@ -511,13 +510,6 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
|||||||
p->in.pos = 0;
|
p->in.pos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (esc_lq_rq == 2)
|
|
||||||
{
|
|
||||||
/* TODO: */
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
if (rrs.ptr == HAWK_NULL)
|
if (rrs.ptr == HAWK_NULL)
|
||||||
{
|
{
|
||||||
hawk_oow_t start_pos = p->in.pos;
|
hawk_oow_t start_pos = p->in.pos;
|
||||||
@ -529,6 +521,27 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, hawk_in_type_t in_type, const hawk_ooch_t*
|
|||||||
c = p->in.u.buf[p->in.pos++];
|
c = p->in.u.buf[p->in.pos++];
|
||||||
end_pos = p->in.pos;
|
end_pos = p->in.pos;
|
||||||
|
|
||||||
|
if (esc_lq_rq == 2)
|
||||||
|
{
|
||||||
|
/* if FS is something like [?,"""] and RS is @nil,
|
||||||
|
* it supports multi-line quoted values. */
|
||||||
|
if (quoted == 2)
|
||||||
|
{
|
||||||
|
quoted = (c == rq);
|
||||||
|
/* no continue here as c could be a new line */
|
||||||
|
}
|
||||||
|
else if (quoted == 1)
|
||||||
|
{
|
||||||
|
if (c == rq) quoted = 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (c == lq)
|
||||||
|
{
|
||||||
|
quoted = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* TODO: handle different line terminator */
|
/* TODO: handle different line terminator */
|
||||||
/* separate by a new line */
|
/* separate by a new line */
|
||||||
if (c == HAWK_T('\n'))
|
if (c == HAWK_T('\n'))
|
||||||
|
Reference in New Issue
Block a user