diff --git a/lib/misc-imp.h b/lib/misc-imp.h index 41998813..8fee8044 100644 --- a/lib/misc-imp.h +++ b/lib/misc-imp.h @@ -30,6 +30,10 @@ char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, ch char_t* ts; /* token start */ char_t* tp; /* points to one char past the last token char */ char_t* xp; /* points to one char past the last effective char */ + int escape_doubling; + + /* to extract "abc""def" as abc"def */ + escape_doubling = (ec == lq && ec == rq); /* skip leading spaces */ while (p < end && is_xch_space(*p)) p++; @@ -48,8 +52,9 @@ char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, ch } else { - if (c == ec) + if (c == ec && !escape_doubling) { + /* normal escaping is never activated if escaping with two repeated characters is on */ escaped = 1; p++; } @@ -57,11 +62,17 @@ char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, ch { if (c == rq) { + if (escape_doubling && (p + 1) < end && *(p + 1) == rq) + { + p++; + goto not_rq; + } quoted = 0; p++; } else { + not_rq: *tp++ = c; xp = tp; p++; } } diff --git a/t/h-002.hawk b/t/h-002.hawk index 8e6bd863..dddc309b 100644 --- a/t/h-002.hawk +++ b/t/h-002.hawk @@ -494,6 +494,20 @@ function main() tap_ensure (a[3] === @b"coke", 1, @SCRIPTNAME, @SCRIPTLINE); tap_ensure (a[4] === @b"dark,age", 1, @SCRIPTNAME, @SCRIPTLINE); + ## escape doubling scheme - useful for csv-like files + ## if escaper, left-quote, right-quote are the same, escape doubling scheme is turned on + tap_ensure (split(@b"sea of people, brandy, coke, \"\"\"dark\"\", age\"", a, "?,\"\"\""), 4, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[1] === @b"sea of people", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[2] === @b"brandy", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[3] === @b"coke", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[4] === @b"\"dark\", age", 1, @SCRIPTNAME, @SCRIPTLINE); + + tap_ensure (split(@b"sea of people, brandy, coke, |||dark||, age|", a, "?,|||"), 4, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[1] === @b"sea of people", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[2] === @b"brandy", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[3] === @b"coke", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (a[4] === @b"|dark|, age", 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (split("Here===Is=Some=====Data", a, ""), 23, @SCRIPTNAME, @SCRIPTLINE); tap_ensure (hawk::typename(a), "map", @SCRIPTNAME, @SCRIPTLINE); tap_ensure (str::splita("Here===Is=Some=====Data", a, ""), 23, @SCRIPTNAME, @SCRIPTLINE);