hawk_unescape_bcstr()/hawk_unescape_ucstr()

This commit is contained in:
hyung-hwan 2020-03-13 16:00:21 +00:00
parent 84c0b37e94
commit f2316d1d3e
5 changed files with 344 additions and 14 deletions

View File

@ -845,18 +845,29 @@ void HawkStd::clearConsoleOutputs ()
/*
 * Checks whether str has the form "name=value" and, if name is a valid
 * identifier, assigns value to the global variable called name.
 *
 * Returns:
 *    0 if str contains no '=' or begins with '=', or if the text before
 *      '=' is not accepted by hawk_isvalidident()
 *    1 if hawk_rtx_setgbltostrbyname() succeeded
 *   -1 if the string could not be duplicated or the assignment failed
 *
 * NOTE(review): this span looks like a rendered diff in which the removed
 * and the added lines are shown together without +/- markers ('eq' is
 * declared twice, and both the 'var' and the 'dstr' code paths are present).
 * It is not compilable exactly as shown - confirm against the real file.
 */
static int check_var_assign (hawk_rtx_t* rtx, const hawk_ooch_t* str)
{
hawk_ooch_t* eq, * var;
hawk_ooch_t* eq, * dstr;
int n;
/* locate the separator between the name and the value */
eq = hawk_find_oochar_in_oocstr(str, '=');
if (!eq || eq <= str) return 0; /* not assignment - no '=' or an empty name */
/* 'var' path: duplicate only the (eq - str) characters in front of '=' */
var = hawk_rtx_dupoochars(rtx, str, eq - str);
if (HAWK_UNLIKELY(!var)) return -1;
/* 'dstr' path: work on a duplicate obtained from hawk_rtx_dupoocstr()
 * because str is const and the code below writes into the buffer */
dstr = hawk_rtx_dupoocstr(rtx, str, HAWK_NULL);
if (HAWK_UNLIKELY(!dstr)) return -1;
/* 'var' path: the value (eq + 1) is handed over as it is, not unescaped */
n = hawk_isvalidident(hawk_rtx_gethawk(rtx), var)?
((hawk_rtx_setgbltostrbyname(rtx, var, eq + 1) <= -1)? -1: 1): 0;
hawk_rtx_freemem (rtx, var);
/* 'dstr' path: make eq point to the same offset inside the duplicate and
 * cut the buffer there. dstr becomes the name, eq + 1 the value */
eq = dstr + (eq - str);
*eq = '\0';
if (hawk_isvalidident(hawk_rtx_gethawk(rtx), dstr))
{
/* decode the backslash escape sequences of the value in place
 * before it is assigned */
hawk_unescape_oocstr (eq + 1);
n = (hawk_rtx_setgbltostrbyname(rtx, dstr, eq + 1) <= -1)? -1: 1;
}
else
{
/* the name is not a valid identifier. not treated as an assignment */
n = 0;
}
hawk_rtx_freemem (rtx, dstr);
return n;
}

View File

@ -682,6 +682,16 @@ HAWK_EXPORT hawk_bch_t* hawk_tokenize_bchars (
int ignorecase
);
/**
 * The hawk_unescape_ucstr() function decodes the backslash escape sequences
 * found in the null-terminated string \a str and stores the result back
 * into \a str. It handles the single-letter escapes n, r, t, f, b, v, a,
 * octal escapes of up to 3 digits, and the hexadecimal escapes \x (up to
 * 2 digits), \u (up to 4 digits) and \U (up to 8 digits). A backslash in
 * front of any other character is removed and the character is kept.
 */
HAWK_EXPORT void hawk_unescape_ucstr (
hawk_uch_t* str
);
/**
 * The hawk_unescape_bcstr() function is the byte-string counterpart of
 * hawk_unescape_ucstr(). It decodes the escape sequences in \a str in place
 * but does not support \u and \U; for these, only the backslash is removed.
 */
HAWK_EXPORT void hawk_unescape_bcstr (
hawk_bch_t* str
);
#if defined(HAWK_OOCH_IS_UCH)
# define hawk_equal_oochars hawk_equal_uchars
# define hawk_comp_oochars hawk_comp_uchars
@ -720,6 +730,7 @@ HAWK_EXPORT hawk_bch_t* hawk_tokenize_bchars (
# define hawk_split_oocstr hawk_split_ucstr
# define hawk_tokenize_oochars hawk_tokenize_uchars
# define hawk_unescape_oocstr hawk_unescape_ucstr
#else
# define hawk_equal_oochars hawk_equal_bchars
# define hawk_comp_oochars hawk_comp_bchars
@ -758,6 +769,7 @@ HAWK_EXPORT hawk_bch_t* hawk_tokenize_bchars (
# define hawk_split_oocstr hawk_split_bcstr
# define hawk_tokenize_oochars hawk_tokenize_bchars
# define hawk_unescape_oocstr hawk_unescape_bcstr
#endif
/* ------------------------------------------------------------------------- */

View File

@ -1673,18 +1673,29 @@ int hawk_parsestd (hawk_t* awk, hawk_parsestd_t in[], hawk_parsestd_t* out)
/*
 * Interprets str as a "name=value" assignment. When the part in front of
 * '=' is a valid identifier, the global variable of that name is set to the
 * part after '='.
 *
 * Return value:
 *    1 - hawk_rtx_setgbltostrbyname() succeeded
 *    0 - str has no '=', starts with '=', or the name is rejected by
 *        hawk_isvalidident()
 *   -1 - duplicating the string failed or the assignment failed
 *
 * NOTE(review): the lines below seem to come from a rendered diff that shows
 * removed and added lines together without +/- markers (two declarations of
 * 'eq', a 'var' code path and a 'dstr' code path). The text is not
 * compilable exactly as shown - confirm against the real file.
 */
static int check_var_assign (hawk_rtx_t* rtx, const hawk_ooch_t* str)
{
hawk_ooch_t* eq, * var;
hawk_ooch_t* eq, * dstr;
int n;
/* find the '=' that splits the name from the value */
eq = hawk_find_oochar_in_oocstr(str, '=');
if (!eq || eq <= str) return 0; /* not assignment - '=' is missing or is the first character */
/* 'var' path: copy the (eq - str) characters that make up the name */
var = hawk_rtx_dupoochars(rtx, str, eq - str);
if (HAWK_UNLIKELY(!var)) return -1;
/* 'dstr' path: str is const, so the modifications below are made on a
 * duplicate obtained from hawk_rtx_dupoocstr() */
dstr = hawk_rtx_dupoocstr(rtx, str, HAWK_NULL);
if (HAWK_UNLIKELY(!dstr)) return -1;
/* 'var' path: assigns the text after '=' verbatim, without unescaping */
n = hawk_isvalidident(hawk_rtx_gethawk(rtx), var)?
((hawk_rtx_setgbltostrbyname(rtx, var, eq + 1) <= -1)? -1: 1): 0;
hawk_rtx_freemem (rtx, var);
/* 'dstr' path: translate eq to the matching position in the duplicate
 * and terminate the name there. the value begins at eq + 1 */
eq = dstr + (eq - str);
*eq = '\0';
if (hawk_isvalidident(hawk_rtx_gethawk(rtx), dstr))
{
/* escape sequences in the value are decoded in place first */
hawk_unescape_oocstr (eq + 1);
n = (hawk_rtx_setgbltostrbyname(rtx, dstr, eq + 1) <= -1)? -1: 1;
}
else
{
/* invalid variable name. report "not an assignment" */
n = 0;
}
hawk_rtx_freemem (rtx, dstr);
return n;
}

View File

@ -1836,6 +1836,303 @@ exit_loop:
/* ------------------------------------------------------------------------ */
/*
 * Decodes the backslash escape sequences in the null-terminated string str.
 * The string is rewritten in place. Every escape sequence is at least as
 * long as the characters it produces, so the result never outgrows the input.
 *
 * Recognized sequences:
 *   \n \r \t \f \b \v \a   the matching control character
 *   \N \NN \NNN            1 to 3 octal digits
 *   \xH \xHH               1 to 2 hexadecimal digits
 *   \uH...                 1 to 4 hexadecimal digits
 *   \UH...                 1 to 8 hexadecimal digits
 *
 * A numeric sequence ends at the first character that is not a digit of the
 * expected kind, at the maximum digit count, or at the end of the string.
 * \x, \u and \U followed by no hexadecimal digit produce a literal 'x', 'u'
 * and 'U'. A backslash in front of any other character is dropped and the
 * character is kept. A backslash that is the very last character of the
 * string is discarded.
 *
 * The variable 'escaped' holds the state of the decoder:
 *   0        ordinary text
 *   1        a backslash has just been seen
 *   3        inside an octal sequence (3 is also the maximum digit count)
 *   2, 4, 8  inside a \x, \u, \U sequence (the value is the maximum
 *            digit count)
 */
void hawk_unescape_ucstr (hawk_uch_t* str)
{
	hawk_uch_t c, c_acc = 0, * p1, * p2;
	int escaped = 0, digit_count = 0;

	p1 = str; /* read position */
	p2 = str; /* write position. it never gets ahead of the read position */

	while ((c = *p1++) != '\0')
	{
		if (escaped == 3)
		{
			/* octal */
			if (c >= '0' && c <= '7')
			{
				c_acc = c_acc * 8 + c - '0';
				digit_count++;
				if (digit_count >= escaped)
				{
					/* the value is not clamped to 0xFF/0377.
					 * \777 is stored as it is */
					escaped = 0;
					*p2++ = c_acc;
				}
				continue;
			}

			/* the sequence is cut short by a non-octal character.
			 * emit what has been accumulated and fall through to
			 * handle 'c' as ordinary input */
			escaped = 0;
			*p2++ = c_acc;
		}
		else if (escaped == 2 || escaped == 4 || escaped == 8)
		{
			/* hexadecimal */
			int digit;

			if (c >= '0' && c <= '9') digit = c - '0';
			else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10;
			else if (c >= 'a' && c <= 'f') digit = c - 'a' + 10;
			else digit = -1;

			if (digit >= 0)
			{
				c_acc = c_acc * 16 + digit;
				digit_count++;
				if (digit_count >= escaped)
				{
					*p2++ = c_acc;
					escaped = 0;
				}
				continue;
			}

			if (digit_count == 0)
			{
				/* no valid character after the escaper.
				 * keep the escaper as it is. consider this input:
				 *   \xGG
				 * 'c' is at the first G. since \x is not followed
				 * by any hexadecimal digits, it's literally 'x' */
				*p2++ = (escaped == 2)? 'x':
				        (escaped == 4)? 'u': 'U';
			}
			else *p2++ = c_acc;

			/* fall through to handle 'c' as ordinary input */
			escaped = 0;
		}

		if (escaped == 1)
		{
			switch (c)
			{
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case 'f': c = '\f'; break;
				case 'b': c = '\b'; break;
				case 'v': c = '\v'; break;
				case 'a': c = '\a'; break;

				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
					/* the first octal digit is consumed here */
					escaped = 3;
					digit_count = 1;
					c_acc = c - '0';
					continue;

				case 'x':
					escaped = 2;
					digit_count = 0;
					c_acc = 0;
					continue;

				case 'u':
					escaped = 4;
					digit_count = 0;
					c_acc = 0;
					continue;

				case 'U':
					escaped = 8;
					digit_count = 0;
					c_acc = 0;
					continue;

				default:
					/* unknown escape. the character stands for itself */
					break;
			}

			*p2++ = c;
			escaped = 0;
			continue;
		}

		if (c == '\\')
		{
			escaped = 1;
			continue;
		}

		*p2++ = c;
	}

	/* the input ended inside a numeric escape sequence. handle it the same
	 * way as a sequence terminated by a non-digit instead of losing it.
	 * the backslash and the characters after it have been consumed without
	 * any output, so there is room for this character and the terminator */
	if (escaped == 3)
	{
		*p2++ = c_acc;
	}
	else if (escaped == 2 || escaped == 4 || escaped == 8)
	{
		if (digit_count == 0)
		{
			*p2++ = (escaped == 2)? 'x':
			        (escaped == 4)? 'u': 'U';
		}
		else *p2++ = c_acc;
	}

	*p2 = '\0';
}
/* ------------------------------------------------------------------------ */
/*
 * Decodes the backslash escape sequences in the null-terminated byte string
 * str. The string is rewritten in place. Every escape sequence is at least
 * as long as the characters it produces, so the result never outgrows the
 * input.
 *
 * Recognized sequences:
 *   \n \r \t \f \b \v \a   the matching control character
 *   \N \NN \NNN            1 to 3 octal digits
 *   \xH \xHH               1 to 2 hexadecimal digits
 *
 * \u and \U are not supported in a byte string. Like any other unknown
 * escape, they lose the backslash and keep the letter.
 *
 * A numeric sequence ends at the first character that is not a digit of the
 * expected kind, at the maximum digit count, or at the end of the string.
 * \x followed by no hexadecimal digit produces a literal 'x'. A backslash
 * that is the very last character of the string is discarded.
 *
 * The variable 'escaped' holds the state of the decoder:
 *   0  ordinary text
 *   1  a backslash has just been seen
 *   2  inside a \x sequence (2 is also the maximum digit count)
 *   3  inside an octal sequence (3 is also the maximum digit count)
 */
void hawk_unescape_bcstr (hawk_bch_t* str)
{
	hawk_bch_t c, c_acc = 0, * p1, * p2;
	int escaped = 0, digit_count = 0;

	p1 = str; /* read position */
	p2 = str; /* write position. it never gets ahead of the read position */

	while ((c = *p1++) != '\0')
	{
		if (escaped == 3)
		{
			/* octal */
			if (c >= '0' && c <= '7')
			{
				c_acc = c_acc * 8 + c - '0';
				digit_count++;
				if (digit_count >= escaped)
				{
					/* the value is not clamped to 0xFF/0377.
					 * it is stored after conversion to hawk_bch_t */
					escaped = 0;
					*p2++ = c_acc;
				}
				continue;
			}

			/* the sequence is cut short by a non-octal character.
			 * emit what has been accumulated and fall through to
			 * handle 'c' as ordinary input */
			escaped = 0;
			*p2++ = c_acc;
		}
		else if (escaped == 2)
		{
			/* hexadecimal */
			int digit;

			if (c >= '0' && c <= '9') digit = c - '0';
			else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10;
			else if (c >= 'a' && c <= 'f') digit = c - 'a' + 10;
			else digit = -1;

			if (digit >= 0)
			{
				c_acc = c_acc * 16 + digit;
				digit_count++;
				if (digit_count >= escaped)
				{
					*p2++ = c_acc;
					escaped = 0;
				}
				continue;
			}

			if (digit_count == 0)
			{
				/* no valid character after the escaper.
				 * keep the escaper as it is. consider this input:
				 *   \xGG
				 * 'c' is at the first G. since \x is not followed
				 * by any hexadecimal digits, it's literally 'x' */
				*p2++ = 'x';
			}
			else *p2++ = c_acc;

			/* fall through to handle 'c' as ordinary input */
			escaped = 0;
		}

		if (escaped == 1)
		{
			switch (c)
			{
				case 'n': c = '\n'; break;
				case 'r': c = '\r'; break;
				case 't': c = '\t'; break;
				case 'f': c = '\f'; break;
				case 'b': c = '\b'; break;
				case 'v': c = '\v'; break;
				case 'a': c = '\a'; break;

				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
					/* the first octal digit is consumed here */
					escaped = 3;
					digit_count = 1;
					c_acc = c - '0';
					continue;

				case 'x':
					escaped = 2;
					digit_count = 0;
					c_acc = 0;
					continue;

				default:
					/* unknown escape, including \u and \U which
					 * are not supported in a byte string. the
					 * character stands for itself */
					break;
			}

			*p2++ = c;
			escaped = 0;
			continue;
		}

		if (c == '\\')
		{
			escaped = 1;
			continue;
		}

		*p2++ = c;
	}

	/* the input ended inside a numeric escape sequence. handle it the same
	 * way as a sequence terminated by a non-digit instead of losing it.
	 * the backslash and the characters after it have been consumed without
	 * any output, so there is room for this character and the terminator */
	if (escaped == 3)
	{
		*p2++ = c_acc;
	}
	else if (escaped == 2)
	{
		if (digit_count == 0) *p2++ = 'x';
		else *p2++ = c_acc;
	}

	*p2 = '\0';
}
/* ------------------------------------------------------------------------ */
hawk_oow_t hawk_int_to_oocstr (hawk_int_t value, int radix, const hawk_ooch_t* prefix, hawk_ooch_t* buf, hawk_oow_t size)
{
hawk_int_t t, rem;

View File

@ -195,7 +195,7 @@ init:
val->fcb = 0;
val->val.len = len1 + len2;
val->val.ptr = (hawk_ooch_t*)(val + 1);
if (str1) hawk_copy_oochars_to_oocstr_unlimited (&val->val.ptr[0], str1, len1);
if (HAWK_LIKELY(str1)) hawk_copy_oochars_to_oocstr_unlimited (&val->val.ptr[0], str1, len1);
if (str2) hawk_copy_oochars_to_oocstr_unlimited (&val->val.ptr[len1], str2, len2);
val->val.ptr[val->val.len] = '\0';
@ -205,7 +205,6 @@ init:
return (hawk_val_t*)val;
}
hawk_val_t* hawk_rtx_makestrvalwithuchars (hawk_rtx_t* rtx, const hawk_uch_t* ucs, hawk_oow_t len)
{
#if defined(HAWK_OOCH_IS_UCH)