Changed the awk \xNN escape sequence to accept at most 2 hexadecimal digits.

Implemented the w/W conversion specifiers in formatted output.
This commit is contained in:
hyung-hwan 2019-03-19 13:59:22 +00:00
parent d680e3aebe
commit 6c61e0bab6
2 changed files with 99 additions and 7 deletions

View File

@ -5707,7 +5707,9 @@ static int get_string (
{ {
qse_char_t rc; qse_char_t rc;
rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'): /*rc = (escaped == QSE_TYPE_MAX(qse_size_t))? QSE_T('x'):
(escaped == 4)? QSE_T('u'): QSE_T('U');*/
rc = (escaped == 2)? QSE_T('x'):
(escaped == 4)? QSE_T('u'): QSE_T('U'); (escaped == 4)? QSE_T('u'): QSE_T('U');
if (digit_count == 0) if (digit_count == 0)
ADD_TOKEN_CHAR (awk, tok, rc); ADD_TOKEN_CHAR (awk, tok, rc);
@ -5751,7 +5753,8 @@ static int get_string (
} }
else if (c == QSE_T('x')) else if (c == QSE_T('x'))
{ {
escaped = QSE_TYPE_MAX(qse_size_t); /*escaped = QSE_TYPE_MAX(qse_size_t);*/
escaped = 2; /* i find allowing only 2 hexadigits more useful though it may break compatibilty with other awk implementations */
digit_count = 0; digit_count = 0;
c_acc = 0; c_acc = 0;
continue; continue;

View File

@ -91,15 +91,26 @@ static char_t* sprintn (char_t* nbuf, qse_uintmax_t num, int base, int *lenp, in
* I/O error occurs */ * I/O error occurs */
#undef PUT_CHAR #undef PUT_CHAR
#undef PUT_BYTE_IN_HEX
#undef BYTE_PRINTABLE
#define PUT_CHAR(c) do { \ #define PUT_CHAR(c) do { \
int xx; \ int xx; \
if (data->count >= data->limit) goto done; \ if (data->count >= data->limit) goto done; \
if ((xx = data->put (c, data->ctx)) <= -1) goto oops; \ if ((xx = data->put(c, data->ctx)) <= -1) goto oops; \
if (xx == 0) goto done; \ if (xx == 0) goto done; \
data->count++; \ data->count++; \
} while (0) } while (0)
/* PUT_BYTE_IN_HEX(byte, extra_flags)
 *
 * Converts one byte to text with qse_bytetombs() into a 3-element local
 * buffer and then emits the first two characters of that buffer through
 * PUT_CHAR().
 *
 * - byte:        the value handed to qse_bytetombs().
 * - extra_flags: OR-ed into the flag word passed to qse_bytetombs()
 *                together with 16 (presumably the radix - confirm against
 *                the qse_bytetombs() documentation). The last argument '0'
 *                is presumably the fill character - confirm as well.
 *
 * Because the output goes through PUT_CHAR(), this macro can only be used
 * where PUT_CHAR() is usable: PUT_CHAR() refers to 'data' and may jump to
 * the 'done' and 'oops' labels of the enclosing function.
 */
#define PUT_BYTE_IN_HEX(byte,extra_flags) do { \
qse_mchar_t __xbuf[3]; \
qse_bytetombs ((byte), __xbuf, QSE_COUNTOF(__xbuf), (16 | (extra_flags)), '0'); \
PUT_CHAR(__xbuf[0]); \
PUT_CHAR(__xbuf[1]); \
} while (0)
/* BYTE_PRINTABLE(x)
 *
 * Evaluates to nonzero when x is an ASCII letter ('a'-'z', 'A'-'Z'), an
 * ASCII digit ('0'-'9') or a space, and to zero otherwise. Punctuation,
 * including the backslash, is not matched.
 *
 * Every use of the parameter is parenthesized so that an argument which is
 * itself an expression (for example one built with ?: or |) is compared as
 * a whole instead of being split up by operator precedence.
 *
 * NOTE: x is evaluated more than once; do not pass an argument with side
 * effects.
 */
#define BYTE_PRINTABLE(x) ( \
	((x) >= 'a' && (x) <= 'z') || \
	((x) >= 'A' && (x) <= 'Z') || \
	((x) >= '0' && (x) <= '9') || \
	((x) == ' ') \
)
int fmtout (const char_t* fmt, fmtout_t* data, va_list ap) int fmtout (const char_t* fmt, fmtout_t* data, va_list ap)
{ {
char_t nbuf[MAXNBUF]; char_t nbuf[MAXNBUF];
@ -619,15 +630,15 @@ reswitch:
if (lm_flag & LF_H) if (lm_flag & LF_H)
{ {
/* to print non-printables in hex */ /* to print non-printables in hex. i don't use ismprint() to avoid escaping a backslash itself. */
if (flagc & FLAGC_DOT) if (flagc & FLAGC_DOT)
{ {
/* if precision is specifed, it doesn't stop at the value of zero unlike 's' or 'S' */ /* if precision is specifed, it doesn't stop at the value of zero unlike 's' or 'S' */
for (n = 0; n < precision; n++) width -= QSE_ISMPRINT(bytep[n])? 1: k_hex_width; for (n = 0; n < precision; n++) width -= BYTE_PRINTABLE(bytep[n])? 1: k_hex_width;
} }
else else
{ {
for (n = 0; bytep[n]; n++) width -= QSE_ISMPRINT(bytep[n])? 1: k_hex_width; for (n = 0; bytep[n]; n++) width -= BYTE_PRINTABLE(bytep[n])? 1: k_hex_width;
} }
} }
else else
@ -652,7 +663,7 @@ reswitch:
while (n--) while (n--)
{ {
if ((lm_flag & LF_H) && QSE_ISMPRINT(*bytep)) if ((lm_flag & LF_H) && BYTE_PRINTABLE(*bytep))
{ {
PUT_CHAR(*bytep); PUT_CHAR(*bytep);
} }
@ -678,6 +689,84 @@ reswitch:
break; break;
} }
case T('w'):
case T('W'):
{
/* unicode string in unicode escape sequence.
*
* hw -> \uXXXX, \UXXXXXXXX, printable-byte(only in ascii range)
* w -> \uXXXX, \UXXXXXXXX
* lw -> all in \UXXXXXXXX
*/
const qse_wchar_t* usp;
qse_size_t uwid;
if (flagc & FLAGC_ZEROPAD) padc = ' ';
usp = va_arg(ap, qse_wchar_t*);
if (flagc & FLAGC_DOT)
{
/* if precision is specified, it doesn't stop at the value of zero unlike 's' or 'S' */
for (n = 0; n < precision; n++)
{
if ((lm_flag & LF_H) && BYTE_PRINTABLE(usp[n])) uwid = 1;
else if (!(lm_flag & LF_L) && usp[n] <= 0xFFFF) uwid = 6;
else uwid = 10;
width -= uwid;
}
}
else
{
for (n = 0; usp[n]; n++)
{
if ((lm_flag & LF_H) && BYTE_PRINTABLE(usp[n])) uwid = 1;
else if (!(lm_flag & LF_L) && usp[n] <= 0xFFFF) uwid = 6;
else uwid = 10;
width -= uwid;
}
}
if (!(flagc & FLAGC_LEFTADJ) && width > 0)
{
while (width--) PUT_CHAR(padc);
}
while (n--)
{
if ((lm_flag & LF_H) && BYTE_PRINTABLE(*usp))
{
PUT_CHAR(*usp);
}
else if (!(lm_flag & LF_L) && *usp <= 0xFFFF)
{
qse_uint16_t u16 = *usp;
int extra_flags = ((ch) == 'w'? QSE_BYTETOSTR_LOWERCASE: 0);
PUT_CHAR('\\');
PUT_CHAR('u');
PUT_BYTE_IN_HEX((u16 >> 8) & 0xFF, extra_flags);
PUT_BYTE_IN_HEX(u16 & 0xFF, extra_flags);
}
else
{
qse_uint32_t u32 = *usp;
int extra_flags = ((ch) == 'w'? QSE_BYTETOSTR_LOWERCASE: 0);
PUT_CHAR('\\');
PUT_CHAR('U');
PUT_BYTE_IN_HEX((u32 >> 24) & 0xFF, extra_flags);
PUT_BYTE_IN_HEX((u32 >> 16) & 0xFF, extra_flags);
PUT_BYTE_IN_HEX((u32 >> 8) & 0xFF, extra_flags);
PUT_BYTE_IN_HEX(u32 & 0xFF, extra_flags);
}
usp++;
}
if ((flagc & FLAGC_LEFTADJ) && width > 0)
{
while (width--) PUT_CHAR(padc);
}
break;
}
case T('e'): case T('e'):
case T('E'): case T('E'):
case T('f'): case T('f'):