added quite some code for handling mbs in awk

This commit is contained in:
hyung-hwan 2019-04-18 08:42:54 +00:00
parent f90ca01493
commit 07be5e22d7
12 changed files with 357 additions and 125 deletions

View File

@ -203,14 +203,14 @@ typedef struct qse_awk_val_str_t qse_awk_val_str_t;
/**
* The qse_awk_val_mbs_t type is a multi-byte string type. The type field is
* #QSE_AWK_VAL_BYTEARR.
* #QSE_AWK_VAL_MBS.
*/
struct qse_awk_val_bytearr_t
struct qse_awk_val_mbs_t
{
QSE_AWK_VAL_HDR;
qse_u8ptl_t val;
qse_mcstr_t val;
};
typedef struct qse_awk_val_bytearr_t qse_awk_val_bytearr_t;
typedef struct qse_awk_val_mbs_t qse_awk_val_mbs_t;
/**
* The qse_awk_val_rex_t type is a regular expression type. The type field
@ -358,7 +358,7 @@ enum qse_awk_nde_type_t
/* expression */
/* if you change the following values including their order,
* you should change __eval_func of __eval_expression
* you should change __evaluator of __eval_expression
* in run.c accordingly */
QSE_AWK_NDE_GRP,
QSE_AWK_NDE_ASS,
@ -372,6 +372,7 @@ enum qse_awk_nde_type_t
QSE_AWK_NDE_INT,
QSE_AWK_NDE_FLT,
QSE_AWK_NDE_STR,
QSE_AWK_NDE_MBS,
QSE_AWK_NDE_REX,
/* keep this order for the following items otherwise, you may have
@ -1211,6 +1212,7 @@ enum qse_awk_errnum_t
QSE_AWK_EEOF, /**< unexpected end of source */
QSE_AWK_ECMTNC, /**< comment not closed properly */
QSE_AWK_ESTRNC, /**< string or regular expression not closed */
QSE_AWK_EMBSCHR, /**< invalid mbs character '${0}' */
QSE_AWK_ELBRACE, /**< left brace expected in place of '${0}' */
QSE_AWK_ELPAREN, /**< left parenthesis expected in place of '${0}' */
QSE_AWK_ERPAREN, /**< right parenthesis expected in place of '${0}' */
@ -1401,7 +1403,7 @@ enum qse_awk_val_type_t
QSE_AWK_VAL_INT = 1, /**< integer */
QSE_AWK_VAL_FLT = 2, /**< floating-point number */
QSE_AWK_VAL_STR = 3, /**< string */
QSE_AWK_VAL_BYTEARR = 4, /**< byte array */
QSE_AWK_VAL_MBS = 4, /**< byte array */
QSE_AWK_VAL_MAP = 5, /**< map */
QSE_AWK_VAL_REX = 6, /**< regular expression */
@ -2474,9 +2476,9 @@ QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makenstrvalwithxstr (
* The qse_awk_rtx_makembsval() function creates a byte string value.
* \return value on success, #QSE_NULL on failure
*/
qse_awk_val_t* qse_awk_rtx_makebytearrval (
qse_awk_val_t* qse_awk_rtx_makembsval (
qse_awk_rtx_t* rtx,
const qse_uint8_t* ptr,
const qse_mchar_t* ptr,
qse_size_t len
);

View File

@ -3639,21 +3639,23 @@ QSE_EXPORT qse_size_t qse_wcs_vfmt (
QSE_EXPORT qse_size_t qse_mbs_ncatwcs (
qse_mbs_t* str,
const qse_wchar_t* s,
qse_size_t len
qse_size_t len,
qse_cmgr_t* cmgr
);
qse_size_t qse_wcs_ncatmbs (
qse_wcs_t* str,
const qse_mchar_t* s,
qse_size_t len
qse_size_t len,
qse_cmgr_t* cmgr
);
#if defined(QSE_CHAR_IS_MCHAR)
# define qse_str_ncatwcs(str,s,len) qse_mbs_ncatwcs(str,s,len)
# define qse_str_ncatmbs(str,s,len) qse_mbs_ncat(str,s,len)
# define qse_str_ncatwcs(str,s,len,cmgr) qse_mbs_ncatwcs(str,s,len,cmgr)
# define qse_str_ncatmbs(str,s,len,cmgr) qse_mbs_ncat(str,s,len)
#else
# define qse_str_ncatwcs(str,s,len) qse_wcs_ncat(str,s,len)
# define qse_str_ncatmbs(str,s,len) qse_wcs_ncatmbs(str,s,len)
# define qse_str_ncatwcs(str,s,len,cmgr) qse_wcs_ncat(str,s,len)
# define qse_str_ncatmbs(str,s,len,cmgr) qse_wcs_ncatmbs(str,s,len,cmgr)
#endif
#if defined(__cplusplus)

View File

@ -107,6 +107,8 @@ typedef struct qse_awk_tree_t qse_awk_tree_t;
#define QSE_AWK_STRDUP(awk,str) (qse_strdup(str,(awk)->mmgr))
#define QSE_AWK_STRXDUP(awk,str,len) (qse_strxdup(str,len,(awk)->mmgr))
#define QSE_AWK_BYTE_PRINTABLE(x) ((x) <= 0x7F && (x) != '\\' && QSE_ISMPRINT(x))
enum qse_awk_rio_type_t
{
/* rio types available */
@ -149,6 +151,7 @@ struct qse_awk_tok_t
struct qse_awk_t
{
qse_mmgr_t* mmgr;
qse_cmgr_t* cmgr;
/* primitive functions */
qse_awk_prm_t prm;
@ -385,7 +388,6 @@ struct qse_awk_rtx_t
qse_awk_errinf_t errinf;
qse_awk_t* awk;
qse_cmgr_t* cmgr; /* internal default cmgr */
qse_awk_rtx_ecb_t* ecb;
};

View File

@ -25,6 +25,7 @@
*/
#include "awk-prv.h"
#include <qse/cmn/mbwc.h>
static void free_fun (qse_htb_t* map, void* vptr, qse_size_t vlen)
{
@ -144,6 +145,7 @@ int qse_awk_init (qse_awk_t* awk, qse_mmgr_t* mmgr, const qse_awk_prm_t* prm)
/* remember the memory manager */
awk->mmgr = mmgr;
awk->cmgr = qse_getdflcmgr();
/* initialize error handling fields */
awk->errinf.num = QSE_AWK_ENOERR;
@ -166,9 +168,9 @@ int qse_awk_init (qse_awk_t* awk, qse_mmgr_t* mmgr, const qse_awk_prm_t* prm)
}
awk->prm = *prm;
if (init_token (mmgr, &awk->ptok) <= -1 ||
init_token (mmgr, &awk->tok) <= -1 ||
init_token (mmgr, &awk->ntok) <= -1)
if (init_token(mmgr, &awk->ptok) <= -1 ||
init_token(mmgr, &awk->tok) <= -1 ||
init_token(mmgr, &awk->ntok) <= -1)
{
qse_awk_seterrnum (awk, QSE_AWK_ENOMEM, QSE_NULL);
goto oops;

View File

@ -59,6 +59,7 @@ const qse_char_t* qse_awk_dflerrstr (const qse_awk_t* awk, qse_awk_errnum_t errn
QSE_T("unexpected end of input"),
QSE_T("comment not closed properly"),
QSE_T("string or regular expression not closed"),
QSE_T("invalid mbs character '${0}'"),
QSE_T("left brace expected in place of '${0}'"),
QSE_T("left parenthesis expected in place of '${0}'"),
QSE_T("right parenthesis expected in place of '${0}'"),

View File

@ -26,6 +26,7 @@
#include "awk-prv.h"
#include <qse/cmn/utf8.h>
#include <qse/cmn/mbwc.h>
#if !defined(QSE_AWK_DEFAULT_MODPREFIX)
# if defined(_WIN32)
@ -148,6 +149,7 @@ enum tok_t
TOK_INT,
TOK_FLT,
TOK_STR,
TOK_MBS,
TOK_REX,
__TOKEN_COUNT__
@ -871,7 +873,7 @@ static int parse_progunit (qse_awk_t* awk)
}
if (get_token(awk) <= -1) return -1;
if (!MATCH(awk,TOK_STR))
{
SETERR_LOC (awk, QSE_AWK_EINCLSTR, &awk->ptok.loc);
@ -4410,7 +4412,53 @@ oops:
return QSE_NULL;
}
static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* xloc)
/*
 * Creates a QSE_AWK_NDE_MBS node from the byte-string token currently held
 * in awk->tok.name and then reads the next token.
 *
 * awk:  awk object whose current token is the byte-string literal
 * xloc: source location stored in the node and used for error reporting
 *
 * Returns the new node on success. On failure, frees whatever was
 * allocated here and returns QSE_NULL.
 */
static qse_awk_nde_t* parse_primary_mbs (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
	qse_awk_nde_mbs_t* nde;

	nde = (qse_awk_nde_mbs_t*)qse_awk_callocmem(awk, QSE_SIZEOF(*nde));
	if (nde == QSE_NULL)
	{
		ADJERR_LOC (awk, xloc);
		return QSE_NULL;
	}

	nde->type = QSE_AWK_NDE_MBS;
	nde->loc = *xloc;

#if defined(QSE_CHAR_IS_MCHAR)
	/* the token text is already a byte string; duplicate it as is */
	nde->len = QSE_STR_LEN(awk->tok.name);
	nde->ptr = qse_awk_cstrdup(awk, QSE_STR_XSTR(awk->tok.name));
	if (!nde->ptr)
	{
		/* attach the source location, as the allocation failure above
		 * does. presumably qse_awk_cstrdup() has already set the error
		 * number - TODO confirm */
		ADJERR_LOC (awk, xloc);
		goto oops;
	}
#else
	{
		qse_size_t wcslen, mbslen;

		wcslen = QSE_STR_LEN(awk->tok.name);

		/* the MBS token doesn't include a character greater than 0xFF in
		 * awk->tok.name though it is a wide character string. so the
		 * QSE_CMGR_MB8 converter is simply used to store it in a byte string */
		nde->ptr = qse_wcsntombsdupwithcmgr(QSE_STR_PTR(awk->tok.name), wcslen, &mbslen, awk->mmgr, qse_findcmgrbyid(QSE_CMGR_MB8));
		if (!nde->ptr)
		{
			qse_awk_seterror (awk, QSE_AWK_ENOMEM, QSE_NULL, xloc);
			goto oops;
		}
		nde->len = mbslen;
	}
#endif

	/* move past the literal */
	if (get_token(awk) <= -1) goto oops;

	return (qse_awk_nde_t*)nde;

oops:
	QSE_ASSERT (nde != QSE_NULL);
	if (nde->ptr) QSE_AWK_FREE (awk, nde->ptr);
	QSE_AWK_FREE (awk, nde);
	return QSE_NULL;
}
static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
qse_awk_nde_rex_t* nde;
qse_awk_errnum_t errnum;
@ -4686,29 +4734,32 @@ static qse_awk_nde_t* parse_primary_nopipe (qse_awk_t* awk, const qse_awk_loc_t*
switch (awk->tok.type)
{
case TOK_IDENT:
return parse_primary_ident (awk, xloc);
return parse_primary_ident(awk, xloc);
case TOK_INT:
return parse_primary_int (awk, xloc);
return parse_primary_int(awk, xloc);
case TOK_FLT:
return parse_primary_flt (awk, xloc);
return parse_primary_flt(awk, xloc);
case TOK_STR:
return parse_primary_str (awk, xloc);
return parse_primary_str(awk, xloc);
case TOK_MBS:
return parse_primary_mbs(awk, xloc);
case TOK_DIV:
case TOK_DIV_ASSN:
return parse_primary_rex (awk, xloc);
return parse_primary_rex(awk, xloc);
case TOK_DOLLAR:
return parse_primary_positional (awk, xloc);
return parse_primary_positional(awk, xloc);
case TOK_LPAREN:
return parse_primary_lparen (awk, xloc);
return parse_primary_lparen(awk, xloc);
case TOK_GETLINE:
return parse_primary_getline (awk, xloc);
return parse_primary_getline(awk, xloc);
default:
/* in the tolerant mode, we treat print and printf
@ -5623,7 +5674,7 @@ static int get_number (qse_awk_t* awk, qse_awk_tok_t* tok)
static int get_string (
qse_awk_t* awk, qse_char_t end_char,
qse_char_t esc_char, int keep_esc_char,
qse_char_t esc_char, int keep_esc_char, int byte_only,
qse_size_t preescaped, qse_awk_tok_t* tok)
{
qse_cint_t c;
@ -5641,6 +5692,15 @@ static int get_string (
return -1;
}
#if !defined(QSE_CHAR_IS_MCHAR)
if (byte_only && c != '\\' && !QSE_AWK_BYTE_PRINTABLE(c))
{
qse_char_t wc = c;
SETERR_ARG_LOC (awk, QSE_AWK_EMBSCHR, &wc, 1, &awk->tok.loc);
return -1;
}
#endif
if (escaped == 3)
{
if (c >= QSE_T('0') && c <= QSE_T('7'))
@ -5758,7 +5818,7 @@ static int get_string (
c_acc = 0;
continue;
}
else if (c == QSE_T('u'))
else if (!byte_only && c == QSE_T('u'))
{
/* in the MCHAR mode, the \u letter will get converted to UTF-8 sequences.
* see ADD_TOKEN_UINT32(). */
@ -5767,7 +5827,7 @@ static int get_string (
c_acc = 0;
continue;
}
else if (c == QSE_T('U'))
else if (!byte_only && c == QSE_T('U'))
{
/* in the MCHAR mode, the \U letter will get converted to UTF-8 sequences
* see ADD_TOKEN_UINT32(). */
@ -5827,10 +5887,48 @@ static int get_rexstr (qse_awk_t* awk, qse_awk_tok_t* tok)
* begins with reading the next character */
ADD_TOKEN_CHAR (awk, tok, awk->sio.last.c);
}
return get_string (awk, QSE_T('/'), QSE_T('\\'), 1, preescaped, tok);
return get_string(awk, QSE_T('/'), QSE_T('\\'), 1, 0, preescaped, tok);
}
}
/*
 * Collects the body of a single-quoted literal into tok. No escape
 * processing is performed; characters are stored exactly as read until
 * the closing quote, which is consumed but not stored.
 *
 * byte_only: when non-zero in a wide-character build, any character other
 *            than a backslash that fails QSE_AWK_BYTE_PRINTABLE() is
 *            rejected with QSE_AWK_EMBSCHR. It has no effect when
 *            QSE_CHAR_IS_MCHAR is defined.
 *
 * Returns 0 on success and -1 on failure. Reaching the end of input before
 * the closing quote sets QSE_AWK_ESTRNC. The GET_CHAR*() and
 * ADD_TOKEN_CHAR() macros presumably return -1 from this function on
 * their own failures - confirm against their definitions.
 */
static int get_single_quoted_string (qse_awk_t* awk, int byte_only, qse_awk_tok_t* tok)
{
	qse_cint_t c;

	for (;;)
	{
		GET_CHAR_TO (awk, c);

		if (c == QSE_CHAR_EOF)
		{
			/* the literal was never closed */
			SETERR_LOC (awk, QSE_AWK_ESTRNC, &awk->tok.loc);
			return -1;
		}

	#if !defined(QSE_CHAR_IS_MCHAR)
		if (byte_only && c != '\\' && !QSE_AWK_BYTE_PRINTABLE(c))
		{
			qse_char_t offender = c;
			SETERR_ARG_LOC (awk, QSE_AWK_EMBSCHR, &offender, 1, &awk->tok.loc);
			return -1;
		}
	#endif

		if (c == QSE_T('\'')) break; /* terminating quote */

		ADD_TOKEN_CHAR (awk, tok, c);
	}

	/* step over the terminating quote */
	GET_CHAR (awk);
	return 0;
}
static int skip_spaces (qse_awk_t* awk)
{
qse_cint_t c = awk->sio.last.c;
@ -6155,7 +6253,7 @@ retry:
QSE_AWK_ISALPHA(awk, c) ||
QSE_AWK_ISDIGIT(awk, c));
type = classify_ident (awk, QSE_STR_XSTR(tok->name));
type = classify_ident(awk, QSE_STR_XSTR(tok->name));
if (type == TOK_IDENT)
{
SETERR_TOK (awk, QSE_AWK_EXKWNR);
@ -6163,54 +6261,52 @@ retry:
}
SET_TOKEN_TYPE (awk, tok, type);
}
else if (c == 'M')
{
/* 'M' may introduce a byte-string literal, M"..." or M'...'.
 * read one more character to find out. */
GET_CHAR_TO (awk, c);
if (c == '\"')
{
/* multi-byte string */
/* escapes are processed; get_string() is called with byte_only set to 1 */
SET_TOKEN_TYPE (awk, tok, TOK_MBS);
if (get_string(awk, c, QSE_T('\\'), 0, 1, 0, tok) <= -1) return -1;
}
else if (c == '\'')
{
/* single-quoted byte string - no escaping */
SET_TOKEN_TYPE (awk, tok, TOK_MBS);
if (get_single_quoted_string(awk, 1, tok) <= -1) return -1;
}
else
{
/* NOTE(review): c now holds the character that followed 'M', and 'M'
 * itself has not been added to the token. the loop at process_identifier
 * begins by adding c, so the leading 'M' of an identifier looks like it is
 * dropped, and a non-identifier character right after 'M' would be added
 * to the token. confirm and fix together with the identifier branch. */
goto process_identifier;
}
}
else if (c == QSE_T('_') || QSE_AWK_ISALPHA(awk, c))
{
int type;
process_identifier:
/* identifier */
do
{
ADD_TOKEN_CHAR (awk, tok, c);
GET_CHAR_TO (awk, c);
}
while (c == QSE_T('_') ||
QSE_AWK_ISALPHA(awk, c) ||
QSE_AWK_ISDIGIT(awk, c));
while (c == QSE_T('_') || QSE_AWK_ISALPHA(awk, c) || QSE_AWK_ISDIGIT(awk, c));
type = classify_ident (awk, QSE_STR_XSTR(tok->name));
type = classify_ident(awk, QSE_STR_XSTR(tok->name));
SET_TOKEN_TYPE (awk, tok, type);
}
else if (c == QSE_T('\"'))
{
/* double-quoted string */
SET_TOKEN_TYPE (awk, tok, TOK_STR);
if (get_string (awk, c, QSE_T('\\'), 0, 0, tok) <= -1) return -1;
if (get_string(awk, c, QSE_T('\\'), 0, 0, 0, tok) <= -1) return -1;
}
else if (c == QSE_T('\''))
{
/* single-quoted string - no escaping */
SET_TOKEN_TYPE (awk, tok, TOK_STR);
while (1)
{
GET_CHAR_TO (awk, c);
if (c == QSE_CHAR_EOF)
{
SETERR_LOC (awk, QSE_AWK_ESTRNC, &awk->tok.loc);
return -1;
}
if (c == QSE_T('\''))
{
/* terminating quote */
GET_CHAR (awk);
break;
}
ADD_TOKEN_CHAR (awk, tok, c);
}
if (get_single_quoted_string(awk, 0, tok) <= -1) return -1;
}
else
{
@ -6916,4 +7012,3 @@ done:
n = mdp->mod.query (&mdp->mod, awk, segs[1].ptr, sym);
return (n <= -1)? QSE_NULL: &mdp->mod;
}

View File

@ -250,6 +250,7 @@ static qse_awk_val_t** get_reference_indexed (
static qse_awk_val_t* eval_int (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
static qse_awk_val_t* eval_real (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
static qse_awk_val_t* eval_str (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
static qse_awk_val_t* eval_mbs (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
static qse_awk_val_t* eval_rex (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
static qse_awk_val_t* eval_named (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
static qse_awk_val_t* eval_gbl (qse_awk_rtx_t* run, qse_awk_nde_t* nde);
@ -985,7 +986,6 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
};
rtx->awk = awk;
rtx->cmgr = qse_getdflcmgr();
CLRERR (rtx);
@ -3294,6 +3294,7 @@ static qse_awk_val_t* eval_expression0 (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
eval_int,
eval_real,
eval_str,
eval_mbs,
eval_rex,
eval_named,
eval_gbl,
@ -4179,7 +4180,7 @@ static QSE_INLINE int __cmp_nil_str (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qs
static QSE_INLINE int __cmp_nil_bytearr (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
{
return (((qse_awk_val_bytearr_t*)right)->val.len == 0)? 0: -1;
return (((qse_awk_val_mbs_t*)right)->val.len == 0)? 0: -1;
}
static QSE_INLINE int __cmp_nil_map (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
@ -4467,7 +4468,7 @@ static QSE_INLINE int __cmp_str_str (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qs
static QSE_INLINE int __cmp_str_bytearr (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
{
qse_awk_val_str_t* ls = (qse_awk_val_str_t*)left;
qse_awk_val_bytearr_t* rs = (qse_awk_val_bytearr_t*)right;
qse_awk_val_mbs_t* rs = (qse_awk_val_mbs_t*)right;
#if (QSE_SIZEOF_MCHAR_T != QSE_SIZEOF_UINT8_T)
# error Unsupported size of qse_mchar_t
@ -4501,7 +4502,7 @@ static QSE_INLINE int __cmp_str_map (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qs
static QSE_INLINE int __cmp_bytearr_nil (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
{
return (((qse_awk_val_bytearr_t*)left)->val.len == 0)? 0: 1;
return (((qse_awk_val_mbs_t*)left)->val.len == 0)? 0: 1;
}
static QSE_INLINE int __cmp_bytearr_int (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
@ -4521,8 +4522,8 @@ static QSE_INLINE int __cmp_bytearr_str (qse_awk_rtx_t* rtx, qse_awk_val_t* left
static QSE_INLINE int __cmp_bytearr_bytearr (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
{
qse_awk_val_bytearr_t* ls = (qse_awk_val_bytearr_t*)left;
qse_awk_val_bytearr_t* rs = (qse_awk_val_bytearr_t*)right;
qse_awk_val_mbs_t* ls = (qse_awk_val_mbs_t*)left;
qse_awk_val_mbs_t* rs = (qse_awk_val_mbs_t*)right;
#if (QSE_SIZEOF_MCHAR_T != QSE_SIZEOF_UINT8_T)
# error Unsupported size of qse_mchar_t
#endif
@ -4609,7 +4610,7 @@ static int __cmp_val(
* QSE_AWK_VAL_INT = 1
* QSE_AWK_VAL_FLT = 2
* QSE_AWK_VAL_STR = 3
* QSE_AWK_VAL_BYTEARR = 4
* QSE_AWK_VAL_MBS = 4
* QSE_AWK_VAL_MAP = 5
*/
return func[lvtype * 6 + rvtype](rtx, left, right);
@ -4651,12 +4652,12 @@ static int teq_val (qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* righ
((qse_awk_val_str_t*)right)->val.len) == 0;
break;
case QSE_AWK_VAL_BYTEARR:
case QSE_AWK_VAL_MBS:
n = qse_mbsxncmp (
((qse_awk_val_bytearr_t*)left)->val.ptr,
((qse_awk_val_bytearr_t*)left)->val.len,
((qse_awk_val_bytearr_t*)right)->val.ptr,
((qse_awk_val_bytearr_t*)right)->val.len) == 0;
((qse_awk_val_mbs_t*)left)->val.ptr,
((qse_awk_val_mbs_t*)left)->val.len,
((qse_awk_val_mbs_t*)right)->val.ptr,
((qse_awk_val_mbs_t*)right)->val.len) == 0;
break;
default:
@ -6262,6 +6263,19 @@ static qse_awk_val_t* eval_str (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
return val;
}
/*
 * Evaluates a QSE_AWK_NDE_MBS node by creating a byte-string value from
 * the bytes stored in the node.
 *
 * Returns the value made by qse_awk_rtx_makembsval(). When that fails,
 * the node's location is attached to the error and QSE_NULL is returned.
 */
static qse_awk_val_t* eval_mbs (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
{
	qse_awk_nde_mbs_t* mbs = (qse_awk_nde_mbs_t*)nde;
	qse_awk_val_t* made = qse_awk_rtx_makembsval(run, mbs->ptr, mbs->len);

	if (made == QSE_NULL)
	{
		ADJERR_LOC (run, &nde->loc);
	}

	return made;
}
static qse_awk_val_t* eval_rex (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
{
qse_awk_val_t* val;
@ -7329,7 +7343,7 @@ wp_mod_main:
qse_awk_val_t* v;
qse_awk_flt_t r;
int n;
#if defined(QSE_USE_AWK_FLTMAX)
FMT_CHAR (QSE_T('j'));
#else
@ -7438,6 +7452,16 @@ wp_mod_main:
else ch = QSE_T('\0');
break;
case QSE_AWK_VAL_MBS:
ch_len = ((qse_awk_val_mbs_t*)v)->val.len;
if (ch_len > 0)
{
ch = ((qse_awk_val_mbs_t*)v)->val.ptr[0];
ch_len = 1;
}
else ch = QSE_T('\0');
break;
default:
qse_awk_rtx_refdownval (rtx, v);
SETERR_COD (rtx, QSE_AWK_EVALTOCHR);
@ -7531,7 +7555,7 @@ wp_mod_main:
qse_awk_rtx_refupval (rtx, v);
vtype = QSE_AWK_RTX_GETVALTYPE (rtx, v);
vtype = QSE_AWK_RTX_GETVALTYPE(rtx, v);
switch (vtype)
{
case QSE_AWK_VAL_NIL:
@ -7544,6 +7568,17 @@ wp_mod_main:
str_len = ((qse_awk_val_str_t*)v)->val.len;
break;
case QSE_AWK_VAL_MBS:
#if defined(QSE_CHAR_IS_MCHAR)
str_ptr = ((qse_awk_val_mbs_t*)v)->val.ptr;
str_len = ((qse_awk_val_mbs_t*)v)->val.len;
break;
#else
str_ptr = (qse_char_t*)((qse_awk_val_mbs_t*)v)->val.ptr;
str_len = ((qse_awk_val_mbs_t*)v)->val.len;
break;
#endif
default:
{
qse_awk_rtx_valtostr_out_t out;
@ -7556,7 +7591,7 @@ wp_mod_main:
}
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, v, &out) <= -1)
if (qse_awk_rtx_valtostr(rtx, v, &out) <= -1)
{
qse_awk_rtx_refdownval (rtx, v);
return QSE_NULL;
@ -7588,47 +7623,54 @@ wp_mod_main:
}
}
#define BYTE_PRINTABLE(x) ((x) <= 0x7F && (x) != '\\' && QSE_ISMPRINT(x))
if (fmt[i] == QSE_T('k')) bytetostr_flagged_radix |= QSE_BYTETOSTR_LOWERCASE;
for (k = 0; k < wp[WP_PRECISION]; k++)
{
if (fmt[i] != QSE_T('s') && !BYTE_PRINTABLE(str_ptr[k]))
qse_char_t curc;
#if defined(QSE_CHAR_IS_MCHAR)
curc = str_ptr[k];
#else
if (vtype == QSE_AWK_VAL_MBS) curc = (qse_uint8_t)((qse_mchar_t*)str_ptr)[k];
else curc = str_ptr[k];
#endif
if (fmt[i] != QSE_T('s') && !QSE_AWK_BYTE_PRINTABLE(curc))
{
qse_char_t xbuf[3];
if (str_ptr[k] <= 0xFF)
if (curc <= 0xFF)
{
if (qse_str_ncat (out, QSE_T("\\x"), 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr(str_ptr[k], xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, QSE_T("\\x"), 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr(curc, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
}
else if (str_ptr[k] <= 0xFFFF)
else if (curc <= 0xFFFF)
{
qse_uint16_t u16 = str_ptr[k];
if (qse_str_ncat (out, QSE_T("\\u"), 2) == (qse_size_t)-1) goto s_fail;
qse_uint16_t u16 = curc;
if (qse_str_ncat(out, QSE_T("\\u"), 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr((u16 >> 8) & 0xFF, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr(u16 & 0xFF, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
}
else
{
qse_uint32_t u32 = str_ptr[k];
if (qse_str_ncat (out, QSE_T("\\U"), 2) == (qse_size_t)-1) goto s_fail;
qse_uint32_t u32 = curc;
if (qse_str_ncat(out, QSE_T("\\U"), 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr((u32 >> 24) & 0xFF, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr((u32 >> 16) & 0xFF, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr((u32 >> 8) & 0xFF, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
qse_bytetostr(u32 & 0xFF, xbuf, QSE_COUNTOF(xbuf), bytetostr_flagged_radix, QSE_T('0'));
if (qse_str_ncat (out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
if (qse_str_ncat(out, xbuf, 2) == (qse_size_t)-1) goto s_fail;
}
}
else
{
if (qse_str_ccat(out, str_ptr[k]) == (qse_size_t)-1)
if (qse_str_ccat(out, curc) == (qse_size_t)-1)
{
s_fail:
if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
@ -7657,7 +7699,6 @@ wp_mod_main:
}
qse_awk_rtx_refdownval (rtx, v);
}
else
{

View File

@ -417,6 +417,76 @@ static int print_expr (qse_awk_t* awk, qse_awk_nde_t* nde)
break;
}
case QSE_AWK_NDE_MBS:
{
qse_mchar_t* ptr;
qse_size_t len, i;
PUT_SRCSTR (awk, QSE_T("M\""));
ptr = ((qse_awk_nde_mbs_t*)nde)->ptr;
len = ((qse_awk_nde_mbs_t*)nde)->len;
for (i = 0; i < len; i++)
{
/* TODO: maybe more de-escaping?? */
switch (ptr[i])
{
case QSE_MT('\n'):
PUT_SRCSTR (awk, QSE_T("\\n"));
break;
case QSE_MT('\r'):
PUT_SRCSTR (awk, QSE_T("\\r"));
break;
case QSE_MT('\t'):
PUT_SRCSTR (awk, QSE_T("\\t"));
break;
case QSE_MT('\f'):
PUT_SRCSTR (awk, QSE_T("\\f"));
break;
case QSE_MT('\b'):
PUT_SRCSTR (awk, QSE_T("\\b"));
break;
case QSE_MT('\v'):
PUT_SRCSTR (awk, QSE_T("\\v"));
break;
case QSE_MT('\a'):
PUT_SRCSTR (awk, QSE_T("\\a"));
break;
case QSE_MT('\0'):
PUT_SRCSTR (awk, QSE_T("\\0"));
break;
case QSE_MT('\"'):
PUT_SRCSTR (awk, QSE_T("\\\""));
break;
case QSE_MT('\\'):
PUT_SRCSTR (awk, QSE_T("\\\\"));
break;
default:
{
#if defined(QSE_CHAR_IS_MCHAR)
PUT_SRCSTRN (awk, &ptr[i], 1);
#else
qse_char_t wc = ptr[i];
if (QSE_AWK_BYTE_PRINTABLE(wc))
{
PUT_SRCSTRN (awk, &wc, 1);
}
else
{
qse_mchar_t xbuf[3];
qse_bytetombs (wc, xbuf, QSE_COUNTOF(xbuf), 16, '0');
PUT_SRCSTR (awk, QSE_T("\\x"));
wc = xbuf[0]; PUT_SRCSTRN (awk, &wc, 1);
wc = xbuf[1]; PUT_SRCSTRN (awk, &wc, 1);
}
#endif
break;
}
}
}
PUT_SRCSTR (awk, QSE_T("\""));
break;
}
case QSE_AWK_NDE_REX:
{
PUT_SRCSTR (awk, QSE_T("/"));
@ -1317,6 +1387,13 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
break;
}
case QSE_AWK_NDE_MBS:
{
QSE_AWK_FREE (awk, ((qse_awk_nde_mbs_t*)p)->ptr);
QSE_AWK_FREE (awk, p);
break;
}
case QSE_AWK_NDE_REX:
{
qse_awk_nde_rex_t* rex = (qse_awk_nde_rex_t*)p;

View File

@ -61,6 +61,7 @@ typedef struct qse_awk_nde_int_t qse_awk_nde_int_t;
typedef struct qse_awk_nde_flt_t qse_awk_nde_flt_t;
typedef struct qse_awk_nde_str_t qse_awk_nde_str_t;
typedef struct qse_awk_nde_mbs_t qse_awk_nde_mbs_t;
typedef struct qse_awk_nde_rex_t qse_awk_nde_rex_t;
typedef struct qse_awk_nde_var_t qse_awk_nde_var_t;
typedef struct qse_awk_nde_fncall_t qse_awk_nde_fncall_t;
@ -156,6 +157,14 @@ struct qse_awk_nde_str_t
qse_size_t len;
};
/* QSE_AWK_NDE_MBS - parse-tree node holding a byte-string literal */
struct qse_awk_nde_mbs_t
{
QSE_AWK_NDE_HDR;
/* bytes of the literal; allocated in parse_primary_mbs() and released
 * in qse_awk_clrpt() */
qse_mchar_t* ptr;
/* number of bytes stored at ptr */
qse_size_t len;
};
/* QSE_AWK_NDE_REX */
struct qse_awk_nde_rex_t
{

View File

@ -245,7 +245,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithwcs (qse_awk_rtx_t* rtx, const qse_wcha
qse_awk_val_t* v;
qse_mcstr_t tmp;
tmp.ptr = qse_wcstombsdup (wcs, &tmp.len, rtx->awk->mmgr);
tmp.ptr = qse_wcstombsdup(wcs, &tmp.len, rtx->awk->mmgr);
if (tmp.ptr == QSE_NULL)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
@ -398,26 +398,26 @@ qse_awk_val_t* qse_awk_rtx_makenstrvalwithxstr (qse_awk_rtx_t* rtx, const qse_cs
return v;
}
qse_awk_val_t* qse_awk_rtx_makebytearrval (qse_awk_rtx_t* rtx, const qse_uint8_t* ptr, qse_size_t len)
qse_awk_val_t* qse_awk_rtx_makembsval (qse_awk_rtx_t* rtx, const qse_mchar_t* ptr, qse_size_t len)
{
qse_awk_val_bytearr_t* val = QSE_NULL;
qse_size_t xlen = len * QSE_SIZEOF(*ptr);
qse_awk_val_mbs_t* val = QSE_NULL;
qse_size_t xsz = len * QSE_SIZEOF(*ptr);
val = (qse_awk_val_bytearr_t*)QSE_AWK_ALLOC(rtx->awk, QSE_SIZEOF(qse_awk_val_bytearr_t) + xlen + QSE_SIZEOF(*ptr));
val = (qse_awk_val_mbs_t*)QSE_AWK_ALLOC(rtx->awk, QSE_SIZEOF(qse_awk_val_mbs_t) + xsz + QSE_SIZEOF(*ptr));
if (val == QSE_NULL)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
return QSE_NULL;
}
val->v_type = QSE_AWK_VAL_BYTEARR;
val->v_type = QSE_AWK_VAL_MBS;
val->ref = 0;
val->stat = 0;
val->nstr = 0;
val->val.len = len;
val->val.ptr = (qse_uint8_t*)(val + 1);
QSE_MEMCPY (val->val.ptr, ptr, xlen);
val->val.ptr[xlen] = 0;
val->val.ptr = (qse_mchar_t*)(val + 1);
QSE_MEMCPY (val->val.ptr, ptr, xsz);
val->val.ptr[len] = QSE_MT('\0');
return (qse_awk_val_t*)val;
}
@ -812,7 +812,7 @@ void qse_awk_rtx_freeval (qse_awk_rtx_t* rtx, qse_awk_val_t* val, int cache)
break;
}
case QSE_AWK_VAL_BYTEARR:
case QSE_AWK_VAL_MBS:
QSE_AWK_FREE (rtx->awk, val);
break;
@ -976,8 +976,8 @@ int qse_awk_rtx_valtobool (qse_awk_rtx_t* rtx, const qse_awk_val_t* val)
return ((qse_awk_val_flt_t*)val)->val != 0.0;
case QSE_AWK_VAL_STR:
return ((qse_awk_val_str_t*)val)->val.len > 0;
case QSE_AWK_VAL_BYTEARR:
return ((qse_awk_val_bytearr_t*)val)->val.len > 0;
case QSE_AWK_VAL_MBS:
return ((qse_awk_val_mbs_t*)val)->val.len > 0;
case QSE_AWK_VAL_REX: /* TODO: is this correct? */
return ((qse_awk_val_rex_t*)val)->str.len > 0;
case QSE_AWK_VAL_MAP:
@ -1085,7 +1085,7 @@ static int mbs_to_str (qse_awk_rtx_t* rtx, const qse_mchar_t* str, qse_size_t st
mbslen = str_len;
wcslen = out->u.cplcpy.len;
if (qse_mbsntowcsnallwithcmgr(str, &mbslen, out->u.cplcpy.ptr, &wcslen, rtx->cmgr) <= -1 || wcslen >= out->u.cplcpy.len)
if (qse_mbsntowcsnallwithcmgr(str, &mbslen, out->u.cplcpy.ptr, &wcslen, qse_findcmgrbyid(QSE_CMGR_MB8)) <= -1 || wcslen >= out->u.cplcpy.len)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_EINVAL, QSE_NULL); /* TODO: change error code */
return -1;
@ -1103,7 +1103,7 @@ static int mbs_to_str (qse_awk_rtx_t* rtx, const qse_mchar_t* str, qse_size_t st
qse_size_t mbslen, wcslen;
mbslen = str_len;
tmp = qse_mbsntowcsalldupwithcmgr(str, &mbslen, &wcslen, rtx->awk->mmgr, rtx->cmgr);
tmp = qse_mbsntowcsalldupwithcmgr(str, &mbslen, &wcslen, rtx->awk->mmgr, qse_findcmgrbyid(QSE_CMGR_MB8));
if (!tmp)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
@ -1120,7 +1120,7 @@ static int mbs_to_str (qse_awk_rtx_t* rtx, const qse_mchar_t* str, qse_size_t st
qse_size_t n;
qse_str_clear (out->u.strp);
n = qse_str_ncatmbs(out->u.strp, str, str_len);
n = qse_str_ncatmbs(out->u.strp, str, str_len, qse_findcmgrbyid(QSE_CMGR_MB8));
if (n == (qse_size_t)-1)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
@ -1133,7 +1133,7 @@ static int mbs_to_str (qse_awk_rtx_t* rtx, const qse_mchar_t* str, qse_size_t st
{
qse_size_t n;
n = qse_str_ncatmbs(out->u.strpcat, str, str_len);
n = qse_str_ncatmbs(out->u.strpcat, str, str_len, qse_findcmgrbyid(QSE_CMGR_MB8));
if (n == (qse_size_t)-1)
{
qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
@ -1469,9 +1469,9 @@ int qse_awk_rtx_valtostr (
return str_to_str(rtx, vs->val.ptr, vs->val.len, out);
}
case QSE_AWK_VAL_BYTEARR:
case QSE_AWK_VAL_MBS:
{
qse_awk_val_bytearr_t* vs = (qse_awk_val_bytearr_t*)v;
qse_awk_val_mbs_t* vs = (qse_awk_val_mbs_t*)v;
#if defined(QSE_CHAR_IS_MCHAR)
return str_to_str(rtx, vs->val.ptr, vs->val.len, out);
#else
@ -1678,13 +1678,13 @@ int qse_awk_rtx_valtonum (qse_awk_rtx_t* rtx, const qse_awk_val_t* v, qse_awk_in
);
}
case QSE_AWK_VAL_BYTEARR:
case QSE_AWK_VAL_MBS:
{
return qse_awk_rtx_mbstonum (
rtx,
QSE_AWK_RTX_STRTONUM_MAKE_OPTION(0, 0),
((qse_awk_val_bytearr_t*)v)->val.ptr,
((qse_awk_val_bytearr_t*)v)->val.len,
((qse_awk_val_mbs_t*)v)->val.ptr,
((qse_awk_val_mbs_t*)v)->val.len,
l, r
);
}
@ -1823,9 +1823,9 @@ qse_awk_int_t qse_awk_rtx_hashval (qse_awk_rtx_t* rtx, qse_awk_val_t* v)
break;
}
case QSE_AWK_VAL_BYTEARR:
case QSE_AWK_VAL_MBS:
{
qse_awk_val_bytearr_t* dv = (qse_awk_val_bytearr_t*)v;
qse_awk_val_mbs_t* dv = (qse_awk_val_mbs_t*)v;
hv = (qse_awk_int_t)hash((qse_uint8_t*)dv->val.ptr, dv->val.len * QSE_SIZEOF(*dv->val.ptr));
break;
}
@ -1886,13 +1886,13 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
return x;
}
case QSE_AWK_VAL_BYTEARR:
case QSE_AWK_VAL_MBS:
#if defined(QSE_CHAR_IS_MCHAR)
{
/* same as str in the mchar mode */
int x;
qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec(rtx, (qse_size_t)ref->adr, &((qse_awk_val_bytearr_t*)val)->val);
x = qse_awk_rtx_setrec(rtx, (qse_size_t)ref->adr, &((qse_awk_val_mbs_t*)val)->val);
qse_awk_rtx_refdownval (rtx, val);
return x;
}
@ -2018,6 +2018,10 @@ void qse_awk_dprintval (qse_awk_rtx_t* run, qse_awk_val_t* val)
qse_errputstrf (QSE_T("%s"), ((qse_awk_val_str_t*)val)->ptr);
break;
case QSE_AWK_VAL_MBS:
qse_errputstrf (QSE_T("%hs"), ((qse_awk_val_mbs_t*)val)->ptr);
break;
case QSE_AWK_VAL_REX:
qse_errputstrf (QSE_T("REX[%s]"), ((qse_awk_val_rex_t*)val)->ptr);
break;

View File

@ -92,7 +92,6 @@ static char_t* sprintn (char_t* nbuf, qse_uintmax_t num, int base, int *lenp, in
#undef PUT_CHAR
#undef PUT_BYTE_IN_HEX
#undef BYTE_PRINTABLE
#define PUT_CHAR(c) do { \
int xx; \

View File

@ -280,10 +280,9 @@ static int mbs_to_wcs (
#include "str-dyn.h"
qse_size_t qse_mbs_ncatwcs (qse_mbs_t* str, const qse_wchar_t* s, qse_size_t len)
qse_size_t qse_mbs_ncatwcs (qse_mbs_t* str, const qse_wchar_t* s, qse_size_t len, qse_cmgr_t* cmgr)
{
qse_size_t mbslen, wcslen;
qse_cmgr_t* cmgr = qse_getdflcmgr();
wcslen = len;
if (qse_wcsntombsnwithcmgr(s, &wcslen, QSE_NULL, &mbslen, cmgr) <= -1) return (qse_size_t)-1;
@ -299,10 +298,9 @@ qse_size_t qse_mbs_ncatwcs (qse_mbs_t* str, const qse_wchar_t* s, qse_size_t len
return str->val.len;
}
qse_size_t qse_wcs_ncatmbs (qse_wcs_t* str, const qse_mchar_t* s, qse_size_t len)
qse_size_t qse_wcs_ncatmbs (qse_wcs_t* str, const qse_mchar_t* s, qse_size_t len, qse_cmgr_t* cmgr)
{
qse_size_t mbslen, wcslen;
qse_cmgr_t* cmgr = qse_getdflcmgr();
mbslen = len;
if (qse_mbsntowcsnallwithcmgr(s, &mbslen, QSE_NULL, &wcslen, cmgr) <= -1) return (qse_size_t)-1;