From 955210800ef542d68f7c22420609134a5906987d Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Thu, 10 Jul 2025 23:12:47 +0900 Subject: [PATCH] fixed a segfault bug in index()/rindex() function handler which treated a byte character as a byte string. enhanced code to handle BOB better --- README.md | 7 +++--- lib/Hawk.cpp | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- lib/Hawk.hpp | 36 ++++++++++++++++++++++------- lib/fmt.c | 2 ++ lib/fnc.c | 43 +++++++++++++++++++++++++++++------ lib/hawk.c | 12 ++++++++++ lib/hawk.h | 34 +++++++++++++++++++++++++--- lib/mod-hawk.c | 1 + lib/mod-str.c | 9 +++++++- lib/rio.c | 8 +++++++ lib/run.c | 54 +++++++++++++++++++++++++++++++++++++++----- lib/val.c | 51 ++++++++++++++++++++++++++++++++++++++--- t/h-002.hawk | 10 +++++++++ 13 files changed, 296 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index e8405fa3..47e1bd75 100644 --- a/README.md +++ b/README.md @@ -503,11 +503,12 @@ However, some of these words not beginning with `@` can be used as normal names ## Values - uninitialized value -- character +- character - 'C' +- byte character - @b'B' - integer - floating-point number -- string -- byte string +- string - "string" +- byte string - @b"byte string" - array - light-weight array with numeric index only - map - conventional AWK array - function diff --git a/lib/Hawk.cpp b/lib/Hawk.cpp index 11e8cb6c..59d28f4e 100644 --- a/lib/Hawk.cpp +++ b/lib/Hawk.cpp @@ -229,6 +229,12 @@ const hawk_bch_t* Hawk::Value::getEmptyMbs() return EMPTY_STRING; } +const void* Hawk::Value::getEmptyBob() +{ + static const hawk_uint8_t EMPTY_BOB = 0; + return &EMPTY_BOB; +} + Hawk::Value::IntIndex::IntIndex (hawk_int_t x) { ptr = buf; @@ -599,6 +605,30 @@ int Hawk::Value::getMbs (const hawk_bch_t** str, hawk_oow_t* len) const return 0; } +int Hawk::Value::getBob (const void** ptr, hawk_oow_t* len) const +{ + const void* p = getEmptyBob(); + hawk_oow_t l = 0; + + HAWK_ASSERT (this->val != HAWK_NULL); + + if (this->run) + { 
+ if (HAWK_RTX_GETVALTYPE(this->run->rtx, this->val) == HAWK_VAL_BOB) + { + p = ((hawk_val_bob_t*)this->val)->val.ptr; + l = ((hawk_val_bob_t*)this->val)->val.len; + } + } + + *ptr = p; + *len = l; + + return 0; +} + +////////////////////////////////////////////////////////////////// + int Hawk::Value::setVal (hawk_val_t* v) { if (this->run == HAWK_NULL) @@ -839,7 +869,36 @@ int Hawk::Value::setMbs (Run* r, const hawk_bch_t* str) { hawk_val_t* tmp; tmp = hawk_rtx_makembsvalwithbchars(r->rtx, str, hawk_count_bcstr(str)); - if (!tmp) + if (HAWK_UNLIKELY(!tmp)) + { + r->hawk->retrieveError(r); + return -1; + } + + int n = this->setVal(r, tmp); + HAWK_ASSERT (n == 0); + return n; +} + +/////////////////////////////////////////////////////////////////// + +int Hawk::Value::setBob (const void* ptr, hawk_oow_t len) +{ + if (this->run == HAWK_NULL) + { + /* no runtime context assoicated. unfortunately, i can't + * set an error number for the same reason */ + return -1; + } + return this->setBob(this->run, ptr, len); +} + +int Hawk::Value::setBob (Run* r, const void* ptr, hawk_oow_t len) +{ + hawk_val_t* tmp; + + tmp = hawk_rtx_makebobval(r->rtx, ptr, len); + if (HAWK_UNLIKELY(!tmp)) { r->hawk->retrieveError(r); return -1; diff --git a/lib/Hawk.hpp b/lib/Hawk.hpp index 780f13d1..cdfa361b 100644 --- a/lib/Hawk.hpp +++ b/lib/Hawk.hpp @@ -1072,6 +1072,21 @@ public: return p; } + const void* toBob (hawk_oow_t* len) const + { + const void* p; + hawk_oow_t l; + + if (this->getBob(&p, &l) <= -1) + { + p = this->getEmptyBob(); + l = 0; + } + + if (len) *len = l; + return p; + } + int getType () const { return hawk_get_val_type(this->val); } const hawk_ooch_t* getTypeName () const { return hawk_get_val_type_name(this->val); } @@ -1080,6 +1095,7 @@ public: int getNum (hawk_int_t* lv, hawk_flt_t* fv) const; int getStr (const hawk_ooch_t** str, hawk_oow_t* len) const; int getMbs (const hawk_bch_t** str, hawk_oow_t* len) const; + int getBob (const void** str, hawk_oow_t* len) const; int 
setVal (hawk_val_t* v); int setVal (Run* r, hawk_val_t* v); @@ -1103,6 +1119,9 @@ public: int setMbs (const hawk_bch_t* str); int setMbs (Run* r, const hawk_bch_t* str); + int setBob (const void* str, hawk_oow_t len); + int setBob (Run* r, const void* str, hawk_oow_t len); + int setArrayedVal (hawk_ooi_t idx, hawk_val_t* v); int setArrayedVal (Run* r, hawk_ooi_t idx, hawk_val_t* v); @@ -1196,6 +1215,7 @@ public: public: static const hawk_ooch_t* getEmptyStr(); static const hawk_bch_t* getEmptyMbs(); + static const void* getEmptyBob(); }; public: @@ -1621,20 +1641,20 @@ public: /// int addFunction ( const hawk_bch_t* name, ///< function name - hawk_oow_t minArgs, ///< minimum numbers of arguments - hawk_oow_t maxArgs, ///< maximum numbers of arguments + hawk_oow_t minArgs, ///< minimum numbers of arguments + hawk_oow_t maxArgs, ///< maximum numbers of arguments const hawk_bch_t* argSpec, ///< argument specification - FunctionHandler handler, ///< function handler - int validOpts = 0 ///< valid if these options are set + FunctionHandler handler, ///< function handler + int validOpts = 0 ///< valid if these options are set ); int addFunction ( const hawk_uch_t* name, ///< function name - hawk_oow_t minArgs, ///< minimum numbers of arguments - hawk_oow_t maxArgs, ///< maximum numbers of arguments + hawk_oow_t minArgs, ///< minimum numbers of arguments + hawk_oow_t maxArgs, ///< maximum numbers of arguments const hawk_uch_t* argSpec, ///< argument specification - FunctionHandler handler, ///< function handler - int validOpts = 0 ///< valid if these options are set + FunctionHandler handler, ///< function handler + int validOpts = 0 ///< valid if these options are set ); /// diff --git a/lib/fmt.c b/lib/fmt.c index 59602a53..c3443776 100644 --- a/lib/fmt.c +++ b/lib/fmt.c @@ -1059,6 +1059,8 @@ static int fmt_outv (hawk_fmtout_t* fmtout, va_list ap) } case 'O': /* object - ignore precision, width, adjustment */ + /* NOTE: currently, there is no hawk_fmtout_t value that 
fills the putobj field. */ + /* it's kind of reserved for future implementation */ if (!fmtout->putobj) goto invalid_format; if (fmtout->putobj(fmtout, va_arg(ap, hawk_val_t*)) <= -1) goto oops; break; diff --git a/lib/fnc.c b/lib/fnc.c index b794274d..d4925a53 100644 --- a/lib/fnc.c +++ b/lib/fnc.c @@ -549,6 +549,7 @@ static int index_or_rindex (hawk_rtx_t* rtx, int rindex) hawk_oow_t nargs; hawk_val_t* a0, * a1; hawk_int_t idx, boundary = 1; + hawk_val_type_t vtype; nargs = hawk_rtx_getnargs(rtx); a0 = hawk_rtx_getarg(rtx, 0); @@ -569,19 +570,37 @@ static int index_or_rindex (hawk_rtx_t* rtx, int rindex) if (n <= -1) return -1; } - switch (HAWK_RTX_GETVALTYPE(rtx, a0)) + vtype = HAWK_RTX_GETVALTYPE(rtx, a0); + switch (vtype) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { - hawk_bch_t* str0, * str1, * ptr; + hawk_bch_t* str0, * str1, * ptr, bchr; hawk_oow_t len0, len1; - str0 = ((hawk_val_mbs_t*)a0)->val.ptr; - len0 = ((hawk_val_mbs_t*)a0)->val.len; + if (vtype == HAWK_VAL_BCHR) + { + bchr = HAWK_RTX_GETBCHRFROMVAL(rtx, a0); + str0 = &bchr; + len0 = 1; + } + /* this part isn't needed because hawk_val_mbs_t and hawk_val_bob_t + * are almost the same except the type of the val field + else if (vtype == HAWK_VAL_BOB) + { + str0 = (hawk_bch_t*)((hawk_val_bob_t*)a0)->val.ptr; + len0 = ((hawk_val_bob_t*)a0)->val.len; + }*/ + else + { + str0 = ((hawk_val_mbs_t*)a0)->val.ptr; + len0 = ((hawk_val_mbs_t*)a0)->val.len; + } str1 = hawk_rtx_getvalbcstr(rtx, a1, &len1); - if (HAWK_UNLIKELY(!str0)) return -1; + if (HAWK_UNLIKELY(!str1)) return -1; if (nargs < 3) { @@ -721,6 +740,10 @@ int hawk_fnc_length (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int mode) len = ((hawk_val_mbs_t*)v)->val.len; break; + case HAWK_VAL_BOB: + len = ((hawk_val_bob_t*)v)->val.len; + break; + case HAWK_VAL_CHAR: len = 1; break; @@ -782,6 +805,7 @@ int hawk_fnc_substr (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { 
hawk_bch_t* str; hawk_oow_t len; @@ -909,6 +933,7 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: byte_str = 1; str.ptr = do_fld? (hawk_ooch_t*)hawk_rtx_valtobcstrdup(rtx, a0, &str.len): (hawk_ooch_t*)hawk_rtx_getvalbcstr(rtx, a0, &str.len); @@ -1085,7 +1110,7 @@ int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) a0 = hawk_rtx_getarg(rtx, 0); switch (HAWK_RTX_GETVALTYPE(rtx, a0)) { - case HAWK_VAL_BCHR: + case HAWK_VAL_BCHR: { hawk_bch_t tmp = HAWK_RTX_GETBCHRFROMVAL(rtx, a0); tmp = hawk_to_bch_lower(tmp); @@ -1095,6 +1120,7 @@ int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) } case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bcs_t str; str.ptr = hawk_rtx_getvalbcstr(rtx, a0, &str.len); @@ -1108,7 +1134,7 @@ int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) break; } - case HAWK_VAL_CHAR: + case HAWK_VAL_CHAR: { hawk_ooch_t tmp = HAWK_RTX_GETCHARFROMVAL(rtx, a0); tmp = hawk_to_ooch_lower(tmp); @@ -1154,6 +1180,7 @@ int hawk_fnc_toupper (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) } case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bcs_t str; str.ptr = hawk_rtx_getvalbcstr(rtx, a0, &str.len); @@ -1464,6 +1491,7 @@ static int __substitute (hawk_rtx_t* rtx, hawk_oow_t max_count) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: s2.ptr = hawk_rtx_getvalbcstr(rtx, r2, &s2.len); s2_free = 2; @@ -1893,6 +1921,7 @@ int hawk_fnc_sprintf (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_becs_t fbu; int fbu_inited = 0; diff --git a/lib/hawk.c b/lib/hawk.c index 420b09ed..d37aa584 100644 --- a/lib/hawk.c +++ b/lib/hawk.c @@ -138,6 +138,18 @@ int hawk_init (hawk_t* hawk, hawk_mmgr_t* mmgr, hawk_cmgr_t* cmgr, const hawk_pr HAWK_HTB_HASHER_DEFAULT }; + /* some assertions in case someone breaks the basic assumptions */ + HAWK_ASSERT (HAWK_SIZEOF(hawk_val_mbs_t) == 
HAWK_SIZEOF(hawk_val_bob_t)); + HAWK_ASSERT (HAWK_SIZEOF(hawk_val_str_t) == HAWK_SIZEOF(hawk_val_bob_t)); + HAWK_ASSERT (HAWK_OFFSETOF(hawk_val_mbs_t, val) == HAWK_OFFSETOF(hawk_val_bob_t, val)); + HAWK_ASSERT (HAWK_OFFSETOF(hawk_val_str_t, val) == HAWK_OFFSETOF(hawk_val_bob_t, val)); + HAWK_ASSERT (HAWK_SIZEOF(hawk_bcs_t) == HAWK_SIZEOF(hawk_ptl_t)); + HAWK_ASSERT (HAWK_SIZEOF(hawk_ucs_t) == HAWK_SIZEOF(hawk_ptl_t)); + HAWK_ASSERT (HAWK_OFFSETOF(hawk_bcs_t, ptr) == HAWK_OFFSETOF(hawk_ptl_t, ptr)); + HAWK_ASSERT (HAWK_OFFSETOF(hawk_bcs_t, len) == HAWK_OFFSETOF(hawk_ptl_t, len)); + HAWK_ASSERT (HAWK_OFFSETOF(hawk_ucs_t, ptr) == HAWK_OFFSETOF(hawk_ptl_t, ptr)); + HAWK_ASSERT (HAWK_OFFSETOF(hawk_ucs_t, len) == HAWK_OFFSETOF(hawk_ptl_t, len)); + /* zero out the object */ HAWK_MEMSET(hawk, 0, HAWK_SIZEOF(*hawk)); diff --git a/lib/hawk.h b/lib/hawk.h index 4262e651..8fd49024 100644 --- a/lib/hawk.h +++ b/lib/hawk.h @@ -249,8 +249,11 @@ struct hawk_val_str_t typedef struct hawk_val_str_t hawk_val_str_t; /** - * The hawk_val_str_t type is a string type. The type field is + * The hawk_val_mbs_t type is a byte string type. The type field is * #HAWK_VAL_MBS. + * + * You must keep the structure of hawk_val_mbs_t and hawk_val_bob_t the same + * except the type of the val field; */ struct hawk_val_mbs_t { @@ -334,6 +337,24 @@ struct hawk_val_ref_t }; typedef struct hawk_val_ref_t hawk_val_ref_t; +/** + * The hawk_val_bob_t type represents an internal binary object which can contain + * an arbitrary sequence of bytes. The structure is almost identical to #hawk_val_mbs_t + * except the type of the val field. The data held is supposed to be private. + * You can create a value of this type with hawk_rtx_makebobval() inside a module + * function or built-in functions to convey internal data between them. The language + * doesn't provide a way to create and/or change the value. 
+ * + * You must keep the structure of hawk_val_mbs_t and hawk_val_bob_t the same + * except the type of the val field; + */ +struct hawk_val_bob_t +{ + HAWK_VAL_HDR; + hawk_ptl_t val; +}; +typedef struct hawk_val_bob_t hawk_val_bob_t; + /** * The hawk_val_map_itr_t type defines the iterator to map value fields. */ @@ -1439,7 +1460,8 @@ enum hawk_val_type_t HAWK_VAL_ARR = 9, /**< array */ HAWK_VAL_REX = 10, /**< regular expression */ - HAWK_VAL_REF = 11 /**< reference to other types */ + HAWK_VAL_REF = 11, /**< reference to other types */ + HAWK_VAL_BOB = 12 /**< internal binary object - access not exposed to normal hawk program */ }; typedef enum hawk_val_type_t hawk_val_type_t; @@ -3168,7 +3190,7 @@ HAWK_EXPORT hawk_val_t* hawk_rtx_getarrvalfld ( */ HAWK_EXPORT hawk_val_t* hawk_rtx_makerefval ( hawk_rtx_t* rtx, - int id, + int id, hawk_val_t** adr ); @@ -3177,6 +3199,12 @@ HAWK_EXPORT hawk_val_t* hawk_rtx_makefunval ( const hawk_fun_t* fun ); +HAWK_EXPORT hawk_val_t* hawk_rtx_makebobval ( + hawk_rtx_t* rtx, + const void* ptr, + hawk_oow_t len +); + /** * The hawk_rtx_isstaticval() function determines if a value is static. 
* A static value is allocated once and reused until a runtime context @ rtx diff --git a/lib/mod-hawk.c b/lib/mod-hawk.c index e63225ad..c714b907 100644 --- a/lib/mod-hawk.c +++ b/lib/mod-hawk.c @@ -561,6 +561,7 @@ static hawk_mod_int_tab_t inttab[] = /* the names follow the val_type_name table in val.c */ { HAWK_T("VAL_ARRAY"), { HAWK_VAL_ARR } }, { HAWK_T("VAL_BCHAR"), { HAWK_VAL_BCHR } }, + { HAWK_T("VAL_BOB"), { HAWK_VAL_BOB } }, { HAWK_T("VAL_CHAR"), { HAWK_VAL_CHAR } }, { HAWK_T("VAL_FLT"), { HAWK_VAL_FLT } }, { HAWK_T("VAL_FUN"), { HAWK_VAL_FUN } }, diff --git a/lib/mod-str.c b/lib/mod-str.c index 6199a5a4..8f110d9e 100644 --- a/lib/mod-str.c +++ b/lib/mod-str.c @@ -44,6 +44,7 @@ static int fnc_normspace (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bch_t* str0; hawk_oow_t len0; @@ -86,6 +87,7 @@ static int trim (hawk_rtx_t* rtx, int flags) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bcs_t path; hawk_bch_t* npath; @@ -154,6 +156,7 @@ static int is_class (hawk_rtx_t* rtx, hawk_ooch_prop_t ctype) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bch_t* str0; hawk_oow_t len0; @@ -297,7 +300,7 @@ static int fnc_frombcharcode (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) a0 = hawk_rtx_getarg(rtx, 0); if (hawk_rtx_valtoint(rtx, a0, &cc) <= -1) return -1; - retv = hawk_rtx_makecharval(rtx, (hawk_ooch_t)cc); + retv = hawk_rtx_makebchrval(rtx, (hawk_bch_t)cc); if (HAWK_UNLIKELY(!retv)) return -1; } else @@ -401,6 +404,7 @@ static int fnc_tocharcode (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bch_t* str0; hawk_oow_t len0; @@ -553,6 +557,7 @@ static int fnc_tombs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) r = a0; break; + case HAWK_VAL_BOB: /* BOB must fall thru and reach below */ default: { hawk_bcs_t str; @@ -605,6 +610,7 @@ static int fnc_tonum (hawk_rtx_t* rtx, const hawk_fnc_info_t* 
fi) } case HAWK_VAL_MBS: + case HAWK_VAL_BOB: /* assume hawk_val_mbs_t and hawk_val_bob_t are the same */ { /* if the value is known to be a byte string, it supports the optional * base parameter */ @@ -705,6 +711,7 @@ static int fnc_subchar (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bch_t* str; hawk_oow_t len; diff --git a/lib/rio.c b/lib/rio.c index a57b5ae1..3bf2f81b 100644 --- a/lib/rio.c +++ b/lib/rio.c @@ -237,6 +237,11 @@ static HAWK_INLINE int resolve_brs (hawk_rtx_t* rtx, hawk_val_t* brs, hawk_bcs_t rrs->len = ((hawk_val_mbs_t*)brs)->val.len; break; + case HAWK_VAL_BOB: + rrs->ptr = (hawk_bch_t*)((hawk_val_bob_t*)brs)->val.ptr; + rrs->len = ((hawk_val_bob_t*)brs)->val.len; + break; + default: rrs->ptr = hawk_rtx_valtobcstrdup(rtx, brs, &rrs->len); if (rrs->ptr == HAWK_NULL) ret = -1; @@ -1022,6 +1027,9 @@ int hawk_rtx_writeioval (hawk_rtx_t* rtx, hawk_out_type_t out_type, const hawk_o case HAWK_VAL_MBS: return hawk_rtx_writeiobytes(rtx, out_type, name, ((hawk_val_mbs_t*)v)->val.ptr, ((hawk_val_mbs_t*)v)->val.len); + case HAWK_VAL_BOB: + return hawk_rtx_writeiobytes(rtx, out_type, name, (hawk_bch_t*)((hawk_val_bob_t*)v)->val.ptr, ((hawk_val_bob_t*)v)->val.len); + default: { hawk_rtx_valtostr_out_t out; diff --git a/lib/run.c b/lib/run.c index d8680068..ca12bcca 100644 --- a/lib/run.c +++ b/lib/run.c @@ -3654,6 +3654,16 @@ static int run_printf (hawk_rtx_t* rtx, hawk_nde_print_t* nde) } break; + case HAWK_VAL_BOB: + n = output_formatted_bytes(rtx, nde->out_type, out.ptr, ((hawk_val_bob_t*)v)->val.ptr, ((hawk_val_bob_t*)v)->val.len, head->next); + hawk_rtx_refdownval(rtx, v); + if (n <= -1) + { + if (n == PRINT_IOERR) xret = n; + else goto oops; + } + break; + default: /* the remaining arguments are ignored as the format cannot * contain any % characters. e.g. 
printf (1, "xxxx") */ @@ -5818,7 +5828,7 @@ static int teq_val (hawk_rtx_t* rtx, hawk_val_t* left, hawk_val_t* right) break; case HAWK_VAL_STR: - n = hawk_comp_oochars ( + n = hawk_comp_oochars( ((hawk_val_str_t*)left)->val.ptr, ((hawk_val_str_t*)left)->val.len, ((hawk_val_str_t*)right)->val.ptr, @@ -5827,7 +5837,7 @@ static int teq_val (hawk_rtx_t* rtx, hawk_val_t* left, hawk_val_t* right) break; case HAWK_VAL_MBS: - n = hawk_comp_bchars ( + n = hawk_comp_bchars( ((hawk_val_mbs_t*)left)->val.ptr, ((hawk_val_mbs_t*)left)->val.len, ((hawk_val_mbs_t*)right)->val.ptr, @@ -6267,8 +6277,8 @@ static hawk_val_t* eval_binop_concat (hawk_rtx_t* rtx, hawk_val_t* left, hawk_va res = (hawk_val_t*)hawk_rtx_makembsvalwithbchars2(rtx, l.ptr, l.len, r.ptr, r.len); - hawk_rtx_freevalbcstr (rtx, right, r.ptr); - hawk_rtx_freevalbcstr (rtx, left, l.ptr); + hawk_rtx_freevalbcstr(rtx, right, r.ptr); + hawk_rtx_freevalbcstr(rtx, left, l.ptr); break; } @@ -6288,8 +6298,8 @@ static hawk_val_t* eval_binop_concat (hawk_rtx_t* rtx, hawk_val_t* left, hawk_va res = (hawk_val_t*)hawk_rtx_makestrvalwithoochars2(rtx, l.ptr, l.len, r.ptr, r.len); - hawk_rtx_freevaloocstr (rtx, right, r.ptr); - hawk_rtx_freevaloocstr (rtx, left, l.ptr); + hawk_rtx_freevaloocstr(rtx, right, r.ptr); + hawk_rtx_freevaloocstr(rtx, left, l.ptr); break; } } @@ -8919,6 +8929,11 @@ wp_mod_main: ch_len = 1; break; + case HAWK_VAL_BOB: + ch = (((hawk_val_bob_t*)v)->val.len > 0)? 
((hawk_bch_t*)((hawk_val_bob_t*)v)->val.ptr)[0]: '\0'; + ch_len = 1; + break; + default: hawk_rtx_refdownval(rtx, v); hawk_rtx_seterrnum(rtx, HAWK_NULL, HAWK_EVALTOCHR); @@ -9091,6 +9106,17 @@ wp_mod_main: #endif break; + case HAWK_VAL_BOB: + #if defined(HAWK_OOCH_IS_BCH) + str_ptr = ((hawk_val_bob_t*)v)->val.ptr; + str_len = ((hawk_val_bob_t*)v)->val.len; + #else + if (fmt[i] == HAWK_T('s')) goto duplicate; + str_ptr = (hawk_ooch_t*)((hawk_val_bob_t*)v)->val.ptr; + str_len = ((hawk_val_bob_t*)v)->val.len; + #endif + break; + default: duplicate: str_ptr = hawk_rtx_valtooocstrdup(rtx, v, &str_len); @@ -9803,6 +9829,17 @@ wp_mod_main: else ch = HAWK_BT('\0'); break; + case HAWK_VAL_BOB: + ch_len = ((hawk_val_bob_t*)v)->val.len; + if (ch_len > 0) + { + ch = ((hawk_bch_t*)((hawk_val_bob_t*)v)->val.ptr)[0]; + ch_len = 1; + } + else ch = 0; + break; + + default: hawk_rtx_refdownval(rtx, v); hawk_rtx_seterrnum(rtx, HAWK_NULL, HAWK_EVALTOCHR); @@ -9951,6 +9988,11 @@ wp_mod_main: str_len = ((hawk_val_mbs_t*)v)->val.len; break; + case HAWK_VAL_BOB: + str_ptr = (hawk_bch_t*)((hawk_val_bob_t*)v)->val.ptr; + str_len = ((hawk_val_bob_t*)v)->val.len; + break; + case HAWK_VAL_CHAR: #if defined(HAWK_OOCH_IS_BCH) bchr_tmp = HAWK_RTX_GETBCHRFROMVAL(rtx, v); diff --git a/lib/val.c b/lib/val.c index 9418b133..75a9c7e1 100644 --- a/lib/val.c +++ b/lib/val.c @@ -75,7 +75,8 @@ static const hawk_ooch_t* val_type_name[] = HAWK_T("array"), HAWK_T("rex"), - HAWK_T("ref") + HAWK_T("ref"), + HAWK_T("bob") }; /* --------------------------------------------------------------------- */ @@ -1478,6 +1479,28 @@ hawk_val_t* hawk_rtx_makefunval (hawk_rtx_t* rtx, const hawk_fun_t* fun) return (hawk_val_t*)val; } +hawk_val_t* hawk_rtx_makebobval (hawk_rtx_t* rtx, const void* ptr, hawk_oow_t len) +{ + hawk_val_bob_t* val; + + val = (hawk_val_bob_t*)hawk_rtx_callocmem(rtx, HAWK_SIZEOF(hawk_val_bob_t) + len); + if (HAWK_UNLIKELY(!val)) return HAWK_NULL; + + val->v_type = HAWK_VAL_BOB; + val->v_refs = 0; + 
val->v_static = 0; + val->v_nstr = 0; + val->v_gc = 0; + val->val.len = len; + val->val.ptr = (hawk_bch_t*)(val + 1); + HAWK_MEMCPY (val + 1, ptr, len); + +#if defined(DEBUG_VAL) + hawk_logfmt (hawk_rtx_gethawk(rtx), HAWK_LOG_STDERR, HAWK_T("make_bob_val => %p - [%O]\n"), val, val); +#endif + return (hawk_val_t*)val; +} + int HAWK_INLINE hawk_rtx_isstaticval (hawk_rtx_t* rtx, const hawk_val_t* val) { return HAWK_VTR_IS_POINTER(val) && HAWK_IS_STATICVAL(val); @@ -1653,6 +1676,10 @@ void hawk_rtx_freeval (hawk_rtx_t* rtx, hawk_val_t* val, int flags) } else hawk_rtx_freemem (rtx, val); break; + + case HAWK_VAL_BOB: + hawk_rtx_freemem (rtx, val); + break; } } } @@ -1810,6 +1837,8 @@ int hawk_rtx_valtobool (hawk_rtx_t* rtx, const hawk_val_t* val) return HAWK_ARR_SIZE(((hawk_val_arr_t*)val)->arr) > 0; case HAWK_VAL_REF: return val_ref_to_bool(rtx, (hawk_val_ref_t*)val); + case HAWK_VAL_BOB: + return ((hawk_val_bob_t*)val)->val.len > 0; } /* the type of a value should be one of HAWK_VAL_XXX enumerators defined in hawk-prv.h */ @@ -2603,6 +2632,7 @@ hawk_bch_t* hawk_rtx_getvalbcstrwithcmgr (hawk_rtx_t* rtx, hawk_val_t* v, hawk_o /* fall through */ #endif + case HAWK_VAL_BOB: /* BOB must be duplicated */ default: duplicate: return hawk_rtx_valtobcstrdupwithcmgr(rtx, v, len, cmgr); @@ -2643,6 +2673,7 @@ void hawk_rtx_freevalbcstr (hawk_rtx_t* rtx, hawk_val_t* v, hawk_bch_t* str) /* fall through */ #endif + case HAWK_VAL_BOB: /* BOB to MBS is always duplication */ default: freemem: hawk_rtx_freemem (rtx, str); @@ -2782,6 +2813,7 @@ int hawk_rtx_valtonum (hawk_rtx_t* rtx, const hawk_val_t* v, hawk_int_t* l, hawk return val_ref_to_num(rtx, (hawk_val_ref_t*)v, l, r); case HAWK_VAL_REX: + case HAWK_VAL_BOB: /* never allow reference */ default: invalid: #if defined(DEBUG_VAL) @@ -2835,6 +2867,7 @@ hawk_fun_t* hawk_rtx_valtofun (hawk_rtx_t* rtx, hawk_val_t* v) case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: { hawk_bcs_t x; x.ptr = hawk_rtx_getvalbcstr(rtx, v, &x.len); 
@@ -2893,6 +2926,7 @@ hawk_fnc_t* hawk_rtx_valtofnc (hawk_rtx_t* rtx, hawk_val_t* v, hawk_fnc_t* rfnc) { case HAWK_VAL_BCHR: case HAWK_VAL_MBS: + case HAWK_VAL_BOB: case HAWK_VAL_CHAR: case HAWK_VAL_STR: { @@ -3011,6 +3045,13 @@ hawk_int_t hawk_rtx_hashval (hawk_rtx_t* rtx, hawk_val_t* v) break; } + case HAWK_VAL_BOB: + { + hawk_val_bob_t* dv = (hawk_val_bob_t*)v; + hv = (hawk_int_t)hash((hawk_uint8_t*)dv->val.ptr, dv->val.len); /* val.len is already the byte count (see hawk_rtx_makebobval); taking sizeof on the pointee of val.ptr is not portable if ptr is void* */ + break; + } + default: #if defined(DEBUG_VAL) @@ -3312,11 +3353,11 @@ void hawk_dprintval (hawk_rtx_t* run, hawk_val_t* val) break; case HAWK_VAL_STR: - hawk_errputstrf (HAWK_T("%s"), ((hawk_val_str_t*)val)->ptr); + hawk_errputstrf (HAWK_T("%.*s"), ((hawk_val_str_t*)val)->val.len, ((hawk_val_str_t*)val)->val.ptr); /* the pointer/length pair lives in the val member */ break; case HAWK_VAL_MBS: - hawk_errputstrf (HAWK_T("%hs"), ((hawk_val_mbs_t*)val)->ptr); + hawk_errputstrf (HAWK_T("%.*hs"), ((hawk_val_mbs_t*)val)->val.len, ((hawk_val_mbs_t*)val)->val.ptr); /* the pointer/length pair lives in the val member */ break; case HAWK_VAL_REX: @@ -3345,6 +3386,10 @@ void hawk_dprintval (hawk_rtx_t* run, hawk_val_t* val) hawk_errputstrf (HAWK_T("]")); break; + case HAWK_VAL_BOB: + hawk_errputstrf (HAWK_T("%.*k"), ((hawk_val_bob_t*)val)->val.len, ((hawk_val_bob_t*)val)->val.ptr); /* hawk_val_bob_t keeps its data in val (hawk_ptl_t) */ + break; + default: hawk_errputstrf (HAWK_T("**** INTERNAL ERROR - INVALID VALUE TYPE ****\n")); } diff --git a/t/h-002.hawk b/t/h-002.hawk index 4cbf63fc..a429ef5c 100644 --- a/t/h-002.hawk +++ b/t/h-002.hawk @@ -405,6 +405,16 @@ function main() tap_ensure (str::rindex(@b"\xFFQ\xABX\xABZ", @b"\xAB"), 5, @SCRIPTNAME, @SCRIPTLINE); tap_ensure (str::rindex(@b"\xFFQ\xABX\xABZ", @b"Q\xAB"), 2, @SCRIPTNAME, @SCRIPTLINE); tap_ensure (str::rindex(@b"\xFFQ\xABX\xABZ", @b"Q\xABQ"), 0, @SCRIPTNAME, @SCRIPTLINE); + + tap_ensure (str::index(str::frombcharcode(65), @b"B"), 0, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (str::index(str::frombcharcode(65), @b"A"), 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (str::index(str::frombcharcode(65), @b"B"), 0, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure 
(str::index(str::frombcharcode(65), @b"A"), 1, @SCRIPTNAME, @SCRIPTLINE); + + tap_ensure (str::index(@b'A', @b"B"), 0, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (str::index(@b'A', @b"A"), 1, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (str::index('A', @b"B"), 0, @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (str::index('A', @b"A"), 1, @SCRIPTNAME, @SCRIPTLINE); } {