diff --git a/hawk/bin/main.c b/hawk/bin/main.c index bb05c81a..9dd3f105 100644 --- a/hawk/bin/main.c +++ b/hawk/bin/main.c @@ -507,6 +507,7 @@ struct opttab_t { "pablock", HAWK_PABLOCK, "enable pattern-action loop" }, { "rexbound", HAWK_REXBOUND, "enable {n,m} in a regular expression" }, { "ncmponstr", HAWK_NCMPONSTR, "perform numeric comparsion on numeric strings" }, + { "numstrdetect", HAWK_NUMSTRDETECT, "detect a numeric string and convert it to a number" }, { "strictnaming", HAWK_STRICTNAMING, "enable the strict naming rule" }, { "tolerant", HAWK_TOLERANT, "make more fault-tolerant" }, { HAWK_NULL, 0, HAWK_NULL } @@ -652,6 +653,7 @@ static int process_argv (int argc, hawk_bch_t* argv[], struct arg_t* arg) { ":pablock", '\0' }, { ":rexbound", '\0' }, { ":ncmponstr", '\0' }, + { ":numstrdetect", '\0' }, { ":strictnaming", '\0' }, { ":tolerant", '\0' }, diff --git a/hawk/lib/hawk-prv.h b/hawk/lib/hawk-prv.h index 6f2ca193..2b3caaf2 100644 --- a/hawk/lib/hawk-prv.h +++ b/hawk/lib/hawk-prv.h @@ -481,6 +481,7 @@ struct hawk_rtx_t int ignorecase; int striprecspc; int stripstrspc; + int numstrdetect; hawk_int_t nr; hawk_int_t fnr; @@ -605,9 +606,7 @@ static HAWK_INLINE void HAWK_RTX_STACK_POP (hawk_rtx_t* rtx) #define HAWK_RTX_IS_STRIPRECSPC_ON(rtx) ((rtx)->gbl.striprecspc > 0 || ((rtx)->gbl.striprecspc < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_STRIPRECSPC))) #define HAWK_RTX_IS_STRIPSTRSPC_ON(rtx) ((rtx)->gbl.stripstrspc > 0 || ((rtx)->gbl.stripstrspc < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_STRIPSTRSPC))) - - - +#define HAWK_RTX_IS_NUMSTRDETECT_ON(rtx) ((rtx)->gbl.numstrdetect > 0 || ((rtx)->gbl.numstrdetect < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_NUMSTRDETECT))) /* on if the NUMSTRDETECT global is positive; if it is negative (not set), the numstrdetect pragma trait decides */ #if !defined(HAWK_DEFAULT_MODLIBDIRS) # define HAWK_DEFAULT_MODLIBDIRS "" diff --git a/hawk/lib/hawk.h b/hawk/lib/hawk.h index 03a9fc37..57af1930 100644 --- a/hawk/lib/hawk.h +++ b/hawk/lib/hawk.h @@ -1317,6 +1317,12 @@ enum hawk_trait_t */ HAWK_TOLERANT = (1 << 17), + /* + * detect a 
numeric string and convert to a numeric type + * automatically + */ + HAWK_NUMSTRDETECT = (1 << 18), + /** * makes #hawk_t to behave compatibly with classical AWK * implementations @@ -1324,11 +1330,11 @@ enum hawk_trait_t HAWK_CLASSIC = HAWK_IMPLICIT | HAWK_RIO | HAWK_NEWLINE | HAWK_BLANKCONCAT | HAWK_PABLOCK | - HAWK_STRIPSTRSPC | HAWK_STRICTNAMING, + HAWK_STRIPSTRSPC | HAWK_STRICTNAMING | HAWK_NUMSTRDETECT, HAWK_MODERN = HAWK_CLASSIC | HAWK_FLEXMAP | HAWK_REXBOUND | - HAWK_RWPIPE | HAWK_TOLERANT | HAWK_NEXTOFILE /*| HAWK_NCMPONSTR*/ + HAWK_RWPIPE | HAWK_TOLERANT | HAWK_NEXTOFILE | HAWK_NUMSTRDETECT /*| HAWK_NCMPONSTR*/ }; typedef enum hawk_trait_t hawk_trait_t; @@ -1380,6 +1386,7 @@ enum hawk_gbl_id_t HAWK_GBL_IGNORECASE, HAWK_GBL_NF, HAWK_GBL_NR, + HAWK_GBL_NUMSTRDETECT, HAWK_GBL_OFILENAME, HAWK_GBL_OFMT, HAWK_GBL_OFS, diff --git a/hawk/lib/parse.c b/hawk/lib/parse.c index 96ebef02..194181f9 100644 --- a/hawk/lib/parse.c +++ b/hawk/lib/parse.c @@ -334,6 +334,9 @@ static global_t gtab[] = /* input record number */ { HAWK_T("NR"), 2, HAWK_PABLOCK }, + /* detect a numeric string */ + { HAWK_T("NUMSTRDETECT"), 12, 0 }, + /* current output file name */ { HAWK_T("OFILENAME"), 9, HAWK_PABLOCK | HAWK_NEXTOFILE }, @@ -959,6 +962,7 @@ static int parse_progunit (hawk_t* hawk) else if (MATCH(hawk, TOK_XPRAGMA)) { hawk_oocs_t name; + int trait; if (get_token(hawk) <= -1) return -1; if (!MATCH(hawk, TOK_IDENT)) @@ -997,58 +1001,39 @@ static int parse_progunit (hawk_t* hawk) hawk_copy_oochars_to_oocstr (hawk->parse.pragma.entry, HAWK_COUNTOF(hawk->parse.pragma.entry), HAWK_OOECS_PTR(hawk->tok.name), HAWK_OOECS_LEN(hawk->tok.name)); } } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("implicit"), 0) == 0) + /* NOTE: trait = is an intended assignment */ + else if (((trait = HAWK_IMPLICIT) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("implicit"), 0) == 0) || + ((trait = HAWK_MULTILINESTR) && hawk_comp_oochars_oocstr(name.ptr, name.len, 
HAWK_T("multilinestr"), 0) == 0)) { /* @pragma implicit on - * @pragma implicit off */ + * @pragma implicit off + * @pragma multilinestr on + * @pragma multilinestr off */ + hawk_oocs_t value; + if (get_token(hawk) <= -1) return -1; if (!MATCH(hawk, TOK_IDENT)) { error_ident_on_off_expected_for_implicit: - hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'implicit'")); + hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for '%.*js'"), name.len, name.ptr); return -1; } - name.len = HAWK_OOECS_LEN(hawk->tok.name); - name.ptr = HAWK_OOECS_PTR(hawk->tok.name); - if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0) + value.len = HAWK_OOECS_LEN(hawk->tok.name); + value.ptr = HAWK_OOECS_PTR(hawk->tok.name); + if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("on"), 0) == 0) { - hawk->parse.pragma.trait |= HAWK_IMPLICIT; + hawk->parse.pragma.trait |= trait; } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0) + else if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("off"), 0) == 0) { - hawk->parse.pragma.trait &= ~HAWK_IMPLICIT; + hawk->parse.pragma.trait &= ~trait; } else { goto error_ident_on_off_expected_for_implicit; } } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("multilinestr"), 0) == 0) - { - if (get_token(hawk) <= -1) return -1; - if (!MATCH(hawk, TOK_IDENT)) - { - error_ident_on_off_expected_for_multilinestr: - hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'multilinestr'")); - return -1; - } - - name.len = HAWK_OOECS_LEN(hawk->tok.name); - name.ptr = HAWK_OOECS_PTR(hawk->tok.name); - if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0) - { - hawk->parse.pragma.trait |= HAWK_MULTILINESTR; - } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0) - { - hawk->parse.pragma.trait &= 
~HAWK_MULTILINESTR; - } - else - { - goto error_ident_on_off_expected_for_multilinestr; - } - } /* --------------------------------------------------------------------- * the pragmas up to this point affect the parser * the following pragmas affect runtime @@ -1071,85 +1056,45 @@ static int parse_progunit (hawk_t* hawk) /* take the specified value if it's greater than the existing value */ if (sl > hawk->parse.pragma.rtx_stack_limit) hawk->parse.pragma.rtx_stack_limit = sl; } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("striprecspc"), 0) == 0) + /* NOTE: trait = is an intended assignment */ + else if (((trait = HAWK_STRIPRECSPC) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("striprecspc"), 0) == 0) || + ((trait = HAWK_STRIPSTRSPC) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("stripstrspc"), 0) == 0) || + ((trait = HAWK_NUMSTRDETECT) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("numstrdetect"), 0) == 0)) { /* @pragma striprecspc on * @pragma striprecspc off + * @pragma stripstrspc on + * @pragma stripstrspc off + * @pragma numstrdetect on + * @pragma numstrdetect off * * Take note the global STRIPRECSPC is available for context based change. * STRIPRECSPC takes precedence over this pragma. 
*/ int is_on; + hawk_oocs_t value; if (get_token(hawk) <= -1) return -1; if (!MATCH(hawk, TOK_IDENT)) { error_ident_on_off_expected_for_striprecspc: - hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'striprecspc'")); + hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for '%.*js'"), name.len, name.ptr); return -1; } - name.len = HAWK_OOECS_LEN(hawk->tok.name); - name.ptr = HAWK_OOECS_PTR(hawk->tok.name); - if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0) - { - is_on = 1; - } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0) - { - is_on = 0; - } - else - { - goto error_ident_on_off_expected_for_striprecspc; - } + value.len = HAWK_OOECS_LEN(hawk->tok.name); + value.ptr = HAWK_OOECS_PTR(hawk->tok.name); + if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("on"), 0) == 0) is_on = 1; + else if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("off"), 0) == 0) is_on = 0; + else goto error_ident_on_off_expected_for_striprecspc; if (hawk->sio.inp == &hawk->sio.arg) { - /* only the top level source. ignore striprecspc pragma in other levels */ + /* only the top level source. 
ignore the specified pragma in other levels */ if (is_on) - hawk->parse.pragma.trait |= HAWK_STRIPRECSPC; + hawk->parse.pragma.trait |= trait; else - hawk->parse.pragma.trait &= ~HAWK_STRIPRECSPC; - } - } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("stripstrspc"), 0) == 0) - { - /* @pragma stripstrspc on - * @pragma stripstrspc off - */ - int is_on; - - if (get_token(hawk) <= -1) return -1; - if (!MATCH(hawk, TOK_IDENT)) - { - error_ident_on_off_expected_for_stripstrspc: - hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'stripstrspc'")); - return -1; - } - - name.len = HAWK_OOECS_LEN(hawk->tok.name); - name.ptr = HAWK_OOECS_PTR(hawk->tok.name); - if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0) - { - is_on = 1; - } - else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0) - { - is_on = 0; - } - else - { - goto error_ident_on_off_expected_for_stripstrspc; - } - - if (hawk->sio.inp == &hawk->sio.arg) - { - /* only the top level source. ignore stripstrspc pragma in other levels */ - if (is_on) - hawk->parse.pragma.trait |= HAWK_STRIPSTRSPC; - else - hawk->parse.pragma.trait &= ~HAWK_STRIPSTRSPC; + hawk->parse.pragma.trait &= ~trait; } } else diff --git a/hawk/lib/rec.c b/hawk/lib/rec.c index d31b87e5..9f413ff3 100644 --- a/hawk/lib/rec.c +++ b/hawk/lib/rec.c @@ -48,8 +48,9 @@ int hawk_rtx_setrec (hawk_rtx_t* rtx, hawk_oow_t idx, const hawk_oocs_t* str, in if (split_record(rtx, prefer_number) <= -1) goto oops; - v = prefer_number? hawk_rtx_makenumorstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)): - hawk_rtx_makenstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)); + v = prefer_number? 
hawk_rtx_makenumorstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)): /* number or string */ + hawk_rtx_makenstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)); /* str with nstr flag */ + if (HAWK_UNLIKELY(!v)) goto oops; } else diff --git a/hawk/lib/run.c b/hawk/lib/run.c index 6dedcffd..2eed0a08 100644 --- a/hawk/lib/run.c +++ b/hawk/lib/run.c @@ -472,6 +472,22 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t break; } + case HAWK_GBL_NUMSTRDETECT: + { + hawk_int_t l; + hawk_flt_t r; + int vt; + + vt = hawk_rtx_valtonum(rtx, val, &l, &r); + if (vt <= -1) return -1; + + if (vt == 0) + rtx->gbl.numstrdetect = ((l > 0)? 1: (l < 0)? -1: 0); + else + rtx->gbl.numstrdetect = ((r > 0.0)? 1: (r < 0.0)? -1: 0); + break; + } + case HAWK_GBL_OFMT: { hawk_oow_t i; @@ -1095,6 +1111,7 @@ static int init_rtx (hawk_rtx_t* rtx, hawk_t* hawk, hawk_rio_cbs_t* rio) rtx->gbl.ignorecase = 0; rtx->gbl.striprecspc = -1; /* means 'not set' */ rtx->gbl.stripstrspc = -1; /* means 'not set' */ + rtx->gbl.numstrdetect = -1; /* means 'not set' */ return 0; diff --git a/hawk/lib/val.c b/hawk/lib/val.c index fdb56671..023333df 100644 --- a/hawk/lib/val.c +++ b/hawk/lib/val.c @@ -741,9 +741,12 @@ hawk_val_t* hawk_rtx_makenumorstrvalwithuchars (hawk_rtx_t* rtx, const hawk_uch_ if (len == 1 && ptr[0] == '.') goto make_str; - x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); - if (x == 0) return hawk_rtx_makeintval(rtx, l); - else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx)) + { + x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); + if (x == 0) return hawk_rtx_makeintval(rtx, l); + else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + } make_str: return hawk_rtx_makestrvalwithuchars(rtx, ptr, len); @@ -757,9 
+760,12 @@ hawk_val_t* hawk_rtx_makenumorstrvalwithbchars (hawk_rtx_t* rtx, const hawk_bch_ if (len == 1 && ptr[0] == '.') goto make_str; - x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); - if (x == 0) return hawk_rtx_makeintval(rtx, l); - else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx)) + { + x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); + if (x == 0) return hawk_rtx_makeintval(rtx, l); + else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + } make_str: return hawk_rtx_makestrvalwithbchars(rtx, ptr, len); @@ -971,9 +977,12 @@ hawk_val_t* hawk_rtx_makenumormbsvalwithuchars (hawk_rtx_t* rtx, const hawk_uch_ if (len == 1 && ptr[0] == '.') goto make_str; - x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); - if (x == 0) return hawk_rtx_makeintval(rtx, l); - else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx)) + { + x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); + if (x == 0) return hawk_rtx_makeintval(rtx, l); + else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + } make_str: return hawk_rtx_makembsvalwithuchars(rtx, ptr, len); @@ -987,9 +996,12 @@ hawk_val_t* hawk_rtx_makenumormbsvalwithbchars (hawk_rtx_t* rtx, const hawk_bch_ if (len == 1 && ptr[0] == '.') goto make_str; - x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); - if (x == 0) return hawk_rtx_makeintval(rtx, l); - else if (x >= 1) return hawk_rtx_makefltval(rtx, r); + if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx)) + { + x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); + if (x == 0) return hawk_rtx_makeintval(rtx, l); + else if 
(x >= 1) return hawk_rtx_makefltval(rtx, r); + } make_str: return hawk_rtx_makembsvalwithbchars(rtx, ptr, len);