added a new @pragma numstrdetect on/off

added HAWK_NUMSTRDETECT
This commit is contained in:
hyung-hwan 2021-10-14 05:00:55 +00:00
parent 2e3f1a5463
commit 3b51b1290e
7 changed files with 94 additions and 111 deletions

View File

@ -507,6 +507,7 @@ struct opttab_t
{ "pablock", HAWK_PABLOCK, "enable pattern-action loop" }, { "pablock", HAWK_PABLOCK, "enable pattern-action loop" },
{ "rexbound", HAWK_REXBOUND, "enable {n,m} in a regular expression" }, { "rexbound", HAWK_REXBOUND, "enable {n,m} in a regular expression" },
{ "ncmponstr", HAWK_NCMPONSTR, "perform numeric comparsion on numeric strings" }, { "ncmponstr", HAWK_NCMPONSTR, "perform numeric comparsion on numeric strings" },
{ "numstrdetect", HAWK_NUMSTRDETECT, "detect a numeric string and convert it to a number" },
{ "strictnaming", HAWK_STRICTNAMING, "enable the strict naming rule" }, { "strictnaming", HAWK_STRICTNAMING, "enable the strict naming rule" },
{ "tolerant", HAWK_TOLERANT, "make more fault-tolerant" }, { "tolerant", HAWK_TOLERANT, "make more fault-tolerant" },
{ HAWK_NULL, 0, HAWK_NULL } { HAWK_NULL, 0, HAWK_NULL }
@ -652,6 +653,7 @@ static int process_argv (int argc, hawk_bch_t* argv[], struct arg_t* arg)
{ ":pablock", '\0' }, { ":pablock", '\0' },
{ ":rexbound", '\0' }, { ":rexbound", '\0' },
{ ":ncmponstr", '\0' }, { ":ncmponstr", '\0' },
{ ":numstrdetect", '\0' },
{ ":strictnaming", '\0' }, { ":strictnaming", '\0' },
{ ":tolerant", '\0' }, { ":tolerant", '\0' },

View File

@ -481,6 +481,7 @@ struct hawk_rtx_t
int ignorecase; int ignorecase;
int striprecspc; int striprecspc;
int stripstrspc; int stripstrspc;
int numstrdetect;
hawk_int_t nr; hawk_int_t nr;
hawk_int_t fnr; hawk_int_t fnr;
@ -605,9 +606,7 @@ static HAWK_INLINE void HAWK_RTX_STACK_POP (hawk_rtx_t* rtx)
#define HAWK_RTX_IS_STRIPRECSPC_ON(rtx) ((rtx)->gbl.striprecspc > 0 || ((rtx)->gbl.striprecspc < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_STRIPRECSPC))) #define HAWK_RTX_IS_STRIPRECSPC_ON(rtx) ((rtx)->gbl.striprecspc > 0 || ((rtx)->gbl.striprecspc < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_STRIPRECSPC)))
#define HAWK_RTX_IS_STRIPSTRSPC_ON(rtx) ((rtx)->gbl.stripstrspc > 0 || ((rtx)->gbl.stripstrspc < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_STRIPSTRSPC))) #define HAWK_RTX_IS_STRIPSTRSPC_ON(rtx) ((rtx)->gbl.stripstrspc > 0 || ((rtx)->gbl.stripstrspc < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_STRIPSTRSPC)))
/* Evaluates to non-zero when numeric-string detection is enabled for rtx.
 * gbl.numstrdetect is tri-state: a positive value forces detection on,
 * zero forces it off, and a negative value means 'not set', in which case
 * the HAWK_NUMSTRDETECT bit of the parsed pragma trait decides. */
#define HAWK_RTX_IS_NUMSTRDETECT_ON(rtx) ((rtx)->gbl.numstrdetect > 0 || ((rtx)->gbl.numstrdetect < 0 && ((rtx)->hawk->parse.pragma.trait & HAWK_NUMSTRDETECT)))
#if !defined(HAWK_DEFAULT_MODLIBDIRS) #if !defined(HAWK_DEFAULT_MODLIBDIRS)
# define HAWK_DEFAULT_MODLIBDIRS "" # define HAWK_DEFAULT_MODLIBDIRS ""

View File

@ -1317,6 +1317,12 @@ enum hawk_trait_t
*/ */
HAWK_TOLERANT = (1 << 17), HAWK_TOLERANT = (1 << 17),
/**
 * detects a numeric string and converts it to a numeric type
 * automatically
 */
HAWK_NUMSTRDETECT = (1 << 18),
/** /**
* makes #hawk_t to behave compatibly with classical AWK * makes #hawk_t to behave compatibly with classical AWK
* implementations * implementations
@ -1324,11 +1330,11 @@ enum hawk_trait_t
HAWK_CLASSIC = HAWK_CLASSIC =
HAWK_IMPLICIT | HAWK_RIO | HAWK_IMPLICIT | HAWK_RIO |
HAWK_NEWLINE | HAWK_BLANKCONCAT | HAWK_PABLOCK | HAWK_NEWLINE | HAWK_BLANKCONCAT | HAWK_PABLOCK |
HAWK_STRIPSTRSPC | HAWK_STRICTNAMING, HAWK_STRIPSTRSPC | HAWK_STRICTNAMING | HAWK_NUMSTRDETECT,
HAWK_MODERN = HAWK_MODERN =
HAWK_CLASSIC | HAWK_FLEXMAP | HAWK_REXBOUND | HAWK_CLASSIC | HAWK_FLEXMAP | HAWK_REXBOUND |
HAWK_RWPIPE | HAWK_TOLERANT | HAWK_NEXTOFILE /*| HAWK_NCMPONSTR*/ HAWK_RWPIPE | HAWK_TOLERANT | HAWK_NEXTOFILE | HAWK_NUMSTRDETECT /*| HAWK_NCMPONSTR*/
}; };
typedef enum hawk_trait_t hawk_trait_t; typedef enum hawk_trait_t hawk_trait_t;
@ -1380,6 +1386,7 @@ enum hawk_gbl_id_t
HAWK_GBL_IGNORECASE, HAWK_GBL_IGNORECASE,
HAWK_GBL_NF, HAWK_GBL_NF,
HAWK_GBL_NR, HAWK_GBL_NR,
HAWK_GBL_NUMSTRDETECT,
HAWK_GBL_OFILENAME, HAWK_GBL_OFILENAME,
HAWK_GBL_OFMT, HAWK_GBL_OFMT,
HAWK_GBL_OFS, HAWK_GBL_OFS,

View File

@ -334,6 +334,9 @@ static global_t gtab[] =
/* input record number */ /* input record number */
{ HAWK_T("NR"), 2, HAWK_PABLOCK }, { HAWK_T("NR"), 2, HAWK_PABLOCK },
/* detect a numeric string */
{ HAWK_T("NUMSTRDETECT"), 12, 0 },
/* current output file name */ /* current output file name */
{ HAWK_T("OFILENAME"), 9, HAWK_PABLOCK | HAWK_NEXTOFILE }, { HAWK_T("OFILENAME"), 9, HAWK_PABLOCK | HAWK_NEXTOFILE },
@ -959,6 +962,7 @@ static int parse_progunit (hawk_t* hawk)
else if (MATCH(hawk, TOK_XPRAGMA)) else if (MATCH(hawk, TOK_XPRAGMA))
{ {
hawk_oocs_t name; hawk_oocs_t name;
int trait;
if (get_token(hawk) <= -1) return -1; if (get_token(hawk) <= -1) return -1;
if (!MATCH(hawk, TOK_IDENT)) if (!MATCH(hawk, TOK_IDENT))
@ -997,58 +1001,39 @@ static int parse_progunit (hawk_t* hawk)
hawk_copy_oochars_to_oocstr (hawk->parse.pragma.entry, HAWK_COUNTOF(hawk->parse.pragma.entry), HAWK_OOECS_PTR(hawk->tok.name), HAWK_OOECS_LEN(hawk->tok.name)); hawk_copy_oochars_to_oocstr (hawk->parse.pragma.entry, HAWK_COUNTOF(hawk->parse.pragma.entry), HAWK_OOECS_PTR(hawk->tok.name), HAWK_OOECS_LEN(hawk->tok.name));
} }
} }
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("implicit"), 0) == 0) /* NOTE: trait = is an intended assignment */
else if (((trait = HAWK_IMPLICIT) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("implicit"), 0) == 0) ||
((trait = HAWK_MULTILINESTR) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("multilinestr"), 0) == 0))
{ {
/* @pragma implicit on /* @pragma implicit on
* @pragma implicit off */ * @pragma implicit off
* @pragma multilinestr on
* @pragma multilinestr off */
hawk_oocs_t value;
if (get_token(hawk) <= -1) return -1; if (get_token(hawk) <= -1) return -1;
if (!MATCH(hawk, TOK_IDENT)) if (!MATCH(hawk, TOK_IDENT))
{ {
error_ident_on_off_expected_for_implicit: error_ident_on_off_expected_for_implicit:
hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'implicit'")); hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for '%.*js'"), name.len, name.ptr);
return -1; return -1;
} }
name.len = HAWK_OOECS_LEN(hawk->tok.name); value.len = HAWK_OOECS_LEN(hawk->tok.name);
name.ptr = HAWK_OOECS_PTR(hawk->tok.name); value.ptr = HAWK_OOECS_PTR(hawk->tok.name);
if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0) if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("on"), 0) == 0)
{ {
hawk->parse.pragma.trait |= HAWK_IMPLICIT; hawk->parse.pragma.trait |= trait;
} }
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0) else if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("off"), 0) == 0)
{ {
hawk->parse.pragma.trait &= ~HAWK_IMPLICIT; hawk->parse.pragma.trait &= ~trait;
} }
else else
{ {
goto error_ident_on_off_expected_for_implicit; goto error_ident_on_off_expected_for_implicit;
} }
} }
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("multilinestr"), 0) == 0)
{
if (get_token(hawk) <= -1) return -1;
if (!MATCH(hawk, TOK_IDENT))
{
error_ident_on_off_expected_for_multilinestr:
hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'multilinestr'"));
return -1;
}
name.len = HAWK_OOECS_LEN(hawk->tok.name);
name.ptr = HAWK_OOECS_PTR(hawk->tok.name);
if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0)
{
hawk->parse.pragma.trait |= HAWK_MULTILINESTR;
}
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0)
{
hawk->parse.pragma.trait &= ~HAWK_MULTILINESTR;
}
else
{
goto error_ident_on_off_expected_for_multilinestr;
}
}
/* --------------------------------------------------------------------- /* ---------------------------------------------------------------------
* the pragmas up to this point affect the parser * the pragmas up to this point affect the parser
* the following pragmas affect runtime * the following pragmas affect runtime
@ -1071,85 +1056,45 @@ static int parse_progunit (hawk_t* hawk)
/* take the specified value if it's greater than the existing value */ /* take the specified value if it's greater than the existing value */
if (sl > hawk->parse.pragma.rtx_stack_limit) hawk->parse.pragma.rtx_stack_limit = sl; if (sl > hawk->parse.pragma.rtx_stack_limit) hawk->parse.pragma.rtx_stack_limit = sl;
} }
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("striprecspc"), 0) == 0) /* NOTE: trait = is an intended assignment */
else if (((trait = HAWK_STRIPRECSPC) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("striprecspc"), 0) == 0) ||
((trait = HAWK_STRIPSTRSPC) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("stripstrspc"), 0) == 0) ||
((trait = HAWK_NUMSTRDETECT) && hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("numstrdetect"), 0) == 0))
{ {
/* @pragma striprecspc on /* @pragma striprecspc on
* @pragma striprecspc off * @pragma striprecspc off
* @pragma stripstrspc on
* @pragma stripstrspc off
* @pragma numstrdetect on
* @pragma numstrdetect off
* *
* Take note the global STRIPRECSPC is available for context based change. * Take note the global STRIPRECSPC is available for context based change.
* STRIPRECSPC takes precedence over this pragma. * STRIPRECSPC takes precedence over this pragma.
*/ */
int is_on; int is_on;
hawk_oocs_t value;
if (get_token(hawk) <= -1) return -1; if (get_token(hawk) <= -1) return -1;
if (!MATCH(hawk, TOK_IDENT)) if (!MATCH(hawk, TOK_IDENT))
{ {
error_ident_on_off_expected_for_striprecspc: error_ident_on_off_expected_for_striprecspc:
hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'striprecspc'")); hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for '%.*js'"), name.len, name.ptr);
return -1; return -1;
} }
name.len = HAWK_OOECS_LEN(hawk->tok.name); value.len = HAWK_OOECS_LEN(hawk->tok.name);
name.ptr = HAWK_OOECS_PTR(hawk->tok.name); value.ptr = HAWK_OOECS_PTR(hawk->tok.name);
if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0) if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("on"), 0) == 0) is_on = 1;
{ else if (hawk_comp_oochars_oocstr(value.ptr, value.len, HAWK_T("off"), 0) == 0) is_on = 0;
is_on = 1; else goto error_ident_on_off_expected_for_striprecspc;
}
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0)
{
is_on = 0;
}
else
{
goto error_ident_on_off_expected_for_striprecspc;
}
if (hawk->sio.inp == &hawk->sio.arg) if (hawk->sio.inp == &hawk->sio.arg)
{ {
/* only the top level source. ignore striprecspc pragma in other levels */ /* only the top level source. ignore the specified pragma in other levels */
if (is_on) if (is_on)
hawk->parse.pragma.trait |= HAWK_STRIPRECSPC; hawk->parse.pragma.trait |= trait;
else else
hawk->parse.pragma.trait &= ~HAWK_STRIPRECSPC; hawk->parse.pragma.trait &= ~trait;
}
}
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("stripstrspc"), 0) == 0)
{
/* @pragma stripstrspc on
* @pragma stripstrspc off
*/
int is_on;
if (get_token(hawk) <= -1) return -1;
if (!MATCH(hawk, TOK_IDENT))
{
error_ident_on_off_expected_for_stripstrspc:
hawk_seterrfmt (hawk, &hawk->ptok.loc, HAWK_EIDENT, HAWK_T("identifier 'on' or 'off' expected for 'stripstrspc'"));
return -1;
}
name.len = HAWK_OOECS_LEN(hawk->tok.name);
name.ptr = HAWK_OOECS_PTR(hawk->tok.name);
if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("on"), 0) == 0)
{
is_on = 1;
}
else if (hawk_comp_oochars_oocstr(name.ptr, name.len, HAWK_T("off"), 0) == 0)
{
is_on = 0;
}
else
{
goto error_ident_on_off_expected_for_stripstrspc;
}
if (hawk->sio.inp == &hawk->sio.arg)
{
/* only the top level source. ignore stripstrspc pragma in other levels */
if (is_on)
hawk->parse.pragma.trait |= HAWK_STRIPSTRSPC;
else
hawk->parse.pragma.trait &= ~HAWK_STRIPSTRSPC;
} }
} }
else else

View File

@ -48,8 +48,9 @@ int hawk_rtx_setrec (hawk_rtx_t* rtx, hawk_oow_t idx, const hawk_oocs_t* str, in
if (split_record(rtx, prefer_number) <= -1) goto oops; if (split_record(rtx, prefer_number) <= -1) goto oops;
v = prefer_number? hawk_rtx_makenumorstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)): v = prefer_number? hawk_rtx_makenumorstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)): /* number or string */
hawk_rtx_makenstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)); hawk_rtx_makenstrvalwithoochars(rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line)); /* str with nstr flag */
if (HAWK_UNLIKELY(!v)) goto oops; if (HAWK_UNLIKELY(!v)) goto oops;
} }
else else

View File

@ -472,6 +472,22 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
break; break;
} }
case HAWK_GBL_NUMSTRDETECT:
{
/* Assignment to the NUMSTRDETECT global: convert the assigned value to a
 * number and store only its sign (-1, 0 or 1) in rtx->gbl.numstrdetect. */
hawk_int_t l;
hawk_flt_t r;
int vt;
vt = hawk_rtx_valtonum(rtx, val, &l, &r);
if (vt <= -1) return -1; /* conversion failed; propagate the failure */
/* vt == 0 selects the integer result in l; any other non-negative vt
 * selects the floating-point result in r */
if (vt == 0)
rtx->gbl.numstrdetect = ((l > 0)? 1: (l < 0)? -1: 0);
else
rtx->gbl.numstrdetect = ((r > 0.0)? 1: (r < 0.0)? -1: 0);
break;
}
case HAWK_GBL_OFMT: case HAWK_GBL_OFMT:
{ {
hawk_oow_t i; hawk_oow_t i;
@ -1095,6 +1111,7 @@ static int init_rtx (hawk_rtx_t* rtx, hawk_t* hawk, hawk_rio_cbs_t* rio)
rtx->gbl.ignorecase = 0; rtx->gbl.ignorecase = 0;
rtx->gbl.striprecspc = -1; /* means 'not set' */ rtx->gbl.striprecspc = -1; /* means 'not set' */
rtx->gbl.stripstrspc = -1; /* means 'not set' */ rtx->gbl.stripstrspc = -1; /* means 'not set' */
rtx->gbl.numstrdetect = -1; /* means 'not set' */
return 0; return 0;

View File

@ -741,9 +741,12 @@ hawk_val_t* hawk_rtx_makenumorstrvalwithuchars (hawk_rtx_t* rtx, const hawk_uch_
if (len == 1 && ptr[0] == '.') goto make_str; if (len == 1 && ptr[0] == '.') goto make_str;
x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx))
if (x == 0) return hawk_rtx_makeintval(rtx, l); {
else if (x >= 1) return hawk_rtx_makefltval(rtx, r); x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r);
if (x == 0) return hawk_rtx_makeintval(rtx, l);
else if (x >= 1) return hawk_rtx_makefltval(rtx, r);
}
make_str: make_str:
return hawk_rtx_makestrvalwithuchars(rtx, ptr, len); return hawk_rtx_makestrvalwithuchars(rtx, ptr, len);
@ -757,9 +760,12 @@ hawk_val_t* hawk_rtx_makenumorstrvalwithbchars (hawk_rtx_t* rtx, const hawk_bch_
if (len == 1 && ptr[0] == '.') goto make_str; if (len == 1 && ptr[0] == '.') goto make_str;
x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx))
if (x == 0) return hawk_rtx_makeintval(rtx, l); {
else if (x >= 1) return hawk_rtx_makefltval(rtx, r); x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r);
if (x == 0) return hawk_rtx_makeintval(rtx, l);
else if (x >= 1) return hawk_rtx_makefltval(rtx, r);
}
make_str: make_str:
return hawk_rtx_makestrvalwithbchars(rtx, ptr, len); return hawk_rtx_makestrvalwithbchars(rtx, ptr, len);
@ -971,9 +977,12 @@ hawk_val_t* hawk_rtx_makenumormbsvalwithuchars (hawk_rtx_t* rtx, const hawk_uch_
if (len == 1 && ptr[0] == '.') goto make_str; if (len == 1 && ptr[0] == '.') goto make_str;
x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx))
if (x == 0) return hawk_rtx_makeintval(rtx, l); {
else if (x >= 1) return hawk_rtx_makefltval(rtx, r); x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r);
if (x == 0) return hawk_rtx_makeintval(rtx, l);
else if (x >= 1) return hawk_rtx_makefltval(rtx, r);
}
make_str: make_str:
return hawk_rtx_makembsvalwithuchars(rtx, ptr, len); return hawk_rtx_makembsvalwithuchars(rtx, ptr, len);
@ -987,9 +996,12 @@ hawk_val_t* hawk_rtx_makenumormbsvalwithbchars (hawk_rtx_t* rtx, const hawk_bch_
if (len == 1 && ptr[0] == '.') goto make_str; if (len == 1 && ptr[0] == '.') goto make_str;
x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r); if (HAWK_RTX_IS_NUMSTRDETECT_ON(rtx))
if (x == 0) return hawk_rtx_makeintval(rtx, l); {
else if (x >= 1) return hawk_rtx_makefltval(rtx, r); x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r);
if (x == 0) return hawk_rtx_makeintval(rtx, l);
else if (x >= 1) return hawk_rtx_makefltval(rtx, r);
}
make_str: make_str:
return hawk_rtx_makembsvalwithbchars(rtx, ptr, len); return hawk_rtx_makembsvalwithbchars(rtx, ptr, len);