From 0e989c619c8fdb767e0a5f3526b7d58a12ccaf00 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Tue, 4 Mar 2008 03:31:41 +0000 Subject: [PATCH] --- ase/awk/awk_i.h | 10 ++- ase/awk/extio.c | 8 +- ase/awk/func.c | 28 +++--- ase/awk/makefile.in | 6 +- ase/awk/misc.c | 46 +++++++++- ase/awk/misc.h | 9 ++ ase/awk/parse.c | 2 +- ase/awk/run.c | 27 +++--- ase/awk/val.c | 8 +- ase/cmn/makefile.in | 4 + ase/{awk => cmn}/rex.c | 189 +++++++++++++++++++++-------------------- ase/{awk => cmn}/rex.h | 53 ++++++++---- 12 files changed, 238 insertions(+), 152 deletions(-) rename ase/{awk => cmn}/rex.c (90%) rename ase/{awk => cmn}/rex.h (52%) diff --git a/ase/awk/awk_i.h b/ase/awk/awk_i.h index 5570f868..bd214c7e 100644 --- a/ase/awk/awk_i.h +++ b/ase/awk/awk_i.h @@ -10,12 +10,12 @@ #include #include #include +#include typedef struct ase_awk_chain_t ase_awk_chain_t; typedef struct ase_awk_tree_t ase_awk_tree_t; #include -#include #include #include #include @@ -365,4 +365,12 @@ struct ase_awk_run_t ase_awk_runcbs_t* cbs; }; + +#define ASE_AWK_FREEREX(awk,code) ase_freerex(&(awk)->prmfns.mmgr,code) +#define ASE_AWK_ISEMPTYREX(awk,code) ase_isemptyrex(code) +#define ASE_AWK_BUILDREX(awk,ptn,len,errnum) \ + ase_awk_buildrex(awk,ptn,len,errnum) +#define ASE_AWK_MATCHREX(awk,code,option,str,len,match_ptr,match_len,errnum) \ + ase_awk_matchrex(awk,code,option,str,len,match_ptr,match_len,errnum) + #endif diff --git a/ase/awk/extio.c b/ase/awk/extio.c index 41205b93..fb944656 100644 --- a/ase/awk/extio.c +++ b/ase/awk/extio.c @@ -260,9 +260,9 @@ int ase_awk_readextio ( ASE_ASSERT (run->global.rs != ASE_NULL); - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( run->awk, run->global.rs, - ((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0), + ((run->global.ignorecase)? ASE_REX_IGNORECASE: 0), ASE_STR_BUF(buf), ASE_STR_LEN(buf), &match_ptr, &match_len, &run->errnum); if (n == -1) @@ -348,9 +348,9 @@ int ase_awk_readextio ( ASE_ASSERT (run->global.rs != ASE_NULL); - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( run->awk, run->global.rs, - ((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0), + ((run->global.ignorecase)? ASE_REX_IGNORECASE: 0), ASE_STR_BUF(buf), ASE_STR_LEN(buf), &match_ptr, &match_len, &run->errnum); if (n == -1) diff --git a/ase/awk/func.c b/ase/awk/func.c index d0501825..aedade25 100644 --- a/ase/awk/func.c +++ b/ase/awk/func.c @@ -702,7 +702,7 @@ static int bfn_split ( if (fs_len > 1) { - fs_rex = ase_awk_buildrex ( + fs_rex = ASE_AWK_BUILDREX ( run->awk, fs_ptr, fs_len, &errnum); if (fs_rex == ASE_NULL) { @@ -725,7 +725,7 @@ static int bfn_split ( if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free); if (fs_rex_free != ASE_NULL) - ase_awk_freerex (run->awk, fs_rex_free); + ASE_AWK_FREEREX (run->awk, fs_rex_free); /*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/ return -1; } @@ -756,7 +756,7 @@ static int bfn_split ( if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free); if (fs_rex_free != ASE_NULL) - ase_awk_freerex (run->awk, fs_rex_free); + ASE_AWK_FREEREX (run->awk, fs_rex_free); ase_awk_setrunerrnum (run, errnum); return -1; } @@ -780,7 +780,7 @@ static int bfn_split ( if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free); if (fs_rex_free != ASE_NULL) - ase_awk_freerex (run->awk, fs_rex_free); + ASE_AWK_FREEREX (run->awk, fs_rex_free); /*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/ return -1; } @@ -807,7 +807,7 @@ static int bfn_split ( if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free); if (fs_rex_free != ASE_NULL) - ase_awk_freerex (run->awk, fs_rex_free); + ASE_AWK_FREEREX (run->awk, fs_rex_free); ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM); return -1; } @@ -818,7 +818,7 @@ static int bfn_split ( if (str_free != ASE_NULL) ASE_AWK_FREE (run->awk, str_free); if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free); - if (fs_rex_free != ASE_NULL) ase_awk_freerex (run->awk, fs_rex_free); + if (fs_rex_free != ASE_NULL) ASE_AWK_FREEREX (run->awk, fs_rex_free); if (sta == 1) num--; @@ -946,7 +946,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count) } while (0) #define FREE_A0_REX(awk,rex) \ do { \ - if (a0->type != ASE_AWK_VAL_REX) ase_awk_freerex (awk, rex); \ + if (a0->type != ASE_AWK_VAL_REX) ASE_AWK_FREEREX (awk, rex); \ } while (0) if (a0->type == ASE_AWK_VAL_REX) @@ -1053,7 +1053,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count) if (a0->type != ASE_AWK_VAL_REX) { - rex = ase_awk_buildrex (run->awk, a0_ptr, a0_len, &run->errnum); + rex = ASE_AWK_BUILDREX (run->awk, a0_ptr, a0_len, &run->errnum); if (rex == ASE_NULL) { ase_str_close (&new); @@ -1062,7 +1062,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count) } } - opt = (run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0; + opt = (run->global.ignorecase)? ASE_REX_IGNORECASE: 0; cur_ptr = a2_ptr; cur_len = a2_len; sub_count = 0; @@ -1071,7 +1071,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count) { if (max_count == 0 || sub_count < max_count) { - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( run->awk, rex, opt, cur_ptr, cur_len, &mat_ptr, &mat_len, &run->errnum); } @@ -1270,7 +1270,7 @@ static int bfn_match ( } } - rex = ase_awk_buildrex (run->awk, str1, len1, &run->errnum); + rex = ASE_AWK_BUILDREX (run->awk, str1, len1, &run->errnum); if (rex == ASE_NULL) { if (a0->type != ASE_AWK_VAL_STR) @@ -1281,13 +1281,13 @@ static int bfn_match ( if (a1->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str1); } - opt = (run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0; - n = ase_awk_matchrex ( + opt = (run->global.ignorecase)? ASE_REX_IGNORECASE: 0; + n = ASE_AWK_MATCHREX ( run->awk, rex, opt, str0, len0, &mat_ptr, &mat_len, &run->errnum); if (a0->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str0); - if (a1->type != ASE_AWK_VAL_REX) ase_awk_freerex (run->awk, rex); + if (a1->type != ASE_AWK_VAL_REX) ASE_AWK_FREEREX (run->awk, rex); if (n == -1) return -1; diff --git a/ase/awk/makefile.in b/ase/awk/makefile.in index f0336936..5ca22713 100644 --- a/ase/awk/makefile.in +++ b/ase/awk/makefile.in @@ -52,8 +52,7 @@ OBJ_FILES_LIB = \ $(TMP_DIR)/val.o \ $(TMP_DIR)/func.o \ $(TMP_DIR)/misc.o \ - $(TMP_DIR)/extio.o \ - $(TMP_DIR)/rex.o + $(TMP_DIR)/extio.o OBJ_FILES_JNI = $(TMP_DIR)/jni.o @@ -150,9 +149,6 @@ $(TMP_DIR)/misc.o: misc.c $(TMP_DIR)/extio.o: extio.c $(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c extio.c -$(TMP_DIR)/rex.o: rex.c - $(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c rex.c - $(TMP_DIR)/jni.o: jni.c $(LIBTOOL_COMPILE) $(CC) $(CFLAGS) $(CFLAGS_JNI) -o $@ -c jni.c diff --git a/ase/awk/misc.c b/ase/awk/misc.c index 401bb50b..57a259d5 100644 --- a/ase/awk/misc.c +++ b/ase/awk/misc.c @@ -828,9 +828,9 @@ ase_char_t* ase_awk_strxntokbyrex ( while (len > 0) { - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( run->awk, rex, - ((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0), + ((run->global.ignorecase)? ASE_REX_IGNORECASE: 0), ptr, left, (const ase_char_t**)&match_ptr, &match_len, errnum); if (n == -1) return ASE_NULL; @@ -908,3 +908,45 @@ exit_loop: ASE_NULL: (match_ptr+match_len); } } + +#define ASE_AWK_REXERRTOERR(err) \ + ((err == ASE_REX_ENOERR)? ASE_AWK_ENOERR: \ + (err == ASE_REX_ENOMEM)? ASE_AWK_ENOMEM: \ + (err == ASE_REX_ERECUR)? ASE_AWK_EREXRECUR: \ + (err == ASE_REX_ERPAREN)? ASE_AWK_EREXRPAREN: \ + (err == ASE_REX_ERBRACKET)? ASE_AWK_EREXRBRACKET: \ + (err == ASE_REX_ERBRACE)? ASE_AWK_EREXRBRACE: \ + (err == ASE_REX_EUNBALPAR)? ASE_AWK_EREXUNBALPAR: \ + (err == ASE_REX_ECOLON)? ASE_AWK_EREXCOLON: \ + (err == ASE_REX_ECRANGE)? ASE_AWK_EREXCRANGE: \ + (err == ASE_REX_ECCLASS)? ASE_AWK_EREXCCLASS: \ + (err == ASE_REX_EBRANGE)? ASE_AWK_EREXBRANGE: \ + (err == ASE_REX_EEND)? ASE_AWK_EREXEND: \ + (err == ASE_REX_EGARBAGE)? ASE_AWK_EREXGARBAGE: \ + ASE_AWK_EINTERN) + +void* ase_awk_buildrex ( + ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum) +{ + int err; + void* p; + + p = ase_buildrex ( + &awk->prmfns.mmgr, awk->rex.depth.max.build, ptn, len, &err); + if (p == ASE_NULL) *errnum = ASE_AWK_REXERRTOERR(err); + return p; +} + +int ase_awk_matchrex ( + ase_awk_t* awk, void* code, int option, + const ase_char_t* str, ase_size_t len, + const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum) +{ + int err, x; + + x = ase_matchrex ( + &awk->prmfns.mmgr, &awk->prmfns.ccls, awk->rex.depth.max.match, + code, option, str, len, match_ptr, match_len, &err); + if (x < 0) *errnum = ASE_AWK_REXERRTOERR(err); + return x; +} diff --git a/ase/awk/misc.h b/ase/awk/misc.h index f397c3c6..e814e813 100644 --- a/ase/awk/misc.h +++ b/ase/awk/misc.h @@ -37,6 +37,15 @@ ase_char_t* ase_awk_strxntokbyrex ( ase_awk_run_t* run, const ase_char_t* s, ase_size_t len, void* rex, ase_char_t** tok, ase_size_t* tok_len, int* errnum); + +void* ase_awk_buildrex ( + ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum); + +int ase_awk_matchrex ( + ase_awk_t* awk, void* code, int option, + const ase_char_t* str, ase_size_t len, + const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum); + #ifdef __cplusplus } #endif diff --git a/ase/awk/parse.c b/ase/awk/parse.c index 66096880..44d48504 100644 --- a/ase/awk/parse.c +++ b/ase/awk/parse.c @@ -2868,7 +2868,7 @@ static ase_awk_nde_t* parse_primary (ase_awk_t* awk, ase_size_t line) return ASE_NULL; } - nde->code = ase_awk_buildrex (awk, + nde->code = ASE_AWK_BUILDREX (awk, ASE_STR_BUF(&awk->token.name), ASE_STR_LEN(&awk->token.name), &errnum); diff --git a/ase/awk/run.c b/ase/awk/run.c index c9727d7d..cbc18e44 100644 --- a/ase/awk/run.c +++ b/ase/awk/run.c @@ -354,7 +354,7 @@ static int set_global ( /* compile the regular expression */ /* TODO: use safebuild */ - rex = ase_awk_buildrex ( + rex = ASE_AWK_BUILDREX ( run->awk, fs_ptr, fs_len, &run->errnum); if (rex == ASE_NULL) { @@ -365,7 +365,7 @@ static int set_global ( if (run->global.fs != ASE_NULL) { - ase_awk_freerex (run->awk, run->global.fs); + ASE_AWK_FREEREX (run->awk, run->global.fs); } run->global.fs = rex; } @@ -499,7 +499,7 @@ static int set_global ( /* compile the regular expression */ /* TODO: use safebuild */ - rex = ase_awk_buildrex ( + rex = ASE_AWK_BUILDREX ( run->awk, rs_ptr, rs_len, &run->errnum); if (rex == ASE_NULL) { @@ -510,7 +510,7 @@ static int set_global ( if (run->global.rs != ASE_NULL) { - ase_awk_freerex (run->awk, run->global.rs); + ASE_AWK_FREEREX (run->awk, run->global.rs); } run->global.rs = rex; } @@ -3079,8 +3079,7 @@ static ase_awk_val_t* eval_expression (ase_awk_run_t* run, ase_awk_nde_t* nde) /* the record has never been read. * probably, this functions has been triggered * by the statements in the BEGIN block */ - n = ase_awk_isemptyrex ( - run->awk, ((ase_awk_val_rex_t*)v)->code)? 1: 0; + n = ASE_AWK_ISEMPTYREX(run->awk,((ase_awk_val_rex_t*)v)->code)? 1: 0; } else { @@ -3088,10 +3087,10 @@ static ase_awk_val_t* eval_expression (ase_awk_run_t* run, ase_awk_nde_t* nde) run->inrec.d0->type == ASE_AWK_VAL_STR, "the internal value representing $0 should always be of the string type once it has been set/updated. it is nil initially."); - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( ((ase_awk_run_t*)run)->awk, ((ase_awk_val_rex_t*)v)->code, - ((((ase_awk_run_t*)run)->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0), + ((((ase_awk_run_t*)run)->global.ignorecase)? ASE_REX_IGNORECASE: 0), ((ase_awk_val_str_t*)run->inrec.d0)->buf, ((ase_awk_val_str_t*)run->inrec.d0)->len, ASE_NULL, ASE_NULL, &errnum); @@ -4723,7 +4722,7 @@ static ase_awk_val_t* eval_binop_match0 ( } else if (right->type == ASE_AWK_VAL_STR) { - rex_code = ase_awk_buildrex ( + rex_code = ASE_AWK_BUILDREX ( run->awk, ((ase_awk_val_str_t*)right)->buf, ((ase_awk_val_str_t*)right)->len, &errnum); @@ -4739,7 +4738,7 @@ static ase_awk_val_t* eval_binop_match0 ( run, right, ASE_AWK_VALTOSTR_CLEAR, ASE_NULL, &len); if (str == ASE_NULL) return ASE_NULL; - rex_code = ase_awk_buildrex (run->awk, str, len, &errnum); + rex_code = ASE_AWK_BUILDREX (run->awk, str, len, &errnum); if (rex_code == ASE_NULL) { ASE_AWK_FREE (run->awk, str); @@ -4753,9 +4752,9 @@ static ase_awk_val_t* eval_binop_match0 ( if (left->type == ASE_AWK_VAL_STR) { - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( run->awk, rex_code, - ((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0), + ((run->global.ignorecase)? ASE_REX_IGNORECASE: 0), ((ase_awk_val_str_t*)left)->buf, ((ase_awk_val_str_t*)left)->len, ASE_NULL, ASE_NULL, &errnum); @@ -4790,9 +4789,9 @@ static ase_awk_val_t* eval_binop_match0 ( return ASE_NULL; } - n = ase_awk_matchrex ( + n = ASE_AWK_MATCHREX ( run->awk, rex_code, - ((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0), + ((run->global.ignorecase)? ASE_REX_IGNORECASE: 0), str, len, ASE_NULL, ASE_NULL, &errnum); if (n == -1) { diff --git a/ase/awk/val.c b/ase/awk/val.c index 72500227..8427609a 100644 --- a/ase/awk/val.c +++ b/ase/awk/val.c @@ -320,7 +320,7 @@ ase_awk_val_t* ase_awk_makerexval ( val = (ase_awk_val_rex_t*) ASE_AWK_MALLOC ( run->awk, ASE_SIZEOF(ase_awk_val_rex_t) + - (ASE_SIZEOF(*buf)*len+1) + ASE_AWK_REX_LEN(code)); + (ASE_SIZEOF(*buf)*len+1) + ASE_REX_LEN(code)); if (val == ASE_NULL) return ASE_NULL; val->type = ASE_AWK_VAL_REX; @@ -338,7 +338,7 @@ ase_awk_val_t* ase_awk_makerexval ( ase_strncpy (val->buf, buf, len); /* - val->code = ASE_AWK_MALLOC (run->awk, ASE_AWK_REX_LEN(code)); + val->code = ASE_AWK_MALLOC (run->awk, ASE_REX_LEN(code)); if (val->code == ASE_NULL) { ASE_AWK_FREE (run->awk, val->buf); @@ -348,7 +348,7 @@ ase_awk_val_t* ase_awk_makerexval ( } */ val->code = val->buf + len + 1; - ase_memcpy (val->code, code, ASE_AWK_REX_LEN(code)); + ase_memcpy (val->code, code, ASE_REX_LEN(code)); return (ase_awk_val_t*)val; } @@ -506,7 +506,7 @@ void ase_awk_freeval (ase_awk_run_t* run, ase_awk_val_t* val, ase_bool_t cache) { /* ASE_AWK_FREE (run->awk, ((ase_awk_val_rex_t*)val)->buf); - ase_awk_freerex (run->awk, ((ase_awk_val_rex_t*)val)->code); + ASE_AWK_FREEREX (run->awk, ((ase_awk_val_rex_t*)val)->code); */ ASE_AWK_FREE (run->awk, val); } diff --git a/ase/cmn/makefile.in b/ase/cmn/makefile.in index 7b3afbef..20564799 100644 --- a/ase/cmn/makefile.in +++ b/ase/cmn/makefile.in @@ -22,6 +22,7 @@ OBJ_FILES = \ $(TMP_DIR)/mem.o \ $(TMP_DIR)/str.o \ $(TMP_DIR)/map.o \ + $(TMP_DIR)/rex.o \ $(TMP_DIR)/misc.o lib: $(OUT_FILE) @@ -39,6 +40,9 @@ $(TMP_DIR)/str.o: str.c $(TMP_DIR)/map.o: map.c $(CC) $(CFLAGS) -o $@ -c map.c +$(TMP_DIR)/rex.o: rex.c + $(CC) $(CFLAGS) -o $@ -c rex.c + $(TMP_DIR)/misc.o: misc.c $(CC) $(CFLAGS) -o $@ -c misc.c diff --git a/ase/awk/rex.c b/ase/cmn/rex.c similarity index 90% rename from ase/awk/rex.c rename to ase/cmn/rex.c index 8c455878..0fd28329 100644 --- a/ase/awk/rex.c +++ b/ase/cmn/rex.c @@ -4,7 +4,7 @@ * {License} */ -#include +#include #ifdef DEBUG_REX #include @@ -64,7 +64,7 @@ typedef struct cshdr_t cshdr_t; struct builder_t { - ase_awk_t* awk; + ase_mmgr_t* mmgr; struct { @@ -97,7 +97,8 @@ struct builder_t struct matcher_t { - ase_awk_t* awk; + ase_mmgr_t* mmgr; + ase_ccls_t* ccls; struct { @@ -214,28 +215,31 @@ static const ase_byte_t* match_occurrences ( static ase_bool_t __test_charset ( matcher_t* matcher, const ase_byte_t* p, ase_size_t csc, ase_char_t c); -static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c); -static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c); +static ase_bool_t cc_isalnum (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isalpha (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isblank (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_iscntrl (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isdigit (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isgraph (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_islower (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isprint (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_ispunct (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isspace (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isupper (ase_ccls_t* ccls, ase_char_t c); +static ase_bool_t cc_isxdigit (ase_ccls_t* ccls, ase_char_t c); +#if 0 +XXX static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p); static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p); static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p); +#endif struct __char_class_t { const ase_char_t* name; ase_size_t name_len; - ase_bool_t (*func) (ase_awk_t* awk, ase_char_t c); + ase_bool_t (*func) (ase_ccls_t* ccls, ase_char_t c); }; static struct __char_class_t __char_class[] = @@ -265,19 +269,20 @@ static struct __char_class_t __char_class[] = { ASE_NULL, 0, ASE_NULL } }; -void* ase_awk_buildrex ( - ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum) +void* ase_buildrex ( + ase_mmgr_t* mmgr, ase_size_t depth, + const ase_char_t* ptn, ase_size_t len, int* errnum) { builder_t builder; - builder.awk = awk; + builder.mmgr = mmgr; builder.code.capa = DEF_CODE_CAPA; builder.code.size = 0; builder.code.buf = (ase_byte_t*) - ASE_AWK_MALLOC (builder.awk, builder.code.capa); + ASE_MALLOC (builder.mmgr, builder.code.capa); if (builder.code.buf == ASE_NULL) { - *errnum = ASE_AWK_ENOMEM; + *errnum = ASE_REX_ENOMEM; return ASE_NULL; } @@ -289,20 +294,20 @@ void* ase_awk_buildrex ( builder.ptn.curc.value = ASE_T('\0'); builder.ptn.curc.escaped = ase_false; - builder.depth.max = awk->rex.depth.max.build; + builder.depth.max = depth; builder.depth.cur = 0; if (next_char (&builder, LEVEL_TOP) == -1) { if (errnum != ASE_NULL) *errnum = builder.errnum; - ASE_AWK_FREE (builder.awk, builder.code.buf); + ASE_FREE (builder.mmgr, builder.code.buf); return ASE_NULL; } if (build_pattern (&builder) == -1) { if (errnum != ASE_NULL) *errnum = builder.errnum; - ASE_AWK_FREE (builder.awk, builder.code.buf); + ASE_FREE (builder.mmgr, builder.code.buf); return ASE_NULL; } @@ -313,23 +318,24 @@ void* ase_awk_buildrex ( if (builder.ptn.curc.type == CT_SPECIAL && builder.ptn.curc.value == ASE_T(')')) { - *errnum = ASE_AWK_EREXUNBALPAR; + *errnum = ASE_REX_EUNBALPAR; } else { - *errnum = ASE_AWK_EREXGARBAGE; + *errnum = ASE_REX_EGARBAGE; } } - ASE_AWK_FREE (builder.awk, builder.code.buf); + ASE_FREE (builder.mmgr, builder.code.buf); return ASE_NULL; } return builder.code.buf; } -int ase_awk_matchrex ( - ase_awk_t* awk, void* code, int option, +int ase_matchrex ( + ase_mmgr_t* mmgr, ase_ccls_t* ccls, ase_size_t depth, + void* code, int option, const ase_char_t* str, ase_size_t len, const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum) { @@ -338,15 +344,16 @@ int ase_awk_matchrex ( ase_size_t offset = 0; /*const ase_char_t* match_ptr_zero = ASE_NULL;*/ - matcher.awk = awk; + matcher.mmgr = mmgr; + matcher.ccls = ccls; /* store the source string */ matcher.match.str.ptr = str; matcher.match.str.end = str + len; - matcher.depth.max = awk->rex.depth.max.match; + matcher.depth.max = depth; matcher.depth.cur = 0; - matcher.ignorecase = (option & ASE_AWK_REX_IGNORECASE)? 1: 0; + matcher.ignorecase = (option & ASE_REX_IGNORECASE)? 1: 0; mat.matched = ase_false; /* TODO: should it allow an offset here??? */ @@ -395,13 +402,13 @@ int ase_awk_matchrex ( return (mat.matched)? 1: 0; } -void ase_awk_freerex (ase_awk_t* awk, void* code) +void ase_freerex (ase_mmgr_t* mmgr, void* code) { ASE_ASSERT (code != ASE_NULL); - ASE_AWK_FREE (awk, code); + ASE_FREE (mmgr, code); } -ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code) +ase_bool_t ase_isemptyrex (void* code) { rhdr_t* rhdr = (rhdr_t*) code; ASE_ASSERT (rhdr != ASE_NULL); @@ -421,7 +428,7 @@ static int build_pattern (builder_t* builder) if (builder->depth.max > 0 && builder->depth.cur >= builder->depth.max) { - builder->errnum = ASE_AWK_EREXRECUR; + builder->errnum = ASE_REX_ERECUR; return -1; } @@ -558,7 +565,7 @@ static int build_atom (builder_t* builder) if (builder->ptn.curc.type != CT_SPECIAL || builder->ptn.curc.value != ASE_T(')')) { - builder->errnum = ASE_AWK_EREXRPAREN; + builder->errnum = ASE_REX_ERPAREN; return -1; } } @@ -608,7 +615,7 @@ static int build_atom (builder_t* builder) if (builder->ptn.curc.type != CT_SPECIAL || builder->ptn.curc.value != ASE_T(']')) { - builder->errnum = ASE_AWK_EREXRBRACKET; + builder->errnum = ASE_REX_ERBRACKET; return -1; } @@ -730,7 +737,7 @@ static int build_charset (builder_t* builder, code_t* cmd) ase_dprintf ( ASE_T("build_charset: invalid character set range\n")); #endif - builder->errnum = ASE_AWK_EREXCRANGE; + builder->errnum = ASE_REX_ECRANGE; return -1; } @@ -761,7 +768,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc) #ifdef DEBUG_REX ase_dprintf (ASE_T("build_cclass: wrong class name\n")); #endif - builder->errnum = ASE_AWK_EREXCCLASS; + builder->errnum = ASE_REX_ECCLASS; return -1; } @@ -774,7 +781,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc) #ifdef DEBUG_REX ase_dprintf (ASE_T("build_cclass: a colon(:) expected\n")); #endif - builder->errnum = ASE_AWK_EREXCOLON; + builder->errnum = ASE_REX_ECOLON; return -1; } @@ -787,7 +794,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc) #ifdef DEBUG_REX ase_dprintf (ASE_T("build_cclass: ] expected\n")); #endif - builder->errnum = ASE_AWK_EREXRBRACKET; + builder->errnum = ASE_REX_ERBRACKET; return -1; } @@ -836,7 +843,7 @@ static int build_occurrences (builder_t* builder, code_t* cmd) if (builder->ptn.curc.type != CT_SPECIAL || builder->ptn.curc.value != ASE_T('}')) { - builder->errnum = ASE_AWK_EREXRBRACE; + builder->errnum = ASE_REX_ERBRACE; return -1; } @@ -886,7 +893,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */ if (cmd->lbound > cmd->ubound) { /* invalid occurrences range */ - builder->errnum = ASE_AWK_EREXBRANGE; + builder->errnum = ASE_REX_EBRANGE; return -1; } @@ -897,7 +904,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */ do { \ if (builder->ptn.curp >= builder->ptn.end) \ { \ - builder->errnum = ASE_AWK_EREXEND; \ + builder->errnum = ASE_REX_EEND; \ return -1; \ } \ } while(0) @@ -1079,29 +1086,29 @@ static int add_code (builder_t* builder, void* data, ase_size_t len) if (capa == 0) capa = DEF_CODE_CAPA; while (len > capa - builder->code.size) { capa = capa * 2; } - if (builder->awk->prmfns.mmgr.realloc != ASE_NULL) + if (builder->mmgr->realloc != ASE_NULL) { - tmp = (ase_byte_t*) ASE_AWK_REALLOC ( - builder->awk, builder->code.buf, capa); + tmp = (ase_byte_t*) ASE_REALLOC ( + builder->mmgr, builder->code.buf, capa); if (tmp == ASE_NULL) { - builder->errnum = ASE_AWK_ENOMEM; + builder->errnum = ASE_REX_ENOMEM; return -1; } } else { - tmp = (ase_byte_t*) ASE_AWK_MALLOC (builder->awk, capa); + tmp = (ase_byte_t*) ASE_MALLOC (builder->mmgr, capa); if (tmp == ASE_NULL) { - builder->errnum = ASE_AWK_ENOMEM; + builder->errnum = ASE_REX_ENOMEM; return -1; } if (builder->code.buf != ASE_NULL) { ase_memcpy (tmp, builder->code.buf, builder->code.capa); - ASE_AWK_FREE (builder->awk, builder->code.buf); + ASE_FREE (builder->mmgr, builder->code.buf); } } @@ -1193,7 +1200,7 @@ static const ase_byte_t* match_branch_body ( if (matcher->depth.max > 0 && matcher->depth.cur >= matcher->depth.max) { - matcher->errnum = ASE_AWK_EREXRECUR; + matcher->errnum = ASE_REX_ERECUR; return ASE_NULL; } @@ -1372,7 +1379,7 @@ static const ase_byte_t* match_ord_char ( ubound = cp->ubound; cc = *(ase_char_t*)p; p += ASE_SIZEOF(cc); - if (matcher->ignorecase) cc = ASE_AWK_TOUPPER(matcher->awk, cc); + if (matcher->ignorecase) cc = ASE_TOUPPER(matcher->ccls, cc); /* merge the same consecutive codes * for example, a{1,10}a{0,10} is shortened to a{1,20} */ @@ -1381,7 +1388,7 @@ static const ase_byte_t* match_ord_char ( while (p < mat->branch_end && cp->cmd == ((const code_t*)p)->cmd) { - if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break; + if (ASE_TOUPPER (matcher->ccls, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break; lbound += ((const code_t*)p)->lbound; ubound += ((const code_t*)p)->ubound; @@ -1423,7 +1430,7 @@ static const ase_byte_t* match_ord_char ( ASE_T("match_ord_char: %c %c\n"), cc, mat->match_ptr[si]); #endif - if (cc != ASE_AWK_TOUPPER (matcher->awk, mat->match_ptr[si])) break; + if (cc != ASE_TOUPPER (matcher->ccls, mat->match_ptr[si])) break; si++; } } @@ -1486,7 +1493,7 @@ static const ase_byte_t* match_charset ( if (&mat->match_ptr[si] >= matcher->match.str.end) break; c = mat->match_ptr[si]; - if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c); + if (matcher->ignorecase) c = ASE_TOUPPER(matcher->ccls, c); n = __test_charset (matcher, p, cshdr->csc, c); if (cp->negate) n = !n; @@ -1555,11 +1562,11 @@ static const ase_byte_t* match_group ( } else { - grp_len = (ase_size_t*) ASE_AWK_MALLOC ( - matcher->awk, ASE_SIZEOF(ase_size_t) * cp->ubound); + grp_len = (ase_size_t*) ASE_MALLOC ( + matcher->mmgr, ASE_SIZEOF(ase_size_t) * cp->ubound); if (grp_len == ASE_NULL) { - matcher->errnum = ASE_AWK_ENOMEM; + matcher->errnum = ASE_REX_ENOMEM; return ASE_NULL; } } @@ -1574,7 +1581,7 @@ static const ase_byte_t* match_group ( if (match_pattern (matcher, p, &mat2) == ASE_NULL) { if (grp_len != grp_len_static) - ASE_AWK_FREE (matcher->awk, grp_len); + ASE_FREE (matcher->mmgr, grp_len); return ASE_NULL; } if (!mat2.matched) break; @@ -1620,7 +1627,7 @@ static const ase_byte_t* match_group ( if (tmp == ASE_NULL) { if (grp_len != grp_len_static) - ASE_AWK_FREE (matcher->awk, grp_len); + ASE_FREE (matcher->mmgr, grp_len); return ASE_NULL; } @@ -1640,7 +1647,7 @@ static const ase_byte_t* match_group ( } - if (grp_len != grp_len_static) ASE_AWK_FREE (matcher->awk, grp_len); + if (grp_len != grp_len_static) ASE_FREE (matcher->mmgr, grp_len); return p; } @@ -1751,7 +1758,7 @@ static ase_bool_t __test_charset ( { c1 = *(const ase_char_t*)p; if (matcher->ignorecase) - c1 = ASE_AWK_TOUPPER(matcher->awk, c1); + c1 = ASE_TOUPPER(matcher->ccls, c1); #ifdef DEBUG_REX ase_dprintf ( ASE_T("match_charset: %c %c\n"), c, c1); @@ -1766,8 +1773,8 @@ static ase_bool_t __test_charset ( if (matcher->ignorecase) { - c1 = ASE_AWK_TOUPPER(matcher->awk, c1); - c2 = ASE_AWK_TOUPPER(matcher->awk, c2); + c1 = ASE_TOUPPER(matcher->ccls, c1); + c2 = ASE_TOUPPER(matcher->ccls, c2); } #ifdef DEBUG_REX ase_dprintf ( @@ -1784,7 +1791,7 @@ static ase_bool_t __test_charset ( c, __char_class[c1].name); #endif if (__char_class[c1].func ( - matcher->awk, c)) return ase_true; + matcher->ccls, c)) return ase_true; } else { @@ -1798,66 +1805,67 @@ static ase_bool_t __test_charset ( return ase_false; } -static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isalnum (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISALNUM (awk, c); + return ASE_ISALNUM (ccls, c); } -static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isalpha (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISALPHA (awk, c); + return ASE_ISALPHA (ccls, c); } -static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isblank (ase_ccls_t* ccls, ase_char_t c) { return c == ASE_T(' ') || c == ASE_T('\t'); } -static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_iscntrl (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISCNTRL (awk, c); + return ASE_ISCNTRL (ccls, c); } -static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isdigit (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISDIGIT (awk, c); + return ASE_ISDIGIT (ccls, c); } -static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isgraph (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISGRAPH (awk, c); + return ASE_ISGRAPH (ccls, c); } -static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_islower (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISLOWER (awk, c); + return ASE_ISLOWER (ccls, c); } -static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isprint (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISPRINT (awk, c); + return ASE_ISPRINT (ccls, c); } -static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_ispunct (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISPUNCT (awk, c); + return ASE_ISPUNCT (ccls, c); } -static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isspace (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISSPACE (awk, c); + return ASE_ISSPACE (ccls, c); } -static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isupper (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISUPPER (awk, c); + return ASE_ISUPPER (ccls, c); } -static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c) +static ase_bool_t cc_isxdigit (ase_ccls_t* ccls, ase_char_t c) { - return ASE_AWK_ISXDIGIT (awk, c); + return ASE_ISXDIGIT (ccls, c); } +#if 0 #define DPRINTF awk->prmfns.misc.dprintf #define DCUSTOM awk->prmfns.misc.custom_data @@ -1995,3 +2003,4 @@ static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p) return p; } +#endif diff --git a/ase/awk/rex.h b/ase/cmn/rex.h similarity index 52% rename from ase/awk/rex.h rename to ase/cmn/rex.h index f7df124b..68cbe6ab 100644 --- a/ase/awk/rex.h +++ b/ase/cmn/rex.h @@ -4,12 +4,11 @@ * {License} */ -#ifndef _ASE_AWK_REX_H_ -#define _ASE_AWK_REX_H_ +#ifndef _ASE_CMN_REX_H_ +#define _ASE_CMN_REX_H_ -#ifndef _ASE_AWK_AWK_H_ -#error Never include this file directly. Include instead -#endif +#include +#include /* * Regular Esseression Syntax @@ -39,34 +38,54 @@ * ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y| */ -#define ASE_AWK_REX_NA(code) (*(ase_size_t*)(code)) +#define ASE_REX_NA(code) (*(ase_size_t*)(code)) -#define ASE_AWK_REX_LEN(code) \ +#define ASE_REX_LEN(code) \ (*(ase_size_t*)((ase_byte_t*)(code)+ASE_SIZEOF(ase_size_t))) -enum ase_awk_rex_opt_t +enum ase_rex_option_t { - ASE_AWK_REX_IGNORECASE = (1 << 0) + ASE_REX_IGNORECASE = (1 << 0) +}; + +enum ase_rex_errnum_t +{ + ASE_REX_ENOERR = 0, + ASE_REX_ENOMEM, + ASE_REX_ERECUR, /* recursion too deep */ + ASE_REX_ERPAREN, /* a right parenthesis is expected */ + ASE_REX_ERBRACKET, /* a right bracket is expected */ + ASE_REX_ERBRACE, /* a right brace is expected */ + ASE_REX_EUNBALPAR, /* unbalanced parenthesis */ + ASE_REX_ECOLON, /* a colon is expected */ + ASE_REX_ECRANGE, /* invalid character range */ + ASE_REX_ECCLASS, /* invalid character class */ + ASE_REX_EBRANGE, /* invalid boundary range */ + ASE_REX_EEND, /* unexpected end of the pattern */ + ASE_REX_EGARBAGE /* garbage after the pattern */ }; #ifdef __cplusplus extern "C" { #endif -void* ase_awk_buildrex ( - ase_awk_t* awk, const ase_char_t* ptn, - ase_size_t len, int* errnum); +void* ase_buildrex ( + ase_mmgr_t* mmgr, ase_size_t depth, + const ase_char_t* ptn, ase_size_t len, int* errnum); -int ase_awk_matchrex ( - ase_awk_t* awk, void* code, int option, +int ase_matchrex ( + ase_mmgr_t* mmgr, ase_ccls_t* ccls, ase_size_t depth, + void* code, int option, const ase_char_t* str, ase_size_t len, const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum); -void ase_awk_freerex (ase_awk_t* awk, void* code); +void ase_freerex (ase_mmgr_t* mmgr, void* code); -ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code); +ase_bool_t ase_isemptyrex (void* code); -void ase_awk_dprintrex (ase_awk_t* awk, void* rex); +#if 0 +void ase_dprintrex (ase_rex_t* rex, void* rex); +#endif #ifdef __cplusplus }