This commit is contained in:
hyung-hwan 2008-03-04 03:31:41 +00:00
parent 67b8fd9419
commit 0e989c619c
12 changed files with 238 additions and 152 deletions

View File

@ -10,12 +10,12 @@
#include <ase/cmn/mem.h>
#include <ase/cmn/str.h>
#include <ase/cmn/map.h>
#include <ase/cmn/rex.h>
typedef struct ase_awk_chain_t ase_awk_chain_t;
typedef struct ase_awk_tree_t ase_awk_tree_t;
#include <ase/awk/awk.h>
#include <ase/awk/rex.h>
#include <ase/awk/tree.h>
#include <ase/awk/val.h>
#include <ase/awk/func.h>
@ -365,4 +365,12 @@ struct ase_awk_run_t
ase_awk_runcbs_t* cbs;
};
/*
 * Convenience wrappers around the regular expression routines for code
 * that holds an ase_awk_t pointer.
 */

/* Releases compiled regular expression code through ase_freerex(),
 * using the memory manager embedded in awk (awk->prmfns.mmgr). */
#define ASE_AWK_FREEREX(awk,code) ase_freerex(&(awk)->prmfns.mmgr,code)
/* Forwards to ase_isemptyrex(); the awk argument is accepted but not used. */
#define ASE_AWK_ISEMPTYREX(awk,code) ase_isemptyrex(code)
/* Forwards all arguments unchanged to ase_awk_buildrex(). */
#define ASE_AWK_BUILDREX(awk,ptn,len,errnum) \
ase_awk_buildrex(awk,ptn,len,errnum)
/* Forwards all arguments unchanged to ase_awk_matchrex(). */
#define ASE_AWK_MATCHREX(awk,code,option,str,len,match_ptr,match_len,errnum) \
ase_awk_matchrex(awk,code,option,str,len,match_ptr,match_len,errnum)
#endif

View File

@ -260,9 +260,9 @@ int ase_awk_readextio (
ASE_ASSERT (run->global.rs != ASE_NULL);
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
run->awk, run->global.rs,
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
ASE_STR_BUF(buf), ASE_STR_LEN(buf),
&match_ptr, &match_len, &run->errnum);
if (n == -1)
@ -348,9 +348,9 @@ int ase_awk_readextio (
ASE_ASSERT (run->global.rs != ASE_NULL);
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
run->awk, run->global.rs,
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
ASE_STR_BUF(buf), ASE_STR_LEN(buf),
&match_ptr, &match_len, &run->errnum);
if (n == -1)

View File

@ -702,7 +702,7 @@ static int bfn_split (
if (fs_len > 1)
{
fs_rex = ase_awk_buildrex (
fs_rex = ASE_AWK_BUILDREX (
run->awk, fs_ptr, fs_len, &errnum);
if (fs_rex == ASE_NULL)
{
@ -725,7 +725,7 @@ static int bfn_split (
if (fs_free != ASE_NULL)
ASE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != ASE_NULL)
ase_awk_freerex (run->awk, fs_rex_free);
ASE_AWK_FREEREX (run->awk, fs_rex_free);
/*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/
return -1;
}
@ -756,7 +756,7 @@ static int bfn_split (
if (fs_free != ASE_NULL)
ASE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != ASE_NULL)
ase_awk_freerex (run->awk, fs_rex_free);
ASE_AWK_FREEREX (run->awk, fs_rex_free);
ase_awk_setrunerrnum (run, errnum);
return -1;
}
@ -780,7 +780,7 @@ static int bfn_split (
if (fs_free != ASE_NULL)
ASE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != ASE_NULL)
ase_awk_freerex (run->awk, fs_rex_free);
ASE_AWK_FREEREX (run->awk, fs_rex_free);
/*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/
return -1;
}
@ -807,7 +807,7 @@ static int bfn_split (
if (fs_free != ASE_NULL)
ASE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != ASE_NULL)
ase_awk_freerex (run->awk, fs_rex_free);
ASE_AWK_FREEREX (run->awk, fs_rex_free);
ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);
return -1;
}
@ -818,7 +818,7 @@ static int bfn_split (
if (str_free != ASE_NULL) ASE_AWK_FREE (run->awk, str_free);
if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free != ASE_NULL) ase_awk_freerex (run->awk, fs_rex_free);
if (fs_rex_free != ASE_NULL) ASE_AWK_FREEREX (run->awk, fs_rex_free);
if (sta == 1) num--;
@ -946,7 +946,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
} while (0)
#define FREE_A0_REX(awk,rex) \
do { \
if (a0->type != ASE_AWK_VAL_REX) ase_awk_freerex (awk, rex); \
if (a0->type != ASE_AWK_VAL_REX) ASE_AWK_FREEREX (awk, rex); \
} while (0)
if (a0->type == ASE_AWK_VAL_REX)
@ -1053,7 +1053,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
if (a0->type != ASE_AWK_VAL_REX)
{
rex = ase_awk_buildrex (run->awk, a0_ptr, a0_len, &run->errnum);
rex = ASE_AWK_BUILDREX (run->awk, a0_ptr, a0_len, &run->errnum);
if (rex == ASE_NULL)
{
ase_str_close (&new);
@ -1062,7 +1062,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
}
}
opt = (run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0;
opt = (run->global.ignorecase)? ASE_REX_IGNORECASE: 0;
cur_ptr = a2_ptr;
cur_len = a2_len;
sub_count = 0;
@ -1071,7 +1071,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
{
if (max_count == 0 || sub_count < max_count)
{
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
run->awk, rex, opt, cur_ptr, cur_len,
&mat_ptr, &mat_len, &run->errnum);
}
@ -1270,7 +1270,7 @@ static int bfn_match (
}
}
rex = ase_awk_buildrex (run->awk, str1, len1, &run->errnum);
rex = ASE_AWK_BUILDREX (run->awk, str1, len1, &run->errnum);
if (rex == ASE_NULL)
{
if (a0->type != ASE_AWK_VAL_STR)
@ -1281,13 +1281,13 @@ static int bfn_match (
if (a1->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str1);
}
opt = (run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0;
n = ase_awk_matchrex (
opt = (run->global.ignorecase)? ASE_REX_IGNORECASE: 0;
n = ASE_AWK_MATCHREX (
run->awk, rex, opt, str0, len0,
&mat_ptr, &mat_len, &run->errnum);
if (a0->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str0);
if (a1->type != ASE_AWK_VAL_REX) ase_awk_freerex (run->awk, rex);
if (a1->type != ASE_AWK_VAL_REX) ASE_AWK_FREEREX (run->awk, rex);
if (n == -1) return -1;

View File

@ -52,8 +52,7 @@ OBJ_FILES_LIB = \
$(TMP_DIR)/val.o \
$(TMP_DIR)/func.o \
$(TMP_DIR)/misc.o \
$(TMP_DIR)/extio.o \
$(TMP_DIR)/rex.o
$(TMP_DIR)/extio.o
OBJ_FILES_JNI = $(TMP_DIR)/jni.o
@ -150,9 +149,6 @@ $(TMP_DIR)/misc.o: misc.c
$(TMP_DIR)/extio.o: extio.c
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c extio.c
$(TMP_DIR)/rex.o: rex.c
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c rex.c
$(TMP_DIR)/jni.o: jni.c
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) $(CFLAGS_JNI) -o $@ -c jni.c

View File

@ -828,9 +828,9 @@ ase_char_t* ase_awk_strxntokbyrex (
while (len > 0)
{
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
run->awk, rex,
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
ptr, left, (const ase_char_t**)&match_ptr, &match_len,
errnum);
if (n == -1) return ASE_NULL;
@ -908,3 +908,45 @@ exit_loop:
ASE_NULL: (match_ptr+match_len);
}
}
/*
 * Translates an ASE_REX_Exxx error number into the corresponding
 * ASE_AWK_Exxx error number. Any value that is not one of the listed
 * ASE_REX_Exxx codes is translated to ASE_AWK_EINTERN.
 *
 * The parameter is parenthesized at every use so that an argument that is
 * itself an expression (for example a conditional expression) is compared
 * as a whole. Note that the argument is evaluated more than once; pass an
 * expression without side effects.
 */
#define ASE_AWK_REXERRTOERR(err) \
	(((err) == ASE_REX_ENOERR)?    ASE_AWK_ENOERR: \
	 ((err) == ASE_REX_ENOMEM)?    ASE_AWK_ENOMEM: \
	 ((err) == ASE_REX_ERECUR)?    ASE_AWK_EREXRECUR: \
	 ((err) == ASE_REX_ERPAREN)?   ASE_AWK_EREXRPAREN: \
	 ((err) == ASE_REX_ERBRACKET)? ASE_AWK_EREXRBRACKET: \
	 ((err) == ASE_REX_ERBRACE)?   ASE_AWK_EREXRBRACE: \
	 ((err) == ASE_REX_EUNBALPAR)? ASE_AWK_EREXUNBALPAR: \
	 ((err) == ASE_REX_ECOLON)?    ASE_AWK_EREXCOLON: \
	 ((err) == ASE_REX_ECRANGE)?   ASE_AWK_EREXCRANGE: \
	 ((err) == ASE_REX_ECCLASS)?   ASE_AWK_EREXCCLASS: \
	 ((err) == ASE_REX_EBRANGE)?   ASE_AWK_EREXBRANGE: \
	 ((err) == ASE_REX_EEND)?      ASE_AWK_EREXEND: \
	 ((err) == ASE_REX_EGARBAGE)?  ASE_AWK_EREXGARBAGE: \
	                               ASE_AWK_EINTERN)
/*
 * Compiles the pattern ptn of len characters into regular expression code
 * by calling ase_buildrex() with the memory manager stored in awk
 * (awk->prmfns.mmgr) and the build depth limit stored in awk
 * (awk->rex.depth.max.build).
 *
 * Returns whatever ase_buildrex() returns. When that is ASE_NULL, the
 * ASE_REX_Exxx code it reported is translated with ASE_AWK_REXERRTOERR and
 * stored in *errnum. errnum may be ASE_NULL if the caller does not need
 * the error number.
 */
void* ase_awk_buildrex (
	ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum)
{
	/* start from a defined value so the translation below never reads
	 * an indeterminate object */
	int err = ASE_REX_ENOERR;
	void* p;

	p = ase_buildrex (
		&awk->prmfns.mmgr, awk->rex.depth.max.build, ptn, len, &err);

	/* only write the error number back when the caller asked for it */
	if (p == ASE_NULL && errnum != ASE_NULL)
		*errnum = ASE_AWK_REXERRTOERR(err);

	return p;
}
/*
 * Matches str of len characters against the compiled regular expression
 * code by calling ase_matchrex() with the memory manager and character
 * class handler stored in awk (awk->prmfns.mmgr, awk->prmfns.ccls) and the
 * match depth limit stored in awk (awk->rex.depth.max.match). option,
 * match_ptr and match_len are forwarded unchanged.
 *
 * Returns whatever ase_matchrex() returns. When that is negative, the
 * ASE_REX_Exxx code it reported is translated with ASE_AWK_REXERRTOERR and
 * stored in *errnum. errnum may be ASE_NULL if the caller does not need
 * the error number.
 */
int ase_awk_matchrex (
	ase_awk_t* awk, void* code, int option,
	const ase_char_t* str, ase_size_t len,
	const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
{
	/* start from a defined value so the translation below never reads
	 * an indeterminate object */
	int err = ASE_REX_ENOERR;
	int x;

	x = ase_matchrex (
		&awk->prmfns.mmgr, &awk->prmfns.ccls, awk->rex.depth.max.match,
		code, option, str, len, match_ptr, match_len, &err);

	/* only write the error number back when the caller asked for it */
	if (x < 0 && errnum != ASE_NULL)
		*errnum = ASE_AWK_REXERRTOERR(err);

	return x;
}

View File

@ -37,6 +37,15 @@ ase_char_t* ase_awk_strxntokbyrex (
ase_awk_run_t* run, const ase_char_t* s, ase_size_t len,
void* rex, ase_char_t** tok, ase_size_t* tok_len, int* errnum);
/*
 * Builds regular expression code from the pattern ptn of len characters
 * on behalf of awk. Returns the compiled code, or ASE_NULL on failure, in
 * which case an ASE_AWK_Exxx error number is stored through errnum.
 */
void* ase_awk_buildrex (
ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum);
/*
 * Matches str of len characters against the compiled regular expression
 * code on behalf of awk. option, match_ptr and match_len are handed on to
 * the underlying matcher. A negative return value indicates failure, in
 * which case an ASE_AWK_Exxx error number is stored through errnum.
 */
int ase_awk_matchrex (
ase_awk_t* awk, void* code, int option,
const ase_char_t* str, ase_size_t len,
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum);
#ifdef __cplusplus
}
#endif

View File

@ -2868,7 +2868,7 @@ static ase_awk_nde_t* parse_primary (ase_awk_t* awk, ase_size_t line)
return ASE_NULL;
}
nde->code = ase_awk_buildrex (awk,
nde->code = ASE_AWK_BUILDREX (awk,
ASE_STR_BUF(&awk->token.name),
ASE_STR_LEN(&awk->token.name),
&errnum);

View File

@ -354,7 +354,7 @@ static int set_global (
/* compile the regular expression */
/* TODO: use safebuild */
rex = ase_awk_buildrex (
rex = ASE_AWK_BUILDREX (
run->awk, fs_ptr, fs_len, &run->errnum);
if (rex == ASE_NULL)
{
@ -365,7 +365,7 @@ static int set_global (
if (run->global.fs != ASE_NULL)
{
ase_awk_freerex (run->awk, run->global.fs);
ASE_AWK_FREEREX (run->awk, run->global.fs);
}
run->global.fs = rex;
}
@ -499,7 +499,7 @@ static int set_global (
/* compile the regular expression */
/* TODO: use safebuild */
rex = ase_awk_buildrex (
rex = ASE_AWK_BUILDREX (
run->awk, rs_ptr, rs_len, &run->errnum);
if (rex == ASE_NULL)
{
@ -510,7 +510,7 @@ static int set_global (
if (run->global.rs != ASE_NULL)
{
ase_awk_freerex (run->awk, run->global.rs);
ASE_AWK_FREEREX (run->awk, run->global.rs);
}
run->global.rs = rex;
}
@ -3079,8 +3079,7 @@ static ase_awk_val_t* eval_expression (ase_awk_run_t* run, ase_awk_nde_t* nde)
/* the record has never been read.
* probably, this function has been triggered
* by the statements in the BEGIN block */
n = ase_awk_isemptyrex (
run->awk, ((ase_awk_val_rex_t*)v)->code)? 1: 0;
n = ASE_AWK_ISEMPTYREX(run->awk,((ase_awk_val_rex_t*)v)->code)? 1: 0;
}
else
{
@ -3088,10 +3087,10 @@ static ase_awk_val_t* eval_expression (ase_awk_run_t* run, ase_awk_nde_t* nde)
run->inrec.d0->type == ASE_AWK_VAL_STR,
"the internal value representing $0 should always be of the string type once it has been set/updated. it is nil initially.");
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
((ase_awk_run_t*)run)->awk,
((ase_awk_val_rex_t*)v)->code,
((((ase_awk_run_t*)run)->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
((((ase_awk_run_t*)run)->global.ignorecase)? ASE_REX_IGNORECASE: 0),
((ase_awk_val_str_t*)run->inrec.d0)->buf,
((ase_awk_val_str_t*)run->inrec.d0)->len,
ASE_NULL, ASE_NULL, &errnum);
@ -4723,7 +4722,7 @@ static ase_awk_val_t* eval_binop_match0 (
}
else if (right->type == ASE_AWK_VAL_STR)
{
rex_code = ase_awk_buildrex (
rex_code = ASE_AWK_BUILDREX (
run->awk,
((ase_awk_val_str_t*)right)->buf,
((ase_awk_val_str_t*)right)->len, &errnum);
@ -4739,7 +4738,7 @@ static ase_awk_val_t* eval_binop_match0 (
run, right, ASE_AWK_VALTOSTR_CLEAR, ASE_NULL, &len);
if (str == ASE_NULL) return ASE_NULL;
rex_code = ase_awk_buildrex (run->awk, str, len, &errnum);
rex_code = ASE_AWK_BUILDREX (run->awk, str, len, &errnum);
if (rex_code == ASE_NULL)
{
ASE_AWK_FREE (run->awk, str);
@ -4753,9 +4752,9 @@ static ase_awk_val_t* eval_binop_match0 (
if (left->type == ASE_AWK_VAL_STR)
{
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
run->awk, rex_code,
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
((ase_awk_val_str_t*)left)->buf,
((ase_awk_val_str_t*)left)->len,
ASE_NULL, ASE_NULL, &errnum);
@ -4790,9 +4789,9 @@ static ase_awk_val_t* eval_binop_match0 (
return ASE_NULL;
}
n = ase_awk_matchrex (
n = ASE_AWK_MATCHREX (
run->awk, rex_code,
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
str, len, ASE_NULL, ASE_NULL, &errnum);
if (n == -1)
{

View File

@ -320,7 +320,7 @@ ase_awk_val_t* ase_awk_makerexval (
val = (ase_awk_val_rex_t*) ASE_AWK_MALLOC (
run->awk, ASE_SIZEOF(ase_awk_val_rex_t) +
(ASE_SIZEOF(*buf)*len+1) + ASE_AWK_REX_LEN(code));
(ASE_SIZEOF(*buf)*len+1) + ASE_REX_LEN(code));
if (val == ASE_NULL) return ASE_NULL;
val->type = ASE_AWK_VAL_REX;
@ -338,7 +338,7 @@ ase_awk_val_t* ase_awk_makerexval (
ase_strncpy (val->buf, buf, len);
/*
val->code = ASE_AWK_MALLOC (run->awk, ASE_AWK_REX_LEN(code));
val->code = ASE_AWK_MALLOC (run->awk, ASE_REX_LEN(code));
if (val->code == ASE_NULL)
{
ASE_AWK_FREE (run->awk, val->buf);
@ -348,7 +348,7 @@ ase_awk_val_t* ase_awk_makerexval (
}
*/
val->code = val->buf + len + 1;
ase_memcpy (val->code, code, ASE_AWK_REX_LEN(code));
ase_memcpy (val->code, code, ASE_REX_LEN(code));
return (ase_awk_val_t*)val;
}
@ -506,7 +506,7 @@ void ase_awk_freeval (ase_awk_run_t* run, ase_awk_val_t* val, ase_bool_t cache)
{
/*
ASE_AWK_FREE (run->awk, ((ase_awk_val_rex_t*)val)->buf);
ase_awk_freerex (run->awk, ((ase_awk_val_rex_t*)val)->code);
ASE_AWK_FREEREX (run->awk, ((ase_awk_val_rex_t*)val)->code);
*/
ASE_AWK_FREE (run->awk, val);
}

View File

@ -22,6 +22,7 @@ OBJ_FILES = \
$(TMP_DIR)/mem.o \
$(TMP_DIR)/str.o \
$(TMP_DIR)/map.o \
$(TMP_DIR)/rex.o \
$(TMP_DIR)/misc.o
lib: $(OUT_FILE)
@ -39,6 +40,9 @@ $(TMP_DIR)/str.o: str.c
$(TMP_DIR)/map.o: map.c
$(CC) $(CFLAGS) -o $@ -c map.c
$(TMP_DIR)/rex.o: rex.c
$(CC) $(CFLAGS) -o $@ -c rex.c
$(TMP_DIR)/misc.o: misc.c
$(CC) $(CFLAGS) -o $@ -c misc.c

View File

@ -4,7 +4,7 @@
* {License}
*/
#include <ase/awk/awk_i.h>
#include <ase/cmn/rex.h>
#ifdef DEBUG_REX
#include <ase/utl/stdio.h>
@ -64,7 +64,7 @@ typedef struct cshdr_t cshdr_t;
struct builder_t
{
ase_awk_t* awk;
ase_mmgr_t* mmgr;
struct
{
@ -97,7 +97,8 @@ struct builder_t
struct matcher_t
{
ase_awk_t* awk;
ase_mmgr_t* mmgr;
ase_ccls_t* ccls;
struct
{
@ -214,28 +215,31 @@ static const ase_byte_t* match_occurrences (
static ase_bool_t __test_charset (
matcher_t* matcher, const ase_byte_t* p, ase_size_t csc, ase_char_t c);
static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c);
static ase_bool_t cc_isalnum (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isalpha (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isblank (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_iscntrl (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isdigit (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isgraph (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_islower (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isprint (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_ispunct (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isspace (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isupper (ase_ccls_t* ccls, ase_char_t c);
static ase_bool_t cc_isxdigit (ase_ccls_t* ccls, ase_char_t c);
#if 0
XXX
static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p);
static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p);
#endif
struct __char_class_t
{
const ase_char_t* name;
ase_size_t name_len;
ase_bool_t (*func) (ase_awk_t* awk, ase_char_t c);
ase_bool_t (*func) (ase_ccls_t* ccls, ase_char_t c);
};
static struct __char_class_t __char_class[] =
@ -265,19 +269,20 @@ static struct __char_class_t __char_class[] =
{ ASE_NULL, 0, ASE_NULL }
};
void* ase_awk_buildrex (
ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum)
void* ase_buildrex (
ase_mmgr_t* mmgr, ase_size_t depth,
const ase_char_t* ptn, ase_size_t len, int* errnum)
{
builder_t builder;
builder.awk = awk;
builder.mmgr = mmgr;
builder.code.capa = DEF_CODE_CAPA;
builder.code.size = 0;
builder.code.buf = (ase_byte_t*)
ASE_AWK_MALLOC (builder.awk, builder.code.capa);
ASE_MALLOC (builder.mmgr, builder.code.capa);
if (builder.code.buf == ASE_NULL)
{
*errnum = ASE_AWK_ENOMEM;
*errnum = ASE_REX_ENOMEM;
return ASE_NULL;
}
@ -289,20 +294,20 @@ void* ase_awk_buildrex (
builder.ptn.curc.value = ASE_T('\0');
builder.ptn.curc.escaped = ase_false;
builder.depth.max = awk->rex.depth.max.build;
builder.depth.max = depth;
builder.depth.cur = 0;
if (next_char (&builder, LEVEL_TOP) == -1)
{
if (errnum != ASE_NULL) *errnum = builder.errnum;
ASE_AWK_FREE (builder.awk, builder.code.buf);
ASE_FREE (builder.mmgr, builder.code.buf);
return ASE_NULL;
}
if (build_pattern (&builder) == -1)
{
if (errnum != ASE_NULL) *errnum = builder.errnum;
ASE_AWK_FREE (builder.awk, builder.code.buf);
ASE_FREE (builder.mmgr, builder.code.buf);
return ASE_NULL;
}
@ -313,23 +318,24 @@ void* ase_awk_buildrex (
if (builder.ptn.curc.type == CT_SPECIAL &&
builder.ptn.curc.value == ASE_T(')'))
{
*errnum = ASE_AWK_EREXUNBALPAR;
*errnum = ASE_REX_EUNBALPAR;
}
else
{
*errnum = ASE_AWK_EREXGARBAGE;
*errnum = ASE_REX_EGARBAGE;
}
}
ASE_AWK_FREE (builder.awk, builder.code.buf);
ASE_FREE (builder.mmgr, builder.code.buf);
return ASE_NULL;
}
return builder.code.buf;
}
int ase_awk_matchrex (
ase_awk_t* awk, void* code, int option,
int ase_matchrex (
ase_mmgr_t* mmgr, ase_ccls_t* ccls, ase_size_t depth,
void* code, int option,
const ase_char_t* str, ase_size_t len,
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
{
@ -338,15 +344,16 @@ int ase_awk_matchrex (
ase_size_t offset = 0;
/*const ase_char_t* match_ptr_zero = ASE_NULL;*/
matcher.awk = awk;
matcher.mmgr = mmgr;
matcher.ccls = ccls;
/* store the source string */
matcher.match.str.ptr = str;
matcher.match.str.end = str + len;
matcher.depth.max = awk->rex.depth.max.match;
matcher.depth.max = depth;
matcher.depth.cur = 0;
matcher.ignorecase = (option & ASE_AWK_REX_IGNORECASE)? 1: 0;
matcher.ignorecase = (option & ASE_REX_IGNORECASE)? 1: 0;
mat.matched = ase_false;
/* TODO: should it allow an offset here??? */
@ -395,13 +402,13 @@ int ase_awk_matchrex (
return (mat.matched)? 1: 0;
}
void ase_awk_freerex (ase_awk_t* awk, void* code)
void ase_freerex (ase_mmgr_t* mmgr, void* code)
{
ASE_ASSERT (code != ASE_NULL);
ASE_AWK_FREE (awk, code);
ASE_FREE (mmgr, code);
}
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
ase_bool_t ase_isemptyrex (void* code)
{
rhdr_t* rhdr = (rhdr_t*) code;
ASE_ASSERT (rhdr != ASE_NULL);
@ -421,7 +428,7 @@ static int build_pattern (builder_t* builder)
if (builder->depth.max > 0 && builder->depth.cur >= builder->depth.max)
{
builder->errnum = ASE_AWK_EREXRECUR;
builder->errnum = ASE_REX_ERECUR;
return -1;
}
@ -558,7 +565,7 @@ static int build_atom (builder_t* builder)
if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != ASE_T(')'))
{
builder->errnum = ASE_AWK_EREXRPAREN;
builder->errnum = ASE_REX_ERPAREN;
return -1;
}
}
@ -608,7 +615,7 @@ static int build_atom (builder_t* builder)
if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != ASE_T(']'))
{
builder->errnum = ASE_AWK_EREXRBRACKET;
builder->errnum = ASE_REX_ERBRACKET;
return -1;
}
@ -730,7 +737,7 @@ static int build_charset (builder_t* builder, code_t* cmd)
ase_dprintf (
ASE_T("build_charset: invalid character set range\n"));
#endif
builder->errnum = ASE_AWK_EREXCRANGE;
builder->errnum = ASE_REX_ECRANGE;
return -1;
}
@ -761,7 +768,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc)
#ifdef DEBUG_REX
ase_dprintf (ASE_T("build_cclass: wrong class name\n"));
#endif
builder->errnum = ASE_AWK_EREXCCLASS;
builder->errnum = ASE_REX_ECCLASS;
return -1;
}
@ -774,7 +781,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc)
#ifdef DEBUG_REX
ase_dprintf (ASE_T("build_cclass: a colon(:) expected\n"));
#endif
builder->errnum = ASE_AWK_EREXCOLON;
builder->errnum = ASE_REX_ECOLON;
return -1;
}
@ -787,7 +794,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc)
#ifdef DEBUG_REX
ase_dprintf (ASE_T("build_cclass: ] expected\n"));
#endif
builder->errnum = ASE_AWK_EREXRBRACKET;
builder->errnum = ASE_REX_ERBRACKET;
return -1;
}
@ -836,7 +843,7 @@ static int build_occurrences (builder_t* builder, code_t* cmd)
if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != ASE_T('}'))
{
builder->errnum = ASE_AWK_EREXRBRACE;
builder->errnum = ASE_REX_ERBRACE;
return -1;
}
@ -886,7 +893,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */
if (cmd->lbound > cmd->ubound)
{
/* invalid occurrences range */
builder->errnum = ASE_AWK_EREXBRANGE;
builder->errnum = ASE_REX_EBRANGE;
return -1;
}
@ -897,7 +904,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */
do { \
if (builder->ptn.curp >= builder->ptn.end) \
{ \
builder->errnum = ASE_AWK_EREXEND; \
builder->errnum = ASE_REX_EEND; \
return -1; \
} \
} while(0)
@ -1079,29 +1086,29 @@ static int add_code (builder_t* builder, void* data, ase_size_t len)
if (capa == 0) capa = DEF_CODE_CAPA;
while (len > capa - builder->code.size) { capa = capa * 2; }
if (builder->awk->prmfns.mmgr.realloc != ASE_NULL)
if (builder->mmgr->realloc != ASE_NULL)
{
tmp = (ase_byte_t*) ASE_AWK_REALLOC (
builder->awk, builder->code.buf, capa);
tmp = (ase_byte_t*) ASE_REALLOC (
builder->mmgr, builder->code.buf, capa);
if (tmp == ASE_NULL)
{
builder->errnum = ASE_AWK_ENOMEM;
builder->errnum = ASE_REX_ENOMEM;
return -1;
}
}
else
{
tmp = (ase_byte_t*) ASE_AWK_MALLOC (builder->awk, capa);
tmp = (ase_byte_t*) ASE_MALLOC (builder->mmgr, capa);
if (tmp == ASE_NULL)
{
builder->errnum = ASE_AWK_ENOMEM;
builder->errnum = ASE_REX_ENOMEM;
return -1;
}
if (builder->code.buf != ASE_NULL)
{
ase_memcpy (tmp, builder->code.buf, builder->code.capa);
ASE_AWK_FREE (builder->awk, builder->code.buf);
ASE_FREE (builder->mmgr, builder->code.buf);
}
}
@ -1193,7 +1200,7 @@ static const ase_byte_t* match_branch_body (
if (matcher->depth.max > 0 && matcher->depth.cur >= matcher->depth.max)
{
matcher->errnum = ASE_AWK_EREXRECUR;
matcher->errnum = ASE_REX_ERECUR;
return ASE_NULL;
}
@ -1372,7 +1379,7 @@ static const ase_byte_t* match_ord_char (
ubound = cp->ubound;
cc = *(ase_char_t*)p; p += ASE_SIZEOF(cc);
if (matcher->ignorecase) cc = ASE_AWK_TOUPPER(matcher->awk, cc);
if (matcher->ignorecase) cc = ASE_TOUPPER(matcher->ccls, cc);
/* merge the same consecutive codes
* for example, a{1,10}a{0,10} is shortened to a{1,20} */
@ -1381,7 +1388,7 @@ static const ase_byte_t* match_ord_char (
while (p < mat->branch_end &&
cp->cmd == ((const code_t*)p)->cmd)
{
if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break;
if (ASE_TOUPPER (matcher->ccls, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break;
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
@ -1423,7 +1430,7 @@ static const ase_byte_t* match_ord_char (
ASE_T("match_ord_char: <ignorecase> %c %c\n"),
cc, mat->match_ptr[si]);
#endif
if (cc != ASE_AWK_TOUPPER (matcher->awk, mat->match_ptr[si])) break;
if (cc != ASE_TOUPPER (matcher->ccls, mat->match_ptr[si])) break;
si++;
}
}
@ -1486,7 +1493,7 @@ static const ase_byte_t* match_charset (
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
c = mat->match_ptr[si];
if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c);
if (matcher->ignorecase) c = ASE_TOUPPER(matcher->ccls, c);
n = __test_charset (matcher, p, cshdr->csc, c);
if (cp->negate) n = !n;
@ -1555,11 +1562,11 @@ static const ase_byte_t* match_group (
}
else
{
grp_len = (ase_size_t*) ASE_AWK_MALLOC (
matcher->awk, ASE_SIZEOF(ase_size_t) * cp->ubound);
grp_len = (ase_size_t*) ASE_MALLOC (
matcher->mmgr, ASE_SIZEOF(ase_size_t) * cp->ubound);
if (grp_len == ASE_NULL)
{
matcher->errnum = ASE_AWK_ENOMEM;
matcher->errnum = ASE_REX_ENOMEM;
return ASE_NULL;
}
}
@ -1574,7 +1581,7 @@ static const ase_byte_t* match_group (
if (match_pattern (matcher, p, &mat2) == ASE_NULL)
{
if (grp_len != grp_len_static)
ASE_AWK_FREE (matcher->awk, grp_len);
ASE_FREE (matcher->mmgr, grp_len);
return ASE_NULL;
}
if (!mat2.matched) break;
@ -1620,7 +1627,7 @@ static const ase_byte_t* match_group (
if (tmp == ASE_NULL)
{
if (grp_len != grp_len_static)
ASE_AWK_FREE (matcher->awk, grp_len);
ASE_FREE (matcher->mmgr, grp_len);
return ASE_NULL;
}
@ -1640,7 +1647,7 @@ static const ase_byte_t* match_group (
}
if (grp_len != grp_len_static) ASE_AWK_FREE (matcher->awk, grp_len);
if (grp_len != grp_len_static) ASE_FREE (matcher->mmgr, grp_len);
return p;
}
@ -1751,7 +1758,7 @@ static ase_bool_t __test_charset (
{
c1 = *(const ase_char_t*)p;
if (matcher->ignorecase)
c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
c1 = ASE_TOUPPER(matcher->ccls, c1);
#ifdef DEBUG_REX
ase_dprintf (
ASE_T("match_charset: <one> %c %c\n"), c, c1);
@ -1766,8 +1773,8 @@ static ase_bool_t __test_charset (
if (matcher->ignorecase)
{
c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
c2 = ASE_AWK_TOUPPER(matcher->awk, c2);
c1 = ASE_TOUPPER(matcher->ccls, c1);
c2 = ASE_TOUPPER(matcher->ccls, c2);
}
#ifdef DEBUG_REX
ase_dprintf (
@ -1784,7 +1791,7 @@ static ase_bool_t __test_charset (
c, __char_class[c1].name);
#endif
if (__char_class[c1].func (
matcher->awk, c)) return ase_true;
matcher->ccls, c)) return ase_true;
}
else
{
@ -1798,66 +1805,67 @@ static ase_bool_t __test_charset (
return ase_false;
}
static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isalnum (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISALNUM (awk, c);
return ASE_ISALNUM (ccls, c);
}
static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isalpha (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISALPHA (awk, c);
return ASE_ISALPHA (ccls, c);
}
static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isblank (ase_ccls_t* ccls, ase_char_t c)
{
return c == ASE_T(' ') || c == ASE_T('\t');
}
static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_iscntrl (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISCNTRL (awk, c);
return ASE_ISCNTRL (ccls, c);
}
static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isdigit (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISDIGIT (awk, c);
return ASE_ISDIGIT (ccls, c);
}
static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isgraph (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISGRAPH (awk, c);
return ASE_ISGRAPH (ccls, c);
}
static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_islower (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISLOWER (awk, c);
return ASE_ISLOWER (ccls, c);
}
static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isprint (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISPRINT (awk, c);
return ASE_ISPRINT (ccls, c);
}
static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_ispunct (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISPUNCT (awk, c);
return ASE_ISPUNCT (ccls, c);
}
static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isspace (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISSPACE (awk, c);
return ASE_ISSPACE (ccls, c);
}
static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isupper (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISUPPER (awk, c);
return ASE_ISUPPER (ccls, c);
}
static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c)
static ase_bool_t cc_isxdigit (ase_ccls_t* ccls, ase_char_t c)
{
return ASE_AWK_ISXDIGIT (awk, c);
return ASE_ISXDIGIT (ccls, c);
}
#if 0
#define DPRINTF awk->prmfns.misc.dprintf
#define DCUSTOM awk->prmfns.misc.custom_data
@ -1995,3 +2003,4 @@ static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p)
return p;
}
#endif

View File

@ -4,12 +4,11 @@
* {License}
*/
#ifndef _ASE_AWK_REX_H_
#define _ASE_AWK_REX_H_
#ifndef _ASE_CMN_REX_H_
#define _ASE_CMN_REX_H_
#ifndef _ASE_AWK_AWK_H_
#error Never include this file directly. Include <ase/awk/awk.h> instead
#endif
#include <ase/cmn/types.h>
#include <ase/cmn/macros.h>
/*
* Regular Expression Syntax
@ -39,34 +38,54 @@
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
*/
#define ASE_AWK_REX_NA(code) (*(ase_size_t*)(code))
#define ASE_REX_NA(code) (*(ase_size_t*)(code))
#define ASE_AWK_REX_LEN(code) \
#define ASE_REX_LEN(code) \
(*(ase_size_t*)((ase_byte_t*)(code)+ASE_SIZEOF(ase_size_t)))
enum ase_awk_rex_opt_t
enum ase_rex_option_t
{
ASE_AWK_REX_IGNORECASE = (1 << 0)
ASE_REX_IGNORECASE = (1 << 0)
};
/*
 * Error numbers reported by the regular expression builder and matcher.
 * The numeric values are spelled out explicitly; they are consecutive
 * starting from zero.
 */
enum ase_rex_errnum_t
{
	ASE_REX_ENOERR    =  0, /* no error */
	ASE_REX_ENOMEM    =  1, /* memory allocation failed */
	ASE_REX_ERECUR    =  2, /* recursion too deep */
	ASE_REX_ERPAREN   =  3, /* a right parenthesis is expected */
	ASE_REX_ERBRACKET =  4, /* a right bracket is expected */
	ASE_REX_ERBRACE   =  5, /* a right brace is expected */
	ASE_REX_EUNBALPAR =  6, /* unbalanced parenthesis */
	ASE_REX_ECOLON    =  7, /* a colon is expected */
	ASE_REX_ECRANGE   =  8, /* invalid character range */
	ASE_REX_ECCLASS   =  9, /* invalid character class */
	ASE_REX_EBRANGE   = 10, /* invalid boundary range */
	ASE_REX_EEND      = 11, /* unexpected end of the pattern */
	ASE_REX_EGARBAGE  = 12  /* garbage after the pattern */
};
#ifdef __cplusplus
extern "C" {
#endif
void* ase_awk_buildrex (
ase_awk_t* awk, const ase_char_t* ptn,
ase_size_t len, int* errnum);
void* ase_buildrex (
ase_mmgr_t* mmgr, ase_size_t depth,
const ase_char_t* ptn, ase_size_t len, int* errnum);
int ase_awk_matchrex (
ase_awk_t* awk, void* code, int option,
int ase_matchrex (
ase_mmgr_t* mmgr, ase_ccls_t* ccls, ase_size_t depth,
void* code, int option,
const ase_char_t* str, ase_size_t len,
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum);
void ase_awk_freerex (ase_awk_t* awk, void* code);
void ase_freerex (ase_mmgr_t* mmgr, void* code);
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code);
ase_bool_t ase_isemptyrex (void* code);
void ase_awk_dprintrex (ase_awk_t* awk, void* rex);
#if 0
void ase_dprintrex (ase_rex_t* rex, void* rex);
#endif
#ifdef __cplusplus
}