This commit is contained in:
parent
67b8fd9419
commit
0e989c619c
@ -10,12 +10,12 @@
|
|||||||
#include <ase/cmn/mem.h>
|
#include <ase/cmn/mem.h>
|
||||||
#include <ase/cmn/str.h>
|
#include <ase/cmn/str.h>
|
||||||
#include <ase/cmn/map.h>
|
#include <ase/cmn/map.h>
|
||||||
|
#include <ase/cmn/rex.h>
|
||||||
|
|
||||||
typedef struct ase_awk_chain_t ase_awk_chain_t;
|
typedef struct ase_awk_chain_t ase_awk_chain_t;
|
||||||
typedef struct ase_awk_tree_t ase_awk_tree_t;
|
typedef struct ase_awk_tree_t ase_awk_tree_t;
|
||||||
|
|
||||||
#include <ase/awk/awk.h>
|
#include <ase/awk/awk.h>
|
||||||
#include <ase/awk/rex.h>
|
|
||||||
#include <ase/awk/tree.h>
|
#include <ase/awk/tree.h>
|
||||||
#include <ase/awk/val.h>
|
#include <ase/awk/val.h>
|
||||||
#include <ase/awk/func.h>
|
#include <ase/awk/func.h>
|
||||||
@ -365,4 +365,12 @@ struct ase_awk_run_t
|
|||||||
ase_awk_runcbs_t* cbs;
|
ase_awk_runcbs_t* cbs;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#define ASE_AWK_FREEREX(awk,code) ase_freerex(&(awk)->prmfns.mmgr,code)
|
||||||
|
#define ASE_AWK_ISEMPTYREX(awk,code) ase_isemptyrex(code)
|
||||||
|
#define ASE_AWK_BUILDREX(awk,ptn,len,errnum) \
|
||||||
|
ase_awk_buildrex(awk,ptn,len,errnum)
|
||||||
|
#define ASE_AWK_MATCHREX(awk,code,option,str,len,match_ptr,match_len,errnum) \
|
||||||
|
ase_awk_matchrex(awk,code,option,str,len,match_ptr,match_len,errnum)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -260,9 +260,9 @@ int ase_awk_readextio (
|
|||||||
|
|
||||||
ASE_ASSERT (run->global.rs != ASE_NULL);
|
ASE_ASSERT (run->global.rs != ASE_NULL);
|
||||||
|
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, run->global.rs,
|
run->awk, run->global.rs,
|
||||||
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
|
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
|
||||||
ASE_STR_BUF(buf), ASE_STR_LEN(buf),
|
ASE_STR_BUF(buf), ASE_STR_LEN(buf),
|
||||||
&match_ptr, &match_len, &run->errnum);
|
&match_ptr, &match_len, &run->errnum);
|
||||||
if (n == -1)
|
if (n == -1)
|
||||||
@ -348,9 +348,9 @@ int ase_awk_readextio (
|
|||||||
|
|
||||||
ASE_ASSERT (run->global.rs != ASE_NULL);
|
ASE_ASSERT (run->global.rs != ASE_NULL);
|
||||||
|
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, run->global.rs,
|
run->awk, run->global.rs,
|
||||||
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
|
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
|
||||||
ASE_STR_BUF(buf), ASE_STR_LEN(buf),
|
ASE_STR_BUF(buf), ASE_STR_LEN(buf),
|
||||||
&match_ptr, &match_len, &run->errnum);
|
&match_ptr, &match_len, &run->errnum);
|
||||||
if (n == -1)
|
if (n == -1)
|
||||||
|
@ -702,7 +702,7 @@ static int bfn_split (
|
|||||||
|
|
||||||
if (fs_len > 1)
|
if (fs_len > 1)
|
||||||
{
|
{
|
||||||
fs_rex = ase_awk_buildrex (
|
fs_rex = ASE_AWK_BUILDREX (
|
||||||
run->awk, fs_ptr, fs_len, &errnum);
|
run->awk, fs_ptr, fs_len, &errnum);
|
||||||
if (fs_rex == ASE_NULL)
|
if (fs_rex == ASE_NULL)
|
||||||
{
|
{
|
||||||
@ -725,7 +725,7 @@ static int bfn_split (
|
|||||||
if (fs_free != ASE_NULL)
|
if (fs_free != ASE_NULL)
|
||||||
ASE_AWK_FREE (run->awk, fs_free);
|
ASE_AWK_FREE (run->awk, fs_free);
|
||||||
if (fs_rex_free != ASE_NULL)
|
if (fs_rex_free != ASE_NULL)
|
||||||
ase_awk_freerex (run->awk, fs_rex_free);
|
ASE_AWK_FREEREX (run->awk, fs_rex_free);
|
||||||
/*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/
|
/*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -756,7 +756,7 @@ static int bfn_split (
|
|||||||
if (fs_free != ASE_NULL)
|
if (fs_free != ASE_NULL)
|
||||||
ASE_AWK_FREE (run->awk, fs_free);
|
ASE_AWK_FREE (run->awk, fs_free);
|
||||||
if (fs_rex_free != ASE_NULL)
|
if (fs_rex_free != ASE_NULL)
|
||||||
ase_awk_freerex (run->awk, fs_rex_free);
|
ASE_AWK_FREEREX (run->awk, fs_rex_free);
|
||||||
ase_awk_setrunerrnum (run, errnum);
|
ase_awk_setrunerrnum (run, errnum);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -780,7 +780,7 @@ static int bfn_split (
|
|||||||
if (fs_free != ASE_NULL)
|
if (fs_free != ASE_NULL)
|
||||||
ASE_AWK_FREE (run->awk, fs_free);
|
ASE_AWK_FREE (run->awk, fs_free);
|
||||||
if (fs_rex_free != ASE_NULL)
|
if (fs_rex_free != ASE_NULL)
|
||||||
ase_awk_freerex (run->awk, fs_rex_free);
|
ASE_AWK_FREEREX (run->awk, fs_rex_free);
|
||||||
/*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/
|
/*ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);*/
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -807,7 +807,7 @@ static int bfn_split (
|
|||||||
if (fs_free != ASE_NULL)
|
if (fs_free != ASE_NULL)
|
||||||
ASE_AWK_FREE (run->awk, fs_free);
|
ASE_AWK_FREE (run->awk, fs_free);
|
||||||
if (fs_rex_free != ASE_NULL)
|
if (fs_rex_free != ASE_NULL)
|
||||||
ase_awk_freerex (run->awk, fs_rex_free);
|
ASE_AWK_FREEREX (run->awk, fs_rex_free);
|
||||||
ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);
|
ase_awk_setrunerrnum (run, ASE_AWK_ENOMEM);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -818,7 +818,7 @@ static int bfn_split (
|
|||||||
|
|
||||||
if (str_free != ASE_NULL) ASE_AWK_FREE (run->awk, str_free);
|
if (str_free != ASE_NULL) ASE_AWK_FREE (run->awk, str_free);
|
||||||
if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free);
|
if (fs_free != ASE_NULL) ASE_AWK_FREE (run->awk, fs_free);
|
||||||
if (fs_rex_free != ASE_NULL) ase_awk_freerex (run->awk, fs_rex_free);
|
if (fs_rex_free != ASE_NULL) ASE_AWK_FREEREX (run->awk, fs_rex_free);
|
||||||
|
|
||||||
if (sta == 1) num--;
|
if (sta == 1) num--;
|
||||||
|
|
||||||
@ -946,7 +946,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
|
|||||||
} while (0)
|
} while (0)
|
||||||
#define FREE_A0_REX(awk,rex) \
|
#define FREE_A0_REX(awk,rex) \
|
||||||
do { \
|
do { \
|
||||||
if (a0->type != ASE_AWK_VAL_REX) ase_awk_freerex (awk, rex); \
|
if (a0->type != ASE_AWK_VAL_REX) ASE_AWK_FREEREX (awk, rex); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
if (a0->type == ASE_AWK_VAL_REX)
|
if (a0->type == ASE_AWK_VAL_REX)
|
||||||
@ -1053,7 +1053,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
|
|||||||
|
|
||||||
if (a0->type != ASE_AWK_VAL_REX)
|
if (a0->type != ASE_AWK_VAL_REX)
|
||||||
{
|
{
|
||||||
rex = ase_awk_buildrex (run->awk, a0_ptr, a0_len, &run->errnum);
|
rex = ASE_AWK_BUILDREX (run->awk, a0_ptr, a0_len, &run->errnum);
|
||||||
if (rex == ASE_NULL)
|
if (rex == ASE_NULL)
|
||||||
{
|
{
|
||||||
ase_str_close (&new);
|
ase_str_close (&new);
|
||||||
@ -1062,7 +1062,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
opt = (run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0;
|
opt = (run->global.ignorecase)? ASE_REX_IGNORECASE: 0;
|
||||||
cur_ptr = a2_ptr;
|
cur_ptr = a2_ptr;
|
||||||
cur_len = a2_len;
|
cur_len = a2_len;
|
||||||
sub_count = 0;
|
sub_count = 0;
|
||||||
@ -1071,7 +1071,7 @@ static int __substitute (ase_awk_run_t* run, ase_long_t max_count)
|
|||||||
{
|
{
|
||||||
if (max_count == 0 || sub_count < max_count)
|
if (max_count == 0 || sub_count < max_count)
|
||||||
{
|
{
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, rex, opt, cur_ptr, cur_len,
|
run->awk, rex, opt, cur_ptr, cur_len,
|
||||||
&mat_ptr, &mat_len, &run->errnum);
|
&mat_ptr, &mat_len, &run->errnum);
|
||||||
}
|
}
|
||||||
@ -1270,7 +1270,7 @@ static int bfn_match (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rex = ase_awk_buildrex (run->awk, str1, len1, &run->errnum);
|
rex = ASE_AWK_BUILDREX (run->awk, str1, len1, &run->errnum);
|
||||||
if (rex == ASE_NULL)
|
if (rex == ASE_NULL)
|
||||||
{
|
{
|
||||||
if (a0->type != ASE_AWK_VAL_STR)
|
if (a0->type != ASE_AWK_VAL_STR)
|
||||||
@ -1281,13 +1281,13 @@ static int bfn_match (
|
|||||||
if (a1->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str1);
|
if (a1->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str1);
|
||||||
}
|
}
|
||||||
|
|
||||||
opt = (run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0;
|
opt = (run->global.ignorecase)? ASE_REX_IGNORECASE: 0;
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, rex, opt, str0, len0,
|
run->awk, rex, opt, str0, len0,
|
||||||
&mat_ptr, &mat_len, &run->errnum);
|
&mat_ptr, &mat_len, &run->errnum);
|
||||||
|
|
||||||
if (a0->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str0);
|
if (a0->type != ASE_AWK_VAL_STR) ASE_AWK_FREE (run->awk, str0);
|
||||||
if (a1->type != ASE_AWK_VAL_REX) ase_awk_freerex (run->awk, rex);
|
if (a1->type != ASE_AWK_VAL_REX) ASE_AWK_FREEREX (run->awk, rex);
|
||||||
|
|
||||||
if (n == -1) return -1;
|
if (n == -1) return -1;
|
||||||
|
|
||||||
|
@ -52,8 +52,7 @@ OBJ_FILES_LIB = \
|
|||||||
$(TMP_DIR)/val.o \
|
$(TMP_DIR)/val.o \
|
||||||
$(TMP_DIR)/func.o \
|
$(TMP_DIR)/func.o \
|
||||||
$(TMP_DIR)/misc.o \
|
$(TMP_DIR)/misc.o \
|
||||||
$(TMP_DIR)/extio.o \
|
$(TMP_DIR)/extio.o
|
||||||
$(TMP_DIR)/rex.o
|
|
||||||
|
|
||||||
OBJ_FILES_JNI = $(TMP_DIR)/jni.o
|
OBJ_FILES_JNI = $(TMP_DIR)/jni.o
|
||||||
|
|
||||||
@ -150,9 +149,6 @@ $(TMP_DIR)/misc.o: misc.c
|
|||||||
$(TMP_DIR)/extio.o: extio.c
|
$(TMP_DIR)/extio.o: extio.c
|
||||||
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c extio.c
|
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c extio.c
|
||||||
|
|
||||||
$(TMP_DIR)/rex.o: rex.c
|
|
||||||
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) -o $@ -c rex.c
|
|
||||||
|
|
||||||
$(TMP_DIR)/jni.o: jni.c
|
$(TMP_DIR)/jni.o: jni.c
|
||||||
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) $(CFLAGS_JNI) -o $@ -c jni.c
|
$(LIBTOOL_COMPILE) $(CC) $(CFLAGS) $(CFLAGS_JNI) -o $@ -c jni.c
|
||||||
|
|
||||||
|
@ -828,9 +828,9 @@ ase_char_t* ase_awk_strxntokbyrex (
|
|||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, rex,
|
run->awk, rex,
|
||||||
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
|
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
|
||||||
ptr, left, (const ase_char_t**)&match_ptr, &match_len,
|
ptr, left, (const ase_char_t**)&match_ptr, &match_len,
|
||||||
errnum);
|
errnum);
|
||||||
if (n == -1) return ASE_NULL;
|
if (n == -1) return ASE_NULL;
|
||||||
@ -908,3 +908,45 @@ exit_loop:
|
|||||||
ASE_NULL: (match_ptr+match_len);
|
ASE_NULL: (match_ptr+match_len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ASE_AWK_REXERRTOERR(err) \
|
||||||
|
((err == ASE_REX_ENOERR)? ASE_AWK_ENOERR: \
|
||||||
|
(err == ASE_REX_ENOMEM)? ASE_AWK_ENOMEM: \
|
||||||
|
(err == ASE_REX_ERECUR)? ASE_AWK_EREXRECUR: \
|
||||||
|
(err == ASE_REX_ERPAREN)? ASE_AWK_EREXRPAREN: \
|
||||||
|
(err == ASE_REX_ERBRACKET)? ASE_AWK_EREXRBRACKET: \
|
||||||
|
(err == ASE_REX_ERBRACE)? ASE_AWK_EREXRBRACE: \
|
||||||
|
(err == ASE_REX_EUNBALPAR)? ASE_AWK_EREXUNBALPAR: \
|
||||||
|
(err == ASE_REX_ECOLON)? ASE_AWK_EREXCOLON: \
|
||||||
|
(err == ASE_REX_ECRANGE)? ASE_AWK_EREXCRANGE: \
|
||||||
|
(err == ASE_REX_ECCLASS)? ASE_AWK_EREXCCLASS: \
|
||||||
|
(err == ASE_REX_EBRANGE)? ASE_AWK_EREXBRANGE: \
|
||||||
|
(err == ASE_REX_EEND)? ASE_AWK_EREXEND: \
|
||||||
|
(err == ASE_REX_EGARBAGE)? ASE_AWK_EREXGARBAGE: \
|
||||||
|
ASE_AWK_EINTERN)
|
||||||
|
|
||||||
|
void* ase_awk_buildrex (
|
||||||
|
ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
void* p;
|
||||||
|
|
||||||
|
p = ase_buildrex (
|
||||||
|
&awk->prmfns.mmgr, awk->rex.depth.max.build, ptn, len, &err);
|
||||||
|
if (p == ASE_NULL) *errnum = ASE_AWK_REXERRTOERR(err);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ase_awk_matchrex (
|
||||||
|
ase_awk_t* awk, void* code, int option,
|
||||||
|
const ase_char_t* str, ase_size_t len,
|
||||||
|
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
|
||||||
|
{
|
||||||
|
int err, x;
|
||||||
|
|
||||||
|
x = ase_matchrex (
|
||||||
|
&awk->prmfns.mmgr, &awk->prmfns.ccls, awk->rex.depth.max.match,
|
||||||
|
code, option, str, len, match_ptr, match_len, &err);
|
||||||
|
if (x < 0) *errnum = ASE_AWK_REXERRTOERR(err);
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
@ -37,6 +37,15 @@ ase_char_t* ase_awk_strxntokbyrex (
|
|||||||
ase_awk_run_t* run, const ase_char_t* s, ase_size_t len,
|
ase_awk_run_t* run, const ase_char_t* s, ase_size_t len,
|
||||||
void* rex, ase_char_t** tok, ase_size_t* tok_len, int* errnum);
|
void* rex, ase_char_t** tok, ase_size_t* tok_len, int* errnum);
|
||||||
|
|
||||||
|
|
||||||
|
void* ase_awk_buildrex (
|
||||||
|
ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum);
|
||||||
|
|
||||||
|
int ase_awk_matchrex (
|
||||||
|
ase_awk_t* awk, void* code, int option,
|
||||||
|
const ase_char_t* str, ase_size_t len,
|
||||||
|
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2868,7 +2868,7 @@ static ase_awk_nde_t* parse_primary (ase_awk_t* awk, ase_size_t line)
|
|||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
nde->code = ase_awk_buildrex (awk,
|
nde->code = ASE_AWK_BUILDREX (awk,
|
||||||
ASE_STR_BUF(&awk->token.name),
|
ASE_STR_BUF(&awk->token.name),
|
||||||
ASE_STR_LEN(&awk->token.name),
|
ASE_STR_LEN(&awk->token.name),
|
||||||
&errnum);
|
&errnum);
|
||||||
|
@ -354,7 +354,7 @@ static int set_global (
|
|||||||
|
|
||||||
/* compile the regular expression */
|
/* compile the regular expression */
|
||||||
/* TODO: use safebuild */
|
/* TODO: use safebuild */
|
||||||
rex = ase_awk_buildrex (
|
rex = ASE_AWK_BUILDREX (
|
||||||
run->awk, fs_ptr, fs_len, &run->errnum);
|
run->awk, fs_ptr, fs_len, &run->errnum);
|
||||||
if (rex == ASE_NULL)
|
if (rex == ASE_NULL)
|
||||||
{
|
{
|
||||||
@ -365,7 +365,7 @@ static int set_global (
|
|||||||
|
|
||||||
if (run->global.fs != ASE_NULL)
|
if (run->global.fs != ASE_NULL)
|
||||||
{
|
{
|
||||||
ase_awk_freerex (run->awk, run->global.fs);
|
ASE_AWK_FREEREX (run->awk, run->global.fs);
|
||||||
}
|
}
|
||||||
run->global.fs = rex;
|
run->global.fs = rex;
|
||||||
}
|
}
|
||||||
@ -499,7 +499,7 @@ static int set_global (
|
|||||||
|
|
||||||
/* compile the regular expression */
|
/* compile the regular expression */
|
||||||
/* TODO: use safebuild */
|
/* TODO: use safebuild */
|
||||||
rex = ase_awk_buildrex (
|
rex = ASE_AWK_BUILDREX (
|
||||||
run->awk, rs_ptr, rs_len, &run->errnum);
|
run->awk, rs_ptr, rs_len, &run->errnum);
|
||||||
if (rex == ASE_NULL)
|
if (rex == ASE_NULL)
|
||||||
{
|
{
|
||||||
@ -510,7 +510,7 @@ static int set_global (
|
|||||||
|
|
||||||
if (run->global.rs != ASE_NULL)
|
if (run->global.rs != ASE_NULL)
|
||||||
{
|
{
|
||||||
ase_awk_freerex (run->awk, run->global.rs);
|
ASE_AWK_FREEREX (run->awk, run->global.rs);
|
||||||
}
|
}
|
||||||
run->global.rs = rex;
|
run->global.rs = rex;
|
||||||
}
|
}
|
||||||
@ -3079,8 +3079,7 @@ static ase_awk_val_t* eval_expression (ase_awk_run_t* run, ase_awk_nde_t* nde)
|
|||||||
/* the record has never been read.
|
/* the record has never been read.
|
||||||
* probably, this functions has been triggered
|
* probably, this functions has been triggered
|
||||||
* by the statements in the BEGIN block */
|
* by the statements in the BEGIN block */
|
||||||
n = ase_awk_isemptyrex (
|
n = ASE_AWK_ISEMPTYREX(run->awk,((ase_awk_val_rex_t*)v)->code)? 1: 0;
|
||||||
run->awk, ((ase_awk_val_rex_t*)v)->code)? 1: 0;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -3088,10 +3087,10 @@ static ase_awk_val_t* eval_expression (ase_awk_run_t* run, ase_awk_nde_t* nde)
|
|||||||
run->inrec.d0->type == ASE_AWK_VAL_STR,
|
run->inrec.d0->type == ASE_AWK_VAL_STR,
|
||||||
"the internal value representing $0 should always be of the string type once it has been set/updated. it is nil initially.");
|
"the internal value representing $0 should always be of the string type once it has been set/updated. it is nil initially.");
|
||||||
|
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
((ase_awk_run_t*)run)->awk,
|
((ase_awk_run_t*)run)->awk,
|
||||||
((ase_awk_val_rex_t*)v)->code,
|
((ase_awk_val_rex_t*)v)->code,
|
||||||
((((ase_awk_run_t*)run)->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
|
((((ase_awk_run_t*)run)->global.ignorecase)? ASE_REX_IGNORECASE: 0),
|
||||||
((ase_awk_val_str_t*)run->inrec.d0)->buf,
|
((ase_awk_val_str_t*)run->inrec.d0)->buf,
|
||||||
((ase_awk_val_str_t*)run->inrec.d0)->len,
|
((ase_awk_val_str_t*)run->inrec.d0)->len,
|
||||||
ASE_NULL, ASE_NULL, &errnum);
|
ASE_NULL, ASE_NULL, &errnum);
|
||||||
@ -4723,7 +4722,7 @@ static ase_awk_val_t* eval_binop_match0 (
|
|||||||
}
|
}
|
||||||
else if (right->type == ASE_AWK_VAL_STR)
|
else if (right->type == ASE_AWK_VAL_STR)
|
||||||
{
|
{
|
||||||
rex_code = ase_awk_buildrex (
|
rex_code = ASE_AWK_BUILDREX (
|
||||||
run->awk,
|
run->awk,
|
||||||
((ase_awk_val_str_t*)right)->buf,
|
((ase_awk_val_str_t*)right)->buf,
|
||||||
((ase_awk_val_str_t*)right)->len, &errnum);
|
((ase_awk_val_str_t*)right)->len, &errnum);
|
||||||
@ -4739,7 +4738,7 @@ static ase_awk_val_t* eval_binop_match0 (
|
|||||||
run, right, ASE_AWK_VALTOSTR_CLEAR, ASE_NULL, &len);
|
run, right, ASE_AWK_VALTOSTR_CLEAR, ASE_NULL, &len);
|
||||||
if (str == ASE_NULL) return ASE_NULL;
|
if (str == ASE_NULL) return ASE_NULL;
|
||||||
|
|
||||||
rex_code = ase_awk_buildrex (run->awk, str, len, &errnum);
|
rex_code = ASE_AWK_BUILDREX (run->awk, str, len, &errnum);
|
||||||
if (rex_code == ASE_NULL)
|
if (rex_code == ASE_NULL)
|
||||||
{
|
{
|
||||||
ASE_AWK_FREE (run->awk, str);
|
ASE_AWK_FREE (run->awk, str);
|
||||||
@ -4753,9 +4752,9 @@ static ase_awk_val_t* eval_binop_match0 (
|
|||||||
|
|
||||||
if (left->type == ASE_AWK_VAL_STR)
|
if (left->type == ASE_AWK_VAL_STR)
|
||||||
{
|
{
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, rex_code,
|
run->awk, rex_code,
|
||||||
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
|
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
|
||||||
((ase_awk_val_str_t*)left)->buf,
|
((ase_awk_val_str_t*)left)->buf,
|
||||||
((ase_awk_val_str_t*)left)->len,
|
((ase_awk_val_str_t*)left)->len,
|
||||||
ASE_NULL, ASE_NULL, &errnum);
|
ASE_NULL, ASE_NULL, &errnum);
|
||||||
@ -4790,9 +4789,9 @@ static ase_awk_val_t* eval_binop_match0 (
|
|||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
n = ase_awk_matchrex (
|
n = ASE_AWK_MATCHREX (
|
||||||
run->awk, rex_code,
|
run->awk, rex_code,
|
||||||
((run->global.ignorecase)? ASE_AWK_REX_IGNORECASE: 0),
|
((run->global.ignorecase)? ASE_REX_IGNORECASE: 0),
|
||||||
str, len, ASE_NULL, ASE_NULL, &errnum);
|
str, len, ASE_NULL, ASE_NULL, &errnum);
|
||||||
if (n == -1)
|
if (n == -1)
|
||||||
{
|
{
|
||||||
|
@ -320,7 +320,7 @@ ase_awk_val_t* ase_awk_makerexval (
|
|||||||
|
|
||||||
val = (ase_awk_val_rex_t*) ASE_AWK_MALLOC (
|
val = (ase_awk_val_rex_t*) ASE_AWK_MALLOC (
|
||||||
run->awk, ASE_SIZEOF(ase_awk_val_rex_t) +
|
run->awk, ASE_SIZEOF(ase_awk_val_rex_t) +
|
||||||
(ASE_SIZEOF(*buf)*len+1) + ASE_AWK_REX_LEN(code));
|
(ASE_SIZEOF(*buf)*len+1) + ASE_REX_LEN(code));
|
||||||
if (val == ASE_NULL) return ASE_NULL;
|
if (val == ASE_NULL) return ASE_NULL;
|
||||||
|
|
||||||
val->type = ASE_AWK_VAL_REX;
|
val->type = ASE_AWK_VAL_REX;
|
||||||
@ -338,7 +338,7 @@ ase_awk_val_t* ase_awk_makerexval (
|
|||||||
ase_strncpy (val->buf, buf, len);
|
ase_strncpy (val->buf, buf, len);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
val->code = ASE_AWK_MALLOC (run->awk, ASE_AWK_REX_LEN(code));
|
val->code = ASE_AWK_MALLOC (run->awk, ASE_REX_LEN(code));
|
||||||
if (val->code == ASE_NULL)
|
if (val->code == ASE_NULL)
|
||||||
{
|
{
|
||||||
ASE_AWK_FREE (run->awk, val->buf);
|
ASE_AWK_FREE (run->awk, val->buf);
|
||||||
@ -348,7 +348,7 @@ ase_awk_val_t* ase_awk_makerexval (
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
val->code = val->buf + len + 1;
|
val->code = val->buf + len + 1;
|
||||||
ase_memcpy (val->code, code, ASE_AWK_REX_LEN(code));
|
ase_memcpy (val->code, code, ASE_REX_LEN(code));
|
||||||
|
|
||||||
return (ase_awk_val_t*)val;
|
return (ase_awk_val_t*)val;
|
||||||
}
|
}
|
||||||
@ -506,7 +506,7 @@ void ase_awk_freeval (ase_awk_run_t* run, ase_awk_val_t* val, ase_bool_t cache)
|
|||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
ASE_AWK_FREE (run->awk, ((ase_awk_val_rex_t*)val)->buf);
|
ASE_AWK_FREE (run->awk, ((ase_awk_val_rex_t*)val)->buf);
|
||||||
ase_awk_freerex (run->awk, ((ase_awk_val_rex_t*)val)->code);
|
ASE_AWK_FREEREX (run->awk, ((ase_awk_val_rex_t*)val)->code);
|
||||||
*/
|
*/
|
||||||
ASE_AWK_FREE (run->awk, val);
|
ASE_AWK_FREE (run->awk, val);
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ OBJ_FILES = \
|
|||||||
$(TMP_DIR)/mem.o \
|
$(TMP_DIR)/mem.o \
|
||||||
$(TMP_DIR)/str.o \
|
$(TMP_DIR)/str.o \
|
||||||
$(TMP_DIR)/map.o \
|
$(TMP_DIR)/map.o \
|
||||||
|
$(TMP_DIR)/rex.o \
|
||||||
$(TMP_DIR)/misc.o
|
$(TMP_DIR)/misc.o
|
||||||
|
|
||||||
lib: $(OUT_FILE)
|
lib: $(OUT_FILE)
|
||||||
@ -39,6 +40,9 @@ $(TMP_DIR)/str.o: str.c
|
|||||||
$(TMP_DIR)/map.o: map.c
|
$(TMP_DIR)/map.o: map.c
|
||||||
$(CC) $(CFLAGS) -o $@ -c map.c
|
$(CC) $(CFLAGS) -o $@ -c map.c
|
||||||
|
|
||||||
|
$(TMP_DIR)/rex.o: rex.c
|
||||||
|
$(CC) $(CFLAGS) -o $@ -c rex.c
|
||||||
|
|
||||||
$(TMP_DIR)/misc.o: misc.c
|
$(TMP_DIR)/misc.o: misc.c
|
||||||
$(CC) $(CFLAGS) -o $@ -c misc.c
|
$(CC) $(CFLAGS) -o $@ -c misc.c
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
* {License}
|
* {License}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <ase/awk/awk_i.h>
|
#include <ase/cmn/rex.h>
|
||||||
|
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
#include <ase/utl/stdio.h>
|
#include <ase/utl/stdio.h>
|
||||||
@ -64,7 +64,7 @@ typedef struct cshdr_t cshdr_t;
|
|||||||
|
|
||||||
struct builder_t
|
struct builder_t
|
||||||
{
|
{
|
||||||
ase_awk_t* awk;
|
ase_mmgr_t* mmgr;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
@ -97,7 +97,8 @@ struct builder_t
|
|||||||
|
|
||||||
struct matcher_t
|
struct matcher_t
|
||||||
{
|
{
|
||||||
ase_awk_t* awk;
|
ase_mmgr_t* mmgr;
|
||||||
|
ase_ccls_t* ccls;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
@ -214,28 +215,31 @@ static const ase_byte_t* match_occurrences (
|
|||||||
static ase_bool_t __test_charset (
|
static ase_bool_t __test_charset (
|
||||||
matcher_t* matcher, const ase_byte_t* p, ase_size_t csc, ase_char_t c);
|
matcher_t* matcher, const ase_byte_t* p, ase_size_t csc, ase_char_t c);
|
||||||
|
|
||||||
static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isalnum (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isalpha (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isblank (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_iscntrl (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isdigit (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isgraph (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_islower (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isprint (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_ispunct (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isspace (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isupper (ase_ccls_t* ccls, ase_char_t c);
|
||||||
static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c);
|
static ase_bool_t cc_isxdigit (ase_ccls_t* ccls, ase_char_t c);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
XXX
|
||||||
static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p);
|
static const ase_byte_t* __print_pattern (ase_awk_t* awk, const ase_byte_t* p);
|
||||||
static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p);
|
static const ase_byte_t* __print_branch (ase_awk_t* awk, const ase_byte_t* p);
|
||||||
static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p);
|
static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p);
|
||||||
|
#endif
|
||||||
|
|
||||||
struct __char_class_t
|
struct __char_class_t
|
||||||
{
|
{
|
||||||
const ase_char_t* name;
|
const ase_char_t* name;
|
||||||
ase_size_t name_len;
|
ase_size_t name_len;
|
||||||
ase_bool_t (*func) (ase_awk_t* awk, ase_char_t c);
|
ase_bool_t (*func) (ase_ccls_t* ccls, ase_char_t c);
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct __char_class_t __char_class[] =
|
static struct __char_class_t __char_class[] =
|
||||||
@ -265,19 +269,20 @@ static struct __char_class_t __char_class[] =
|
|||||||
{ ASE_NULL, 0, ASE_NULL }
|
{ ASE_NULL, 0, ASE_NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
void* ase_awk_buildrex (
|
void* ase_buildrex (
|
||||||
ase_awk_t* awk, const ase_char_t* ptn, ase_size_t len, int* errnum)
|
ase_mmgr_t* mmgr, ase_size_t depth,
|
||||||
|
const ase_char_t* ptn, ase_size_t len, int* errnum)
|
||||||
{
|
{
|
||||||
builder_t builder;
|
builder_t builder;
|
||||||
|
|
||||||
builder.awk = awk;
|
builder.mmgr = mmgr;
|
||||||
builder.code.capa = DEF_CODE_CAPA;
|
builder.code.capa = DEF_CODE_CAPA;
|
||||||
builder.code.size = 0;
|
builder.code.size = 0;
|
||||||
builder.code.buf = (ase_byte_t*)
|
builder.code.buf = (ase_byte_t*)
|
||||||
ASE_AWK_MALLOC (builder.awk, builder.code.capa);
|
ASE_MALLOC (builder.mmgr, builder.code.capa);
|
||||||
if (builder.code.buf == ASE_NULL)
|
if (builder.code.buf == ASE_NULL)
|
||||||
{
|
{
|
||||||
*errnum = ASE_AWK_ENOMEM;
|
*errnum = ASE_REX_ENOMEM;
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -289,20 +294,20 @@ void* ase_awk_buildrex (
|
|||||||
builder.ptn.curc.value = ASE_T('\0');
|
builder.ptn.curc.value = ASE_T('\0');
|
||||||
builder.ptn.curc.escaped = ase_false;
|
builder.ptn.curc.escaped = ase_false;
|
||||||
|
|
||||||
builder.depth.max = awk->rex.depth.max.build;
|
builder.depth.max = depth;
|
||||||
builder.depth.cur = 0;
|
builder.depth.cur = 0;
|
||||||
|
|
||||||
if (next_char (&builder, LEVEL_TOP) == -1)
|
if (next_char (&builder, LEVEL_TOP) == -1)
|
||||||
{
|
{
|
||||||
if (errnum != ASE_NULL) *errnum = builder.errnum;
|
if (errnum != ASE_NULL) *errnum = builder.errnum;
|
||||||
ASE_AWK_FREE (builder.awk, builder.code.buf);
|
ASE_FREE (builder.mmgr, builder.code.buf);
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (build_pattern (&builder) == -1)
|
if (build_pattern (&builder) == -1)
|
||||||
{
|
{
|
||||||
if (errnum != ASE_NULL) *errnum = builder.errnum;
|
if (errnum != ASE_NULL) *errnum = builder.errnum;
|
||||||
ASE_AWK_FREE (builder.awk, builder.code.buf);
|
ASE_FREE (builder.mmgr, builder.code.buf);
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -313,23 +318,24 @@ void* ase_awk_buildrex (
|
|||||||
if (builder.ptn.curc.type == CT_SPECIAL &&
|
if (builder.ptn.curc.type == CT_SPECIAL &&
|
||||||
builder.ptn.curc.value == ASE_T(')'))
|
builder.ptn.curc.value == ASE_T(')'))
|
||||||
{
|
{
|
||||||
*errnum = ASE_AWK_EREXUNBALPAR;
|
*errnum = ASE_REX_EUNBALPAR;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*errnum = ASE_AWK_EREXGARBAGE;
|
*errnum = ASE_REX_EGARBAGE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ASE_AWK_FREE (builder.awk, builder.code.buf);
|
ASE_FREE (builder.mmgr, builder.code.buf);
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder.code.buf;
|
return builder.code.buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ase_awk_matchrex (
|
int ase_matchrex (
|
||||||
ase_awk_t* awk, void* code, int option,
|
ase_mmgr_t* mmgr, ase_ccls_t* ccls, ase_size_t depth,
|
||||||
|
void* code, int option,
|
||||||
const ase_char_t* str, ase_size_t len,
|
const ase_char_t* str, ase_size_t len,
|
||||||
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
|
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum)
|
||||||
{
|
{
|
||||||
@ -338,15 +344,16 @@ int ase_awk_matchrex (
|
|||||||
ase_size_t offset = 0;
|
ase_size_t offset = 0;
|
||||||
/*const ase_char_t* match_ptr_zero = ASE_NULL;*/
|
/*const ase_char_t* match_ptr_zero = ASE_NULL;*/
|
||||||
|
|
||||||
matcher.awk = awk;
|
matcher.mmgr = mmgr;
|
||||||
|
matcher.ccls = ccls;
|
||||||
|
|
||||||
/* store the source string */
|
/* store the source string */
|
||||||
matcher.match.str.ptr = str;
|
matcher.match.str.ptr = str;
|
||||||
matcher.match.str.end = str + len;
|
matcher.match.str.end = str + len;
|
||||||
|
|
||||||
matcher.depth.max = awk->rex.depth.max.match;
|
matcher.depth.max = depth;
|
||||||
matcher.depth.cur = 0;
|
matcher.depth.cur = 0;
|
||||||
matcher.ignorecase = (option & ASE_AWK_REX_IGNORECASE)? 1: 0;
|
matcher.ignorecase = (option & ASE_REX_IGNORECASE)? 1: 0;
|
||||||
|
|
||||||
mat.matched = ase_false;
|
mat.matched = ase_false;
|
||||||
/* TODO: should it allow an offset here??? */
|
/* TODO: should it allow an offset here??? */
|
||||||
@ -395,13 +402,13 @@ int ase_awk_matchrex (
|
|||||||
return (mat.matched)? 1: 0;
|
return (mat.matched)? 1: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ase_awk_freerex (ase_awk_t* awk, void* code)
|
void ase_freerex (ase_mmgr_t* mmgr, void* code)
|
||||||
{
|
{
|
||||||
ASE_ASSERT (code != ASE_NULL);
|
ASE_ASSERT (code != ASE_NULL);
|
||||||
ASE_AWK_FREE (awk, code);
|
ASE_FREE (mmgr, code);
|
||||||
}
|
}
|
||||||
|
|
||||||
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code)
|
ase_bool_t ase_isemptyrex (void* code)
|
||||||
{
|
{
|
||||||
rhdr_t* rhdr = (rhdr_t*) code;
|
rhdr_t* rhdr = (rhdr_t*) code;
|
||||||
ASE_ASSERT (rhdr != ASE_NULL);
|
ASE_ASSERT (rhdr != ASE_NULL);
|
||||||
@ -421,7 +428,7 @@ static int build_pattern (builder_t* builder)
|
|||||||
|
|
||||||
if (builder->depth.max > 0 && builder->depth.cur >= builder->depth.max)
|
if (builder->depth.max > 0 && builder->depth.cur >= builder->depth.max)
|
||||||
{
|
{
|
||||||
builder->errnum = ASE_AWK_EREXRECUR;
|
builder->errnum = ASE_REX_ERECUR;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -558,7 +565,7 @@ static int build_atom (builder_t* builder)
|
|||||||
if (builder->ptn.curc.type != CT_SPECIAL ||
|
if (builder->ptn.curc.type != CT_SPECIAL ||
|
||||||
builder->ptn.curc.value != ASE_T(')'))
|
builder->ptn.curc.value != ASE_T(')'))
|
||||||
{
|
{
|
||||||
builder->errnum = ASE_AWK_EREXRPAREN;
|
builder->errnum = ASE_REX_ERPAREN;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -608,7 +615,7 @@ static int build_atom (builder_t* builder)
|
|||||||
if (builder->ptn.curc.type != CT_SPECIAL ||
|
if (builder->ptn.curc.type != CT_SPECIAL ||
|
||||||
builder->ptn.curc.value != ASE_T(']'))
|
builder->ptn.curc.value != ASE_T(']'))
|
||||||
{
|
{
|
||||||
builder->errnum = ASE_AWK_EREXRBRACKET;
|
builder->errnum = ASE_REX_ERBRACKET;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -730,7 +737,7 @@ static int build_charset (builder_t* builder, code_t* cmd)
|
|||||||
ase_dprintf (
|
ase_dprintf (
|
||||||
ASE_T("build_charset: invalid character set range\n"));
|
ASE_T("build_charset: invalid character set range\n"));
|
||||||
#endif
|
#endif
|
||||||
builder->errnum = ASE_AWK_EREXCRANGE;
|
builder->errnum = ASE_REX_ECRANGE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -761,7 +768,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc)
|
|||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
ase_dprintf (ASE_T("build_cclass: wrong class name\n"));
|
ase_dprintf (ASE_T("build_cclass: wrong class name\n"));
|
||||||
#endif
|
#endif
|
||||||
builder->errnum = ASE_AWK_EREXCCLASS;
|
builder->errnum = ASE_REX_ECCLASS;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -774,7 +781,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc)
|
|||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
ase_dprintf (ASE_T("build_cclass: a colon(:) expected\n"));
|
ase_dprintf (ASE_T("build_cclass: a colon(:) expected\n"));
|
||||||
#endif
|
#endif
|
||||||
builder->errnum = ASE_AWK_EREXCOLON;
|
builder->errnum = ASE_REX_ECOLON;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -787,7 +794,7 @@ static int build_cclass (builder_t* builder, ase_char_t* cc)
|
|||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
ase_dprintf (ASE_T("build_cclass: ] expected\n"));
|
ase_dprintf (ASE_T("build_cclass: ] expected\n"));
|
||||||
#endif
|
#endif
|
||||||
builder->errnum = ASE_AWK_EREXRBRACKET;
|
builder->errnum = ASE_REX_ERBRACKET;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -836,7 +843,7 @@ static int build_occurrences (builder_t* builder, code_t* cmd)
|
|||||||
if (builder->ptn.curc.type != CT_SPECIAL ||
|
if (builder->ptn.curc.type != CT_SPECIAL ||
|
||||||
builder->ptn.curc.value != ASE_T('}'))
|
builder->ptn.curc.value != ASE_T('}'))
|
||||||
{
|
{
|
||||||
builder->errnum = ASE_AWK_EREXRBRACE;
|
builder->errnum = ASE_REX_ERBRACE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -886,7 +893,7 @@ what if it is not in the right format? convert it to ordinary characters?? */
|
|||||||
if (cmd->lbound > cmd->ubound)
|
if (cmd->lbound > cmd->ubound)
|
||||||
{
|
{
|
||||||
/* invalid occurrences range */
|
/* invalid occurrences range */
|
||||||
builder->errnum = ASE_AWK_EREXBRANGE;
|
builder->errnum = ASE_REX_EBRANGE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -897,7 +904,7 @@ what if it is not in the right format? convert it to ordinary characters?? */
|
|||||||
do { \
|
do { \
|
||||||
if (builder->ptn.curp >= builder->ptn.end) \
|
if (builder->ptn.curp >= builder->ptn.end) \
|
||||||
{ \
|
{ \
|
||||||
builder->errnum = ASE_AWK_EREXEND; \
|
builder->errnum = ASE_REX_EEND; \
|
||||||
return -1; \
|
return -1; \
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
@ -1079,29 +1086,29 @@ static int add_code (builder_t* builder, void* data, ase_size_t len)
|
|||||||
if (capa == 0) capa = DEF_CODE_CAPA;
|
if (capa == 0) capa = DEF_CODE_CAPA;
|
||||||
while (len > capa - builder->code.size) { capa = capa * 2; }
|
while (len > capa - builder->code.size) { capa = capa * 2; }
|
||||||
|
|
||||||
if (builder->awk->prmfns.mmgr.realloc != ASE_NULL)
|
if (builder->mmgr->realloc != ASE_NULL)
|
||||||
{
|
{
|
||||||
tmp = (ase_byte_t*) ASE_AWK_REALLOC (
|
tmp = (ase_byte_t*) ASE_REALLOC (
|
||||||
builder->awk, builder->code.buf, capa);
|
builder->mmgr, builder->code.buf, capa);
|
||||||
if (tmp == ASE_NULL)
|
if (tmp == ASE_NULL)
|
||||||
{
|
{
|
||||||
builder->errnum = ASE_AWK_ENOMEM;
|
builder->errnum = ASE_REX_ENOMEM;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
tmp = (ase_byte_t*) ASE_AWK_MALLOC (builder->awk, capa);
|
tmp = (ase_byte_t*) ASE_MALLOC (builder->mmgr, capa);
|
||||||
if (tmp == ASE_NULL)
|
if (tmp == ASE_NULL)
|
||||||
{
|
{
|
||||||
builder->errnum = ASE_AWK_ENOMEM;
|
builder->errnum = ASE_REX_ENOMEM;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (builder->code.buf != ASE_NULL)
|
if (builder->code.buf != ASE_NULL)
|
||||||
{
|
{
|
||||||
ase_memcpy (tmp, builder->code.buf, builder->code.capa);
|
ase_memcpy (tmp, builder->code.buf, builder->code.capa);
|
||||||
ASE_AWK_FREE (builder->awk, builder->code.buf);
|
ASE_FREE (builder->mmgr, builder->code.buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1193,7 +1200,7 @@ static const ase_byte_t* match_branch_body (
|
|||||||
|
|
||||||
if (matcher->depth.max > 0 && matcher->depth.cur >= matcher->depth.max)
|
if (matcher->depth.max > 0 && matcher->depth.cur >= matcher->depth.max)
|
||||||
{
|
{
|
||||||
matcher->errnum = ASE_AWK_EREXRECUR;
|
matcher->errnum = ASE_REX_ERECUR;
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1372,7 +1379,7 @@ static const ase_byte_t* match_ord_char (
|
|||||||
ubound = cp->ubound;
|
ubound = cp->ubound;
|
||||||
|
|
||||||
cc = *(ase_char_t*)p; p += ASE_SIZEOF(cc);
|
cc = *(ase_char_t*)p; p += ASE_SIZEOF(cc);
|
||||||
if (matcher->ignorecase) cc = ASE_AWK_TOUPPER(matcher->awk, cc);
|
if (matcher->ignorecase) cc = ASE_TOUPPER(matcher->ccls, cc);
|
||||||
|
|
||||||
/* merge the same consecutive codes
|
/* merge the same consecutive codes
|
||||||
* for example, a{1,10}a{0,10} is shortened to a{1,20} */
|
* for example, a{1,10}a{0,10} is shortened to a{1,20} */
|
||||||
@ -1381,7 +1388,7 @@ static const ase_byte_t* match_ord_char (
|
|||||||
while (p < mat->branch_end &&
|
while (p < mat->branch_end &&
|
||||||
cp->cmd == ((const code_t*)p)->cmd)
|
cp->cmd == ((const code_t*)p)->cmd)
|
||||||
{
|
{
|
||||||
if (ASE_AWK_TOUPPER (matcher->awk, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break;
|
if (ASE_TOUPPER (matcher->ccls, *(ase_char_t*)(p+ASE_SIZEOF(*cp))) != cc) break;
|
||||||
|
|
||||||
lbound += ((const code_t*)p)->lbound;
|
lbound += ((const code_t*)p)->lbound;
|
||||||
ubound += ((const code_t*)p)->ubound;
|
ubound += ((const code_t*)p)->ubound;
|
||||||
@ -1423,7 +1430,7 @@ static const ase_byte_t* match_ord_char (
|
|||||||
ASE_T("match_ord_char: <ignorecase> %c %c\n"),
|
ASE_T("match_ord_char: <ignorecase> %c %c\n"),
|
||||||
cc, mat->match_ptr[si]);
|
cc, mat->match_ptr[si]);
|
||||||
#endif
|
#endif
|
||||||
if (cc != ASE_AWK_TOUPPER (matcher->awk, mat->match_ptr[si])) break;
|
if (cc != ASE_TOUPPER (matcher->ccls, mat->match_ptr[si])) break;
|
||||||
si++;
|
si++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1486,7 +1493,7 @@ static const ase_byte_t* match_charset (
|
|||||||
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
|
if (&mat->match_ptr[si] >= matcher->match.str.end) break;
|
||||||
|
|
||||||
c = mat->match_ptr[si];
|
c = mat->match_ptr[si];
|
||||||
if (matcher->ignorecase) c = ASE_AWK_TOUPPER(matcher->awk, c);
|
if (matcher->ignorecase) c = ASE_TOUPPER(matcher->ccls, c);
|
||||||
|
|
||||||
n = __test_charset (matcher, p, cshdr->csc, c);
|
n = __test_charset (matcher, p, cshdr->csc, c);
|
||||||
if (cp->negate) n = !n;
|
if (cp->negate) n = !n;
|
||||||
@ -1555,11 +1562,11 @@ static const ase_byte_t* match_group (
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
grp_len = (ase_size_t*) ASE_AWK_MALLOC (
|
grp_len = (ase_size_t*) ASE_MALLOC (
|
||||||
matcher->awk, ASE_SIZEOF(ase_size_t) * cp->ubound);
|
matcher->mmgr, ASE_SIZEOF(ase_size_t) * cp->ubound);
|
||||||
if (grp_len == ASE_NULL)
|
if (grp_len == ASE_NULL)
|
||||||
{
|
{
|
||||||
matcher->errnum = ASE_AWK_ENOMEM;
|
matcher->errnum = ASE_REX_ENOMEM;
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1574,7 +1581,7 @@ static const ase_byte_t* match_group (
|
|||||||
if (match_pattern (matcher, p, &mat2) == ASE_NULL)
|
if (match_pattern (matcher, p, &mat2) == ASE_NULL)
|
||||||
{
|
{
|
||||||
if (grp_len != grp_len_static)
|
if (grp_len != grp_len_static)
|
||||||
ASE_AWK_FREE (matcher->awk, grp_len);
|
ASE_FREE (matcher->mmgr, grp_len);
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
if (!mat2.matched) break;
|
if (!mat2.matched) break;
|
||||||
@ -1620,7 +1627,7 @@ static const ase_byte_t* match_group (
|
|||||||
if (tmp == ASE_NULL)
|
if (tmp == ASE_NULL)
|
||||||
{
|
{
|
||||||
if (grp_len != grp_len_static)
|
if (grp_len != grp_len_static)
|
||||||
ASE_AWK_FREE (matcher->awk, grp_len);
|
ASE_FREE (matcher->mmgr, grp_len);
|
||||||
return ASE_NULL;
|
return ASE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1640,7 +1647,7 @@ static const ase_byte_t* match_group (
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (grp_len != grp_len_static) ASE_AWK_FREE (matcher->awk, grp_len);
|
if (grp_len != grp_len_static) ASE_FREE (matcher->mmgr, grp_len);
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1751,7 +1758,7 @@ static ase_bool_t __test_charset (
|
|||||||
{
|
{
|
||||||
c1 = *(const ase_char_t*)p;
|
c1 = *(const ase_char_t*)p;
|
||||||
if (matcher->ignorecase)
|
if (matcher->ignorecase)
|
||||||
c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
|
c1 = ASE_TOUPPER(matcher->ccls, c1);
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
ase_dprintf (
|
ase_dprintf (
|
||||||
ASE_T("match_charset: <one> %c %c\n"), c, c1);
|
ASE_T("match_charset: <one> %c %c\n"), c, c1);
|
||||||
@ -1766,8 +1773,8 @@ static ase_bool_t __test_charset (
|
|||||||
|
|
||||||
if (matcher->ignorecase)
|
if (matcher->ignorecase)
|
||||||
{
|
{
|
||||||
c1 = ASE_AWK_TOUPPER(matcher->awk, c1);
|
c1 = ASE_TOUPPER(matcher->ccls, c1);
|
||||||
c2 = ASE_AWK_TOUPPER(matcher->awk, c2);
|
c2 = ASE_TOUPPER(matcher->ccls, c2);
|
||||||
}
|
}
|
||||||
#ifdef DEBUG_REX
|
#ifdef DEBUG_REX
|
||||||
ase_dprintf (
|
ase_dprintf (
|
||||||
@ -1784,7 +1791,7 @@ static ase_bool_t __test_charset (
|
|||||||
c, __char_class[c1].name);
|
c, __char_class[c1].name);
|
||||||
#endif
|
#endif
|
||||||
if (__char_class[c1].func (
|
if (__char_class[c1].func (
|
||||||
matcher->awk, c)) return ase_true;
|
matcher->ccls, c)) return ase_true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1798,66 +1805,67 @@ static ase_bool_t __test_charset (
|
|||||||
return ase_false;
|
return ase_false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isalnum (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isalnum (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISALNUM (awk, c);
|
return ASE_ISALNUM (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isalpha (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isalpha (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISALPHA (awk, c);
|
return ASE_ISALPHA (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isblank (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isblank (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return c == ASE_T(' ') || c == ASE_T('\t');
|
return c == ASE_T(' ') || c == ASE_T('\t');
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_iscntrl (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_iscntrl (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISCNTRL (awk, c);
|
return ASE_ISCNTRL (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isdigit (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isdigit (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISDIGIT (awk, c);
|
return ASE_ISDIGIT (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isgraph (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isgraph (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISGRAPH (awk, c);
|
return ASE_ISGRAPH (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_islower (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_islower (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISLOWER (awk, c);
|
return ASE_ISLOWER (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isprint (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isprint (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISPRINT (awk, c);
|
return ASE_ISPRINT (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_ispunct (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_ispunct (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISPUNCT (awk, c);
|
return ASE_ISPUNCT (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isspace (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isspace (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISSPACE (awk, c);
|
return ASE_ISSPACE (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isupper (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isupper (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISUPPER (awk, c);
|
return ASE_ISUPPER (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ase_bool_t cc_isxdigit (ase_awk_t* awk, ase_char_t c)
|
static ase_bool_t cc_isxdigit (ase_ccls_t* ccls, ase_char_t c)
|
||||||
{
|
{
|
||||||
return ASE_AWK_ISXDIGIT (awk, c);
|
return ASE_ISXDIGIT (ccls, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
#define DPRINTF awk->prmfns.misc.dprintf
|
#define DPRINTF awk->prmfns.misc.dprintf
|
||||||
#define DCUSTOM awk->prmfns.misc.custom_data
|
#define DCUSTOM awk->prmfns.misc.custom_data
|
||||||
|
|
||||||
@ -1995,3 +2003,4 @@ static const ase_byte_t* __print_atom (ase_awk_t* awk, const ase_byte_t* p)
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -4,12 +4,11 @@
|
|||||||
* {License}
|
* {License}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _ASE_AWK_REX_H_
|
#ifndef _ASE_CMN_REX_H_
|
||||||
#define _ASE_AWK_REX_H_
|
#define _ASE_CMN_REX_H_
|
||||||
|
|
||||||
#ifndef _ASE_AWK_AWK_H_
|
#include <ase/cmn/types.h>
|
||||||
#error Never include this file directly. Include <ase/awk/awk.h> instead
|
#include <ase/cmn/macros.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Regular Expression Syntax
|
* Regular Expression Syntax
|
||||||
@ -39,34 +38,54 @@
|
|||||||
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
* ab|xy -> |2|10|4|ORD_CHAR(no bound)|a|ORD_CHAR(no bound)|b|4|ORD_CHAR(no bound)|x|ORD_CHAR(no bound)|y|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define ASE_AWK_REX_NA(code) (*(ase_size_t*)(code))
|
#define ASE_REX_NA(code) (*(ase_size_t*)(code))
|
||||||
|
|
||||||
#define ASE_AWK_REX_LEN(code) \
|
#define ASE_REX_LEN(code) \
|
||||||
(*(ase_size_t*)((ase_byte_t*)(code)+ASE_SIZEOF(ase_size_t)))
|
(*(ase_size_t*)((ase_byte_t*)(code)+ASE_SIZEOF(ase_size_t)))
|
||||||
|
|
||||||
enum ase_awk_rex_opt_t
|
enum ase_rex_option_t
|
||||||
{
|
{
|
||||||
ASE_AWK_REX_IGNORECASE = (1 << 0)
|
ASE_REX_IGNORECASE = (1 << 0)
|
||||||
|
};
|
||||||
|
|
||||||
|
enum ase_rex_errnum_t
|
||||||
|
{
|
||||||
|
ASE_REX_ENOERR = 0,
|
||||||
|
ASE_REX_ENOMEM,
|
||||||
|
ASE_REX_ERECUR, /* recursion too deep */
|
||||||
|
ASE_REX_ERPAREN, /* a right parenthesis is expected */
|
||||||
|
ASE_REX_ERBRACKET, /* a right bracket is expected */
|
||||||
|
ASE_REX_ERBRACE, /* a right brace is expected */
|
||||||
|
ASE_REX_EUNBALPAR, /* unbalanced parenthesis */
|
||||||
|
ASE_REX_ECOLON, /* a colon is expected */
|
||||||
|
ASE_REX_ECRANGE, /* invalid character range */
|
||||||
|
ASE_REX_ECCLASS, /* invalid character class */
|
||||||
|
ASE_REX_EBRANGE, /* invalid boundary range */
|
||||||
|
ASE_REX_EEND, /* unexpected end of the pattern */
|
||||||
|
ASE_REX_EGARBAGE /* garbage after the pattern */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void* ase_awk_buildrex (
|
void* ase_buildrex (
|
||||||
ase_awk_t* awk, const ase_char_t* ptn,
|
ase_mmgr_t* mmgr, ase_size_t depth,
|
||||||
ase_size_t len, int* errnum);
|
const ase_char_t* ptn, ase_size_t len, int* errnum);
|
||||||
|
|
||||||
int ase_awk_matchrex (
|
int ase_matchrex (
|
||||||
ase_awk_t* awk, void* code, int option,
|
ase_mmgr_t* mmgr, ase_ccls_t* ccls, ase_size_t depth,
|
||||||
|
void* code, int option,
|
||||||
const ase_char_t* str, ase_size_t len,
|
const ase_char_t* str, ase_size_t len,
|
||||||
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum);
|
const ase_char_t** match_ptr, ase_size_t* match_len, int* errnum);
|
||||||
|
|
||||||
void ase_awk_freerex (ase_awk_t* awk, void* code);
|
void ase_freerex (ase_mmgr_t* mmgr, void* code);
|
||||||
|
|
||||||
ase_bool_t ase_awk_isemptyrex (ase_awk_t* awk, void* code);
|
ase_bool_t ase_isemptyrex (void* code);
|
||||||
|
|
||||||
void ase_awk_dprintrex (ase_awk_t* awk, void* rex);
|
#if 0
|
||||||
|
void ase_dprintrex (ase_rex_t* rex, void* rex);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user