From 5fef89a690ea8f1da0eb8a1e8a7671e822600a4f Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Tue, 5 Sep 2006 04:11:11 +0000 Subject: [PATCH] *** empty log message *** --- ase/awk/awk.h | 7 ++- ase/awk/awk_i.h | 10 +++- ase/awk/extio.c | 6 +-- ase/awk/func.c | 47 ++++++++++++++++-- ase/awk/misc.c | 42 +++++++++++++++-- ase/awk/run.c | 110 +++++++++++++++++++++++++++++++++---------- ase/test/awk/t38.awk | 5 +- ase/test/awk/t39.awk | 6 +++ 8 files changed, 194 insertions(+), 39 deletions(-) create mode 100644 ase/test/awk/t39.awk diff --git a/ase/awk/awk.h b/ase/awk/awk.h index 1e0e5a05..bdf17fc9 100644 --- a/ase/awk/awk.h +++ b/ase/awk/awk.h @@ -1,5 +1,5 @@ /* - * $Id: awk.h,v 1.108 2006-09-03 15:46:49 bacon Exp $ + * $Id: awk.h,v 1.109 2006-09-05 04:10:23 bacon Exp $ */ #ifndef _XP_AWK_AWK_H_ @@ -382,6 +382,11 @@ xp_char_t* xp_awk_strxntok ( const xp_char_t* delim, xp_size_t delim_len, xp_char_t** tok, xp_size_t* tok_len); +xp_char_t* xp_awk_strxntokbyrex ( + xp_awk_t* awk, const xp_char_t* s, xp_size_t len, + void* rex, xp_char_t** tok, xp_size_t* tok_len, int* errnum); + + int xp_awk_printf (xp_awk_t* awk, const xp_char_t* fmt, ...); int xp_awk_sprintf ( diff --git a/ase/awk/awk_i.h b/ase/awk/awk_i.h index 0e6df148..f7d4d7d2 100644 --- a/ase/awk/awk_i.h +++ b/ase/awk/awk_i.h @@ -1,5 +1,5 @@ /* - * $Id: awk_i.h,v 1.54 2006-09-01 07:18:39 bacon Exp $ + * $Id: awk_i.h,v 1.55 2006-09-05 04:10:23 bacon Exp $ */ #ifndef _XP_AWK_AWKI_H_ @@ -240,14 +240,20 @@ struct xp_awk_run_t xp_size_t len; xp_awk_val_t* val; /* $1 .. $NF */ }* flds; + } inrec; + struct + { + void* rs; + void* fs; + } rex; + /* extio chain */ struct { xp_awk_io_t handler[XP_AWK_EXTIO_NUM]; xp_awk_extio_t* chain; - void* rs_rex; } extio; int errnum; diff --git a/ase/awk/extio.c b/ase/awk/extio.c index 71d2246f..6e02c13d 100644 --- a/ase/awk/extio.c +++ b/ase/awk/extio.c @@ -1,5 +1,5 @@ /* - * $Id: extio.c,v 1.45 2006-09-01 07:18:39 bacon Exp $ + * $Id: extio.c,v 1.46 2006-09-05 04:10:23 bacon Exp $ */ #include @@ -287,11 +287,11 @@ int xp_awk_readextio ( const xp_char_t* match_ptr; xp_size_t match_len; - xp_assert (run->extio.rs_rex != XP_NULL); + xp_assert (run->rex.rs != XP_NULL); /* TODO: safematchrex */ n = xp_awk_matchrex ( - run->awk, run->extio.rs_rex, + run->awk, run->rex.rs, XP_AWK_STR_BUF(buf), XP_AWK_STR_LEN(buf), &match_ptr, &match_len, &run->errnum); if (n == -1) diff --git a/ase/awk/func.c b/ase/awk/func.c index 4456caf3..6dba3874 100644 --- a/ase/awk/func.c +++ b/ase/awk/func.c @@ -1,5 +1,5 @@ /* - * $Id: func.c,v 1.45 2006-09-03 15:46:49 bacon Exp $ + * $Id: func.c,v 1.46 2006-09-05 04:10:24 bacon Exp $ */ #include @@ -507,6 +507,9 @@ static int __bfn_split (xp_awk_t* awk, void* run) xp_size_t key_len; xp_char_t* fs_ptr, * fs_free; xp_size_t fs_len; + void* fs_rex = XP_NULL; + void* fs_rex_free = XP_NULL; + int errnum; nargs = xp_awk_getnargs (run); xp_assert (nargs >= 2 && nargs <= 3); @@ -575,6 +578,12 @@ static int __bfn_split (xp_awk_t* awk, void* run) } fs_free = fs_ptr; } + + if (fs_len > 1) + { + fs_rex = ((xp_awk_run_t*)run)->rex.fs; + fs_rex_free = XP_NULL; + } } else { @@ -596,6 +605,21 @@ static int __bfn_split (xp_awk_t* awk, void* run) } fs_free = fs_ptr; } + + if (fs_len > 1) + { + fs_rex = xp_awk_buildrex (awk, fs_ptr, fs_len, &errnum); + if (fs_rex == XP_NULL) + { + if (str_free != XP_NULL) + XP_AWK_FREE (awk, str_free); + if (fs_free != XP_NULL) + XP_AWK_FREE (awk, fs_free); + xp_awk_seterrnum (run, errnum); + return -1; + } + fs_rex_free = fs_rex; + } } t1 = xp_awk_makemapval (run); @@ -603,6 +627,7 @@ static int __bfn_split (xp_awk_t* awk, void* run) { if (str_free != XP_NULL) XP_AWK_FREE (awk, str_free); if (fs_free != XP_NULL) XP_AWK_FREE (awk, fs_free); + if (fs_rex_free != XP_NULL) xp_awk_freerex (awk, fs_rex_free); xp_awk_seterrnum (run, XP_AWK_ENOMEM); return -1; } @@ -621,8 +646,19 @@ static int __bfn_split (xp_awk_t* awk, void* run) } else { - /* TODO: FS regular expression */ - xp_printf (XP_T("MULTI-CHARACTER FS NOT READY IN SPLIT\n")); + p = xp_awk_strxntokbyrex (awk, p, str_len, + fs_rex, &tok, &tok_len, &errnum); + if (p == XP_NULL && errnum != XP_AWK_ENOERR) + { + if (str_free != XP_NULL) + XP_AWK_FREE (awk, str_free); + if (fs_free != XP_NULL) + XP_AWK_FREE (awk, fs_free); + if (fs_rex_free != XP_NULL) + xp_awk_freerex (awk, fs_rex_free); + xp_awk_seterrnum (run, errnum); + return -1; + } } if (num == 0 && p == XP_NULL && tok_len == 0) @@ -639,6 +675,7 @@ static int __bfn_split (xp_awk_t* awk, void* run) { if (str_free != XP_NULL) XP_AWK_FREE (awk, str_free); if (fs_free != XP_NULL) XP_AWK_FREE (awk, fs_free); + if (fs_rex_free != XP_NULL) xp_awk_freerex (awk, fs_rex_free); xp_awk_seterrnum (run, XP_AWK_ENOMEM); return -1; } @@ -654,6 +691,7 @@ static int __bfn_split (xp_awk_t* awk, void* run) { if (str_free != XP_NULL) XP_AWK_FREE (awk, str_free); if (fs_free != XP_NULL) XP_AWK_FREE (awk, fs_free); + if (fs_rex_free != XP_NULL) xp_awk_freerex (awk, fs_rex_free); xp_awk_seterrnum (run, XP_AWK_ENOMEM); return -1; } @@ -664,11 +702,12 @@ static int __bfn_split (xp_awk_t* awk, void* run) xp_awk_refupval (t2); num++; - str_len = str_left - (p - str + 1); + str_len = str_left - (p - str); } if (str_free != XP_NULL) XP_AWK_FREE (awk, str_free); if (fs_free != XP_NULL) XP_AWK_FREE (awk, fs_free); + if (fs_rex_free != XP_NULL) xp_awk_freerex (awk, fs_rex_free); t1 = xp_awk_makeintval (run, num); if (t1 == XP_NULL) diff --git a/ase/awk/misc.c b/ase/awk/misc.c index 62552380..8ae8a0c1 100644 --- a/ase/awk/misc.c +++ b/ase/awk/misc.c @@ -1,5 +1,5 @@ /* - * $Id: misc.c,v 1.13 2006-09-03 15:46:49 bacon Exp $ + * $Id: misc.c,v 1.14 2006-09-05 04:10:24 bacon Exp $ */ #include @@ -594,9 +594,12 @@ xp_char_t* xp_awk_strxntok ( if (sp == XP_NULL) sp = p; ep = p++; } + while (p < end && XP_AWK_ISSPACE(awk,*p)) p++; } else if (delim_mode == __DELIM_NOSPACES) { + /* each token is delimited by one of charaters + * in the delimeter set "delim". */ while (p < end) { c = *p; @@ -610,6 +613,9 @@ xp_char_t* xp_awk_strxntok ( } else /* if (delim_mode == __DELIM_COMPOSITE) */ { + /* each token is delimited by one of non-space charaters + * in the delimeter set "delim". however, all space characters + * surrounding the token are removed */ while (p < end && XP_AWK_ISSPACE(awk,*p)) p++; while (p < end) { @@ -641,8 +647,38 @@ exit_loop: } /* if XP_NULL is returned, this function should not be called anymore */ - return (p >= end)? XP_NULL: - (delim_mode == __DELIM_EMPTY)? p: ((xp_char_t*)++p); + if (p >= end) return XP_NULL; + if (delim_mode == __DELIM_EMPTY || + delim_mode == __DELIM_SPACES) return (xp_char_t*)p; + return (xp_char_t*)++p; +} + +xp_char_t* xp_awk_strxntokbyrex ( + xp_awk_t* awk, const xp_char_t* s, xp_size_t len, + void* rex, xp_char_t** tok, xp_size_t* tok_len, int* errnum) +{ + int n; + xp_char_t* match_ptr; + xp_size_t match_len; + + n = xp_awk_matchrex (awk, rex, s, len, &match_ptr, &match_len, errnum); + if (n == -1) return XP_NULL; + if (n == 0) + { + /* no match has been found. + * return the entire string as a token */ + *tok = (xp_char_t*)s; + *tok_len = len; + *errnum = XP_AWK_ENOERR; + return XP_NULL; + } + + assert (n == 1); + *tok = (xp_char_t*)s; + *tok_len = match_ptr - s; + + *errnum = XP_AWK_ENOERR; + return (match_ptr+match_len >= s+len)? XP_NULL: (match_ptr+match_len); } int xp_awk_printf (xp_awk_t* awk, const xp_char_t* fmt, ...) diff --git a/ase/awk/run.c b/ase/awk/run.c index 81101b9b..0a00312a 100644 --- a/ase/awk/run.c +++ b/ase/awk/run.c @@ -1,5 +1,5 @@ /* - * $Id: run.c,v 1.196 2006-09-03 15:46:49 bacon Exp $ + * $Id: run.c,v 1.197 2006-09-05 04:10:24 bacon Exp $ */ #include @@ -234,7 +234,7 @@ int xp_awk_setglobal (void* run, xp_size_t idx, xp_awk_val_t* val) PANIC_I (r, XP_AWK_ESCALARTOMAP); } - if (idx == XP_AWK_GLOBAL_RS) + if (idx == XP_AWK_GLOBAL_RS) { xp_char_t* rs_ptr; xp_size_t rs_len; @@ -271,18 +271,68 @@ int xp_awk_setglobal (void* run, xp_size_t idx, xp_awk_val_t* val) return -1; } - if (r->extio.rs_rex != XP_NULL) + if (r->rex.rs != XP_NULL) { xp_awk_freerex ( ((xp_awk_run_t*)run)->awk, - r->extio.rs_rex); + r->rex.rs); } - r->extio.rs_rex = rex; + r->rex.rs = rex; } if (val->type != XP_AWK_VAL_STR) XP_AWK_FREE (((xp_awk_run_t*)run)->awk, rs_ptr); } + else if (idx == XP_AWK_GLOBAL_FS) + { + xp_char_t* fs_ptr; + xp_size_t fs_len; + + if (val->type == XP_AWK_VAL_STR) + { + fs_ptr = ((xp_awk_val_str_t*)val)->buf; + fs_len = ((xp_awk_val_str_t*)val)->len; + } + else + { + /* due to the expression evaluation rule, the + * regular expression can not be an assigned value */ + xp_assert (val->type != XP_AWK_VAL_REX); + + fs_ptr = xp_awk_valtostr ( + run, val, xp_true, XP_NULL, &fs_len); + if (fs_ptr == XP_NULL) return -1; + } + + if (fs_len > 1) + { + void* rex; + + /* compile the regular expression */ + /* TODO: use safebuild */ + rex = xp_awk_buildrex ( + ((xp_awk_run_t*)run)->awk, + fs_ptr, fs_len, &r->errnum); + if (rex == XP_NULL) + { + if (val->type != XP_AWK_VAL_STR) + XP_AWK_FREE (((xp_awk_run_t*)run)->awk, fs_ptr); + return -1; + } + + if (r->rex.fs != XP_NULL) + { + xp_awk_freerex ( + ((xp_awk_run_t*)run)->awk, + r->rex.fs); + } + r->rex.fs = rex; + } + + if (val->type != XP_AWK_VAL_STR) + XP_AWK_FREE (((xp_awk_run_t*)run)->awk, fs_ptr); + } + /* TODO: if idx == XP_AWK_GLOBAL_NF recompute $0, etc */ @@ -534,7 +584,9 @@ static int __init_run ( run->extio.handler[XP_AWK_EXTIO_FILE] = runios->file; run->extio.handler[XP_AWK_EXTIO_CONSOLE] = runios->console; run->extio.chain = XP_NULL; - run->extio.rs_rex = XP_NULL; + + run->rex.rs = XP_NULL; + run->rex.fs = XP_NULL; return 0; } @@ -547,10 +599,16 @@ static void __deinit_run (xp_awk_run_t* run) /* TODO: what if this operation fails? */ xp_awk_clearextio (run); xp_assert (run->extio.chain == XP_NULL); - if (run->extio.rs_rex != XP_NULL) + + if (run->rex.rs != XP_NULL) { - XP_AWK_FREE (run->awk, run->extio.rs_rex); - run->extio.rs_rex = XP_NULL; + XP_AWK_FREE (run->awk, run->rex.rs); + run->rex.rs = XP_NULL; + } + if (run->rex.fs != XP_NULL) + { + XP_AWK_FREE (run->awk, run->rex.fs); + run->rex.fs = XP_NULL; } /* destroy input record. __clear_record should be called @@ -4680,6 +4738,7 @@ static int __split_record (xp_awk_run_t* run) xp_awk_val_t* v, * fs; xp_char_t* fs_ptr, * fs_free; xp_size_t fs_len; + int errnum; /* inrec should be cleared before __split_record is called */ xp_assert (run->inrec.nflds == 0); @@ -4720,11 +4779,14 @@ static int __split_record (xp_awk_run_t* run) } else { - /* TODO: FS regular expression */ - run->errnum = XP_AWK_EINTERNAL; - if (fs_free != XP_NULL) XP_AWK_FREE (run->awk, fs_free); - xp_printf (XP_T("MULTI-CHARACTER FS NOT READY..\n")); - return -1; + p = xp_awk_strxntokbyrex (run->awk, p, len, + run->rex.fs, &tok, &tok_len, &errnum); + if (p == XP_NULL && errnum != XP_AWK_ENOERR) + { + if (fs_free != XP_NULL) + XP_AWK_FREE (run->awk, fs_free); + PANIC_I (run, errnum); + } } if (nflds == 0 && p == XP_NULL && tok_len == 0) @@ -4765,23 +4827,21 @@ static int __split_record (xp_awk_run_t* run) while (p != XP_NULL) { - if (fs->type == XP_AWK_VAL_NIL) - { - p = xp_awk_strxntok (run->awk, - p, len, XP_T(" "), 1, &tok, &tok_len); - } - else if (fs_len <= 1) + if (fs_len <= 1) { p = xp_awk_strxntok (run->awk, p, len, fs_ptr, fs_len, &tok, &tok_len); } else { - /* TODO: FS regular expression */ - run->errnum = XP_AWK_EINTERNAL; - if (fs_free != XP_NULL) XP_AWK_FREE (run->awk, fs_free); - xp_printf (XP_T("MULTI-CHARACTER FS NOT READY..\n")); - return -1; + p = xp_awk_strxntokbyrex (run->awk, p, len, + run->rex.fs, &tok, &tok_len, &errnum); + if (p == XP_NULL && errnum != XP_AWK_ENOERR) + { + if (fs_free != XP_NULL) + XP_AWK_FREE (run->awk, fs_free); + PANIC_I (run, errnum); + } } xp_assert ((tok != XP_NULL && tok_len > 0) || tok_len == 0); diff --git a/ase/test/awk/t38.awk b/ase/test/awk/t38.awk index beb2e7ca..78ea5c19 100644 --- a/ase/test/awk/t38.awk +++ b/ase/test/awk/t38.awk @@ -1,6 +1,9 @@ BEGIN { - split ("a b c d e", x, ""); + split (" a b c d e ", x, ""); + for (j in x) print j "->" x[j]; + print "-------------------"; + split ("a b c d e", x, "b c"); for (j in x) print j "->" x[j]; print "-------------------"; } diff --git a/ase/test/awk/t39.awk b/ase/test/awk/t39.awk new file mode 100644 index 00000000..084544aa --- /dev/null +++ b/ase/test/awk/t39.awk @@ -0,0 +1,6 @@ +BEGIN { FS="a+"; } +{ + print "NF=" NF; + for (i = 0; i < NF; i++) print i " [" $(i+1) "]"; +} +