diff --git a/ase/awk/misc.c b/ase/awk/misc.c index 8ae8a0c1..068f34cf 100644 --- a/ase/awk/misc.c +++ b/ase/awk/misc.c @@ -1,5 +1,5 @@ /* - * $Id: misc.c,v 1.14 2006-09-05 04:10:24 bacon Exp $ + * $Id: misc.c,v 1.15 2006-09-05 15:18:15 bacon Exp $ */ #include @@ -659,9 +659,14 @@ xp_char_t* xp_awk_strxntokbyrex ( { int n; xp_char_t* match_ptr; - xp_size_t match_len; + xp_size_t match_len, i; + const xp_char_t* p = s; + xp_size_t left = len; - n = xp_awk_matchrex (awk, rex, s, len, &match_ptr, &match_len, errnum); +// TODO:... +while (len > 0) +{ + n = xp_awk_matchrex (awk, rex, p, left, &match_ptr, &match_len, errnum); if (n == -1) return XP_NULL; if (n == 0) { @@ -672,11 +677,46 @@ xp_char_t* xp_awk_strxntokbyrex ( *errnum = XP_AWK_ENOERR; return XP_NULL; } - + assert (n == 1); + + if (match_len == 0) + { + p++; + left--; + } + else break; +} + +if (len == 0) +{ + *tok = (xp_char_t*)s; + *tok_len = len; + *errnum = XP_AWK_ENOERR; + return XP_NULL; +} + +#if 0 +//xp_printf (XP_T("%d [%s]\n"), match_len, match_ptr); + if (match_len == 0 && s == match_ptr && len > 0) + { +//xp_printf (XP_T("%d [%s]\n"), match_len, match_ptr); + match_ptr++; + } +#endif + *tok = (xp_char_t*)s; *tok_len = match_ptr - s; + for (i = 0; i < match_len; i++) + { + if (!XP_AWK_ISSPACE(awk, match_ptr[i])) + { + *errnum = XP_AWK_ENOERR; + return match_ptr+match_len; + } + } + *errnum = XP_AWK_ENOERR; return (match_ptr+match_len >= s+len)? XP_NULL: (match_ptr+match_len); } diff --git a/ase/awk/rex.c b/ase/awk/rex.c index a037befe..89dc25f5 100644 --- a/ase/awk/rex.c +++ b/ase/awk/rex.c @@ -1,5 +1,5 @@ /* - * $Id: rex.c,v 1.27 2006-09-01 07:18:40 bacon Exp $ + * $Id: rex.c,v 1.28 2006-09-05 15:18:16 bacon Exp $ */ #include @@ -154,7 +154,7 @@ static int __build_pattern0 (__builder_t* rex); static int __build_branch (__builder_t* rex); static int __build_atom (__builder_t* rex); static int __build_charset (__builder_t* rex, struct __code_t* cmd); -static int __build_boundary (__builder_t* rex, struct __code_t* cmd); +static int __build_occurrences (__builder_t* rex, struct __code_t* cmd); static int __build_cclass (__builder_t* rex, xp_char_t* cc); static int __build_range (__builder_t* rex, struct __code_t* cmd); static int __next_char (__builder_t* rex, int level); @@ -186,7 +186,7 @@ static const xp_byte_t* __match_charset ( static const xp_byte_t* __match_group ( __matcher_t* matcher, const xp_byte_t* base, __match_t* mat); -static const xp_byte_t* __match_boundary ( +static const xp_byte_t* __match_occurrences ( __matcher_t* matcher, xp_size_t si, const xp_byte_t* p, xp_size_t lbound, xp_size_t ubound, __match_t* mat); @@ -308,6 +308,7 @@ int xp_awk_matchrex ( __matcher_t matcher; __match_t mat; xp_size_t offset = 0; + /*const xp_char_t* match_ptr_zero = XP_NULL;*/ matcher.awk = awk; @@ -321,7 +322,7 @@ int xp_awk_matchrex ( matcher.depth.cur = 0; mat.matched = xp_false; -/* TODO: shoud it allow an offset here??? */ +/* TODO: should it allow an offset here??? */ mat.match_ptr = str + offset; while (mat.match_ptr < matcher.match.str.end) @@ -334,14 +335,35 @@ int xp_awk_matchrex ( if (mat.matched) { + /* + if (mat.match_len == 0) + { + if (match_ptr_zero == XP_NULL) + match_ptr_zero = mat.match_ptr; + mat.match_ptr++; + continue; + } + */ + if (match_ptr != XP_NULL) *match_ptr = mat.match_ptr; if (match_len != XP_NULL) *match_len = mat.match_len; + + /*match_ptr_zero = XP_NULL;*/ break; } mat.match_ptr++; } + /* + if (match_ptr_zero != XP_NULL) + { + if (match_ptr != XP_NULL) *match_ptr = match_ptr_zero; + if (match_len != XP_NULL) *match_len = 0; + return 1; + } + */ + return (mat.matched)? 1: 0; } @@ -466,7 +488,7 @@ static int __build_branch (__builder_t* builder) if (n == 0) break; /* no atom */ - n = __build_boundary (builder, cmd); + n = __build_occurrences (builder, cmd); if (n == -1) { builder->code.size = old_size; @@ -474,7 +496,7 @@ static int __build_branch (__builder_t* builder) } /* n == 0 no bound character. just continue */ - /* n == 1 bound has been applied by build_boundary */ + /* n == 1 bound has been applied by build_occurrences */ CODEAT(builder,pos_na,xp_size_t) += 1; } @@ -738,7 +760,7 @@ static int __build_cclass (__builder_t* builder, xp_char_t* cc) } #endif -static int __build_boundary (__builder_t* builder, struct __code_t* cmd) +static int __build_occurrences (__builder_t* builder, struct __code_t* cmd) { if (builder->ptn.curc.type != CT_SPECIAL) return 0; @@ -826,7 +848,7 @@ static int __build_range (__builder_t* builder, struct __code_t* cmd) if (cmd->lbound > cmd->ubound) { - /* invalid boundary range */ + /* invalid occurrences range */ builder->errnum = XP_AWK_EREXBRANGE; return -1; } @@ -1165,7 +1187,7 @@ static const xp_byte_t* __match_any_char ( //xp_printf (XP_T("max si = %d\n"), si); if (si >= lbound && si <= ubound) { - p = __match_boundary (matcher, si, p, lbound, ubound, mat); + p = __match_occurrences (matcher, si, p, lbound, ubound, mat); } return p; @@ -1215,10 +1237,10 @@ static const xp_byte_t* __match_ord_char ( si++; } -//xp_printf (XP_T("max si = %d\n"), si); +//xp_printf (XP_T("max si = %d, lbound = %u, ubound = %u\n"), si, lbound, ubound); if (si >= lbound && si <= ubound) { - p = __match_boundary (matcher, si, p, lbound, ubound, mat); + p = __match_occurrences (matcher, si, p, lbound, ubound, mat); } return p; @@ -1259,7 +1281,7 @@ static const xp_byte_t* __match_charset ( if (si >= lbound && si <= ubound) { - p = __match_boundary (matcher, si, p, lbound, ubound, mat); + p = __match_occurrences (matcher, si, p, lbound, ubound, mat); } return p; @@ -1345,7 +1367,7 @@ static const xp_byte_t* __match_group ( /* increment p by the length of the subpattern */ p += *(xp_size_t*)(p+xp_sizeof(xp_size_t)); - /* check the boundary */ + /* check the occurrences */ if (si >= cp->lbound && si <= cp->ubound) { if (cp->lbound == cp->ubound || p >= mat->branch_end) @@ -1394,7 +1416,7 @@ static const xp_byte_t* __match_group ( return p; } -static const xp_byte_t* __match_boundary ( +static const xp_byte_t* __match_occurrences ( __matcher_t* matcher, xp_size_t si, const xp_byte_t* p, xp_size_t lbound, xp_size_t ubound, __match_t* mat) { diff --git a/ase/test/awk/t39.awk b/ase/test/awk/t39.awk index 084544aa..707d22a1 100644 --- a/ase/test/awk/t39.awk +++ b/ase/test/awk/t39.awk @@ -1,4 +1,4 @@ -BEGIN { FS="a+"; } +BEGIN { FS=":*"; } { print "NF=" NF; for (i = 0; i < NF; i++) print i " [" $(i+1) "]";