From fd91e6b244d9e300d46fbc4e106b97660dcb0fe4 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 16 Jul 2022 08:14:25 +0000 Subject: [PATCH] added hio_comp_ucstr_bcstr_limited() added hio_fnmat_uchars_i(), hio_fnmat_bchars_i() and other similar functions --- hio/lib/hio-str.h.m4 | 84 ++++++- hio/lib/hio-utl.h | 6 + hio/lib/utl-str.c | 587 +++++++++++++++++++++++++++++++++++++++++++ hio/lib/utl-str.c.m4 | 126 ++++++++++ hio/lib/utl-str.m4 | 235 +++++++++++++++++ hio/lib/utl.c | 27 ++ 6 files changed, 1055 insertions(+), 10 deletions(-) diff --git a/hio/lib/hio-str.h.m4 b/hio/lib/hio-str.h.m4 index 37dd6b3..5ded251 100644 --- a/hio/lib/hio-str.h.m4 +++ b/hio/lib/hio-str.h.m4 @@ -42,19 +42,41 @@ dnl --------------------------------------------------------------------------- /* ========================================================================= * STRING * ========================================================================= */ - -enum hio_trim_oochars_flag_t +enum hio_trim_flag_t { - HIO_TRIM_OOCHARS_LEFT = (1 << 0), /**< trim leading spaces */ -#define HIO_TRIM_OOCHARS_LEFT HIO_TRIM_OOCHARS_LEFT -#define HIO_TRIM_UCHARS_LEFT HIO_TRIM_OOCHARS_LEFT -#define HIO_TRIM_BCHARS_LEFT HIO_TRIM_OOCHARS_LEFT - HIO_TRIM_OOCHARS_RIGHT = (1 << 1) /**< trim trailing spaces */ -#define HIO_TRIM_OOCHARS_RIGHT HIO_TRIM_OOCHARS_RIGHT -#define HIO_TRIM_UCHARS_RIGHT HIO_TRIM_OOCHARS_RIGHT -#define HIO_TRIM_BCHARS_RIGHT HIO_TRIM_OOCHARS_RIGHT + HIO_TRIM_LEFT = (1 << 0), /**< trim leading spaces */ +#define HIO_TRIM_LEFT HIO_TRIM_LEFT +#define HIO_TRIM_OOCHARS_LEFT HIO_TRIM_LEFT +#define HIO_TRIM_UCHARS_LEFT HIO_TRIM_LEFT +#define HIO_TRIM_BCHARS_LEFT HIO_TRIM_LEFT + HIO_TRIM_RIGHT = (1 << 1) /**< trim trailing spaces */ +#define HIO_TRIM_RIGHT HIO_TRIM_RIGHT +#define HIO_TRIM_OOCHARS_RIGHT HIO_TRIM_RIGHT +#define HIO_TRIM_UCHARS_RIGHT HIO_TRIM_RIGHT +#define HIO_TRIM_BCHARS_RIGHT HIO_TRIM_RIGHT }; +enum hio_fnmat_flag_t +{ + HIO_FNMAT_PATHNAME = (1 << 0), +#define 
HIO_FNMAT_PATHNAME HIO_FNMAT_PATHNAME + HIO_FNMAT_NOESCAPE = (1 << 1), +#define HIO_FNMAT_NOESCAPE HIO_FNMAT_NOESCAPE + HIO_FNMAT_PERIOD = (1 << 2), +#define HIO_FNMAT_PERIOD HIO_FNMAT_PERIOD + HIO_FNMAT_IGNORECASE = (1 << 3) +#define HIO_FNMAT_IGNORECASE HIO_FNMAT_IGNORECASE +}; + +#if defined(_WIN32) || defined(__OS2__) || defined(__DOS__) + /* i don't support escaping in these systems */ +# define HIO_FNMAT_IS_ESC(c) (0) +# define HIO_FNMAT_IS_SEP(c) ((c) == '/' || (c) == '\\') +#else +# define HIO_FNMAT_IS_ESC(c) ((c) == '\\') +# define HIO_FNMAT_IS_SEP(c) ((c) == '/') +#endif + #if defined(__cplusplus) extern "C" { #endif @@ -477,6 +499,35 @@ HIO_EXPORT int hio_split_bcstr ( ); +HIO_EXPORT int hio_fnmat_uchars_i ( + const hio_uch_t* str, + hio_oow_t slen, + const hio_uch_t* ptn, + hio_oow_t plen, + int flags, + int no_first_period +); + +HIO_EXPORT int hio_fnmat_bchars_i ( + const hio_bch_t* str, + hio_oow_t slen, + const hio_bch_t* ptn, + hio_oow_t plen, + int flags, + int no_first_period +); + +#define hio_fnmat_uchars(str, slen, ptn, plen, flags) hio_fnmat_uchars_i(str, slen, ptn, plen, flags, 0) +#define hio_fnmat_ucstr(str, ptn, flags) hio_fnmat_uchars_i(str, hio_count_ucstr(str), ptn, hio_count_ucstr(ptn), flags, 0) +#define hio_fnmat_uchars_ucstr(str, slen, ptn, flags) hio_fnmat_uchars_i(str, slen, ptn, hio_count_ucstr(ptn), flags, 0) +#define hio_fnmat_ucstr_uchars(str, ptn, plen, flags) hio_fnmat_uchars_i(str, hio_count_ucstr(str), ptn, plen, flags, 0) + +#define hio_fnmat_bchars(str, slen, ptn, plen, flags) hio_fnmat_bchars_i(str, slen, ptn, plen, flags, 0) +#define hio_fnmat_bcstr(str, ptn, flags) hio_fnmat_bchars_i(str, hio_count_bcstr(str), ptn, hio_count_bcstr(ptn), flags, 0) +#define hio_fnmat_bchars_bcstr(str, slen, ptn, flags) hio_fnmat_bchars_i(str, slen, ptn, hio_count_bcstr(ptn), flags, 0) +#define hio_fnmat_bcstr_bchars(str, ptn, plen, flags) hio_fnmat_bchars_i(str, hio_count_bcstr(str), ptn, plen, flags, 0) + + #if 
defined(HIO_OOCH_IS_UCH) # define hio_count_oocstr hio_count_ucstr # define hio_count_oocstr_limited hio_count_ucstr_limited @@ -519,6 +570,13 @@ HIO_EXPORT int hio_split_bcstr ( # define hio_tokenize_oochars hio_tokenize_uchars # define hio_trim_oochars hio_trim_uchars # define hio_split_oocstr hio_split_ucstr + +# define hio_fnmat_oochars_i hio_fnmat_uchars_i /* oochars aliases of the fnmat family, mapped to the uchars variants */ +# define hio_fnmat_oochars hio_fnmat_uchars +# define hio_fnmat_oocstr hio_fnmat_ucstr +# define hio_fnmat_oochars_oocstr hio_fnmat_uchars_ucstr +# define hio_fnmat_oocstr_oochars hio_fnmat_ucstr_uchars + #else # define hio_count_oocstr hio_count_bcstr # define hio_count_oocstr_limited hio_count_bcstr_limited @@ -562,6 +620,12 @@ HIO_EXPORT int hio_split_bcstr ( # define hio_tokenize_oochars hio_tokenize_bchars # define hio_trim_oochars hio_trim_bchars # define hio_split_oocstr hio_split_bcstr + +# define hio_fnmat_oochars_i hio_fnmat_bchars_i /* oochars aliases of the fnmat family, mapped to the bchars variants */ +# define hio_fnmat_oochars hio_fnmat_bchars +# define hio_fnmat_oocstr hio_fnmat_bcstr +# define hio_fnmat_oochars_oocstr hio_fnmat_bchars_bcstr +# define hio_fnmat_oocstr_oochars hio_fnmat_bcstr_bchars #endif /* ------------------------------------------------------------------------- */ diff --git a/hio/lib/hio-utl.h b/hio/lib/hio-utl.h index 0ebb450..e71f111 100644 --- a/hio/lib/hio-utl.h +++ b/hio/lib/hio-utl.h @@ -234,6 +234,12 @@ HIO_EXPORT int hio_comp_ucstr_bcstr ( int ignorecase ); +HIO_EXPORT int hio_comp_ucstr_bcstr_limited ( + const hio_uch_t* str1, + const hio_bch_t* str2, + hio_oow_t maxlen, + int ignorecase +); HIO_EXPORT int hio_comp_uchars_bcstr ( const hio_uch_t* str1, diff --git a/hio/lib/utl-str.c b/hio/lib/utl-str.c index 8aa5f36..e31a082 100644 --- a/hio/lib/utl-str.c +++ b/hio/lib/utl-str.c @@ -32,6 +32,131 @@ #include "hio-prv.h" #include +static int match_uch_class (const hio_uch_t* pp, hio_uch_t sc, int* matched) +{ + if (hio_comp_ucstr_bcstr_limited(pp, "[:upper:]", 9, 0) == 0) + { + *matched = hio_is_uch_upper(sc); +
return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:lower:]", 9, 0) == 0) + { + *matched = hio_is_uch_lower(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:alpha:]", 9, 0) == 0) + { + *matched = hio_is_uch_alpha(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:digit:]", 9, 0) == 0) + { + *matched = hio_is_uch_digit(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:xdigit:]", 10, 0) == 0) + { + *matched = hio_is_uch_xdigit(sc); + return 10; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:alnum:]", 9, 0) == 0) + { + *matched = hio_is_uch_alnum(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:space:]", 9, 0) == 0) + { + *matched = hio_is_uch_space(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:print:]", 9, 0) == 0) + { + *matched = hio_is_uch_print(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:graph:]", 9, 0) == 0) + { + *matched = hio_is_uch_graph(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:cntrl:]", 9, 0) == 0) + { + *matched = hio_is_uch_cntrl(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:punct:]", 9, 0) == 0) + { + *matched = hio_is_uch_punct(sc); + return 9; + } + + return 0; +} + +static int match_bch_class (const hio_bch_t* pp, hio_bch_t sc, int* matched) +{ + if (hio_comp_bcstr_limited(pp, "[:upper:]", 9, 0) == 0) + { + *matched = hio_is_bch_upper(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:lower:]", 9, 0) == 0) + { + *matched = hio_is_bch_lower(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:alpha:]", 9, 0) == 0) + { + *matched = hio_is_bch_alpha(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:digit:]", 9, 0) == 0) + { + *matched = hio_is_bch_digit(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:xdigit:]", 10, 0) == 0) + { + *matched = hio_is_bch_xdigit(sc); + return 10; + } + else if (hio_comp_bcstr_limited(pp, "[:alnum:]", 
9, 0) == 0) + { + *matched = hio_is_bch_alnum(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:space:]", 9, 0) == 0) + { + *matched = hio_is_bch_space(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:print:]", 9, 0) == 0) + { + *matched = hio_is_bch_print(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:graph:]", 9, 0) == 0) + { + *matched = hio_is_bch_graph(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:cntrl:]", 9, 0) == 0) + { + *matched = hio_is_bch_cntrl(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:punct:]", 9, 0) == 0) + { + *matched = hio_is_bch_punct(sc); + return 9; + } + + return 0; +} + + + + int hio_comp_uchars (const hio_uch_t* str1, hio_oow_t len1, const hio_uch_t* str2, hio_oow_t len2, int ignorecase) { @@ -3001,3 +3126,465 @@ hio_uintmax_t hio_bchars_to_uintmax (const hio_bch_t* str, hio_oow_t len, int op return n; } +int hio_fnmat_uchars_i (const hio_uch_t* str, hio_oow_t slen, const hio_uch_t* ptn, hio_oow_t plen, int flags, int no_first_period) +{ + const hio_uch_t* sp = str; + const hio_uch_t* pp = ptn; + const hio_uch_t* se = str + slen; + const hio_uch_t* pe = ptn + plen; + hio_uch_t sc, pc, pc2; + + while (1) + { + if (pp < pe && HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) + { + /* pattern is escaped and escaping is allowed. */ + + if ((++pp) >= pe) + { + /* + * the last character of the pattern is an WCS_ESC. + * matching is performed as if the end of the pattern is + * reached just without an WCS_ESC. + */ + if (sp < se) return 0; + return 1; + } + + if (sp >= se) return 0; /* premature string termination */ + + sc = *sp; pc = *pp; /* pc is just a normal character */ + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + /* make characters to lower-case */ + sc = hio_to_uch_lower(sc); + pc = hio_to_uch_lower(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + continue; + } + if (pp >= pe) + { + /* + * the end of the pattern has been reached. 
+ * the string must terminate too. + */ + return sp >= se; + } + + if (sp >= se) + { + /* the string terminates prematurely */ + while (pp < pe && *pp == '*') pp++; + return pp >= pe; + } + + sc = *sp; pc = *pp; + + if (sc == '.' && (flags & HIO_FNMAT_PERIOD)) + { + /* + * a leading period in the string must match + * a period in the pattern explicitly (sp[-1] is read only when sp > str) + */ + if ((!no_first_period && sp == str) || + (sp > str && HIO_FNMAT_IS_SEP(sp[-1]) && (flags & HIO_FNMAT_PATHNAME))) + { + if (pc != '.') return 0; + sp++; pp++; + continue; + } + } + else if (HIO_FNMAT_IS_SEP(sc) && (flags & HIO_FNMAT_PATHNAME)) + { + while (pc == '*') + { + if ((++pp) >= pe) return 0; + pc = *pp; + } + + /* a path separator must be matched explicitly */ + if (!HIO_FNMAT_IS_SEP(pc)) return 0; + sp++; pp++; + continue; + } + + /* the handling of special pattern characters begins here */ + if (pc == '?') + { + /* match any single character */ + sp++; pp++; + } + else if (pc == '*') + { + /* match zero or more characters */ + + /* compact asterisks */ + do { pp++; } while (pp < pe && *pp == '*'); + + if (pp >= pe) + { + /* + * if the last character in the pattern is an asterisk, + * the string should not have any directory separators + * when HIO_FNMAT_PATHNAME is set.
+ */ + if (flags & HIO_FNMAT_PATHNAME) + { + const hio_uch_t* s = sp; + for (s = sp; s < se; s++) + { + if (HIO_FNMAT_IS_SEP(*s)) return 0; + } + } + return 1; + } + else + { + do + { + if (hio_fnmat_uchars_i(sp, se - sp, pp, pe - pp, flags, 1)) return 1; + if (HIO_FNMAT_IS_SEP(*sp) && (flags & HIO_FNMAT_PATHNAME)) break; + sp++; + } + while (sp < se); + + return 0; + } + } + else if (pc == '[') + { + /* match range */ + int negate = 0; + int matched = 0; + + if ((++pp) >= pe) return 0; + if (*pp == '!') { negate = 1; pp++; } + + while (pp < pe && *pp != ']') + { + if (*pp == '[') + { + hio_oow_t pl = pe - pp; + + if (pl >= 9) /* assumption that [:class:] is at least 9 in match_uch_class */ + { + int cm = 0, x = match_uch_class(pp, sc, &cm); + if (x > 0) + { + if (cm) matched = 1; pp += x; /* OR the class result in so a hit from an earlier item of the set is kept */ + continue; + } + } + + /* + * characters in an invalid class name are + * just treated as normal characters + */ + } + + if (HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) pp++; + else if (*pp == ']') break; + + if (pp >= pe) break; + + pc = *pp; + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + sc = hio_to_uch_lower(sc); + pc = hio_to_uch_lower(pc); + } + + if (pp + 1 < pe && pp[1] == '-') + { + pp += 2; /* move to the character next to the dash */ + + if (pp >= pe) + { + if (sc >= pc) matched = 1; + break; + } + + if (HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) + { + if ((++pp) >= pe) + { + if (sc >= pc) matched = 1; + break; + } + } + else if (*pp == ']') + { + if (sc >= pc) matched = 1; + break; + } + + pc2 = *pp; + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + pc2 = hio_to_uch_lower(pc2); + + if (sc >= pc && sc <= pc2) matched = 1; + pp++; + } + else + { + if (sc == pc) matched = 1; + pp++; + } + } + + if (negate) matched = !matched; + if (!matched) return 0; + sp++; if (pp < pe) pp++; + } + else + { + /* a normal character */ + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + sc = hio_to_uch_lower(sc); + pc = hio_to_uch_lower(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + } + }
+ + /* will never reach here. but make some immature compilers happy... */ + return 0; +} + +int hio_fnmat_bchars_i (const hio_bch_t* str, hio_oow_t slen, const hio_bch_t* ptn, hio_oow_t plen, int flags, int no_first_period) +{ + const hio_bch_t* sp = str; + const hio_bch_t* pp = ptn; + const hio_bch_t* se = str + slen; + const hio_bch_t* pe = ptn + plen; + hio_bch_t sc, pc, pc2; + + while (1) + { + if (pp < pe && HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) + { + /* pattern is escaped and escaping is allowed. */ + + if ((++pp) >= pe) + { + /* + * the last character of the pattern is an WCS_ESC. + * matching is performed as if the end of the pattern is + * reached just without an WCS_ESC. + */ + if (sp < se) return 0; + return 1; + } + + if (sp >= se) return 0; /* premature string termination */ + + sc = *sp; pc = *pp; /* pc is just a normal character */ + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + /* make characters to lower-case */ + sc = hio_to_bch_lower(sc); + pc = hio_to_bch_lower(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + continue; + } + if (pp >= pe) + { + /* + * the end of the pattern has been reached. + * the string must terminate too. + */ + return sp >= se; + } + + if (sp >= se) + { + /* the string terminats prematurely */ + while (pp < pe && *pp == '*') pp++; + return pp >= pe; + } + + sc = *sp; pc = *pp; + + if (sc == '.' 
&& (flags & HIO_FNMAT_PERIOD)) + { + /* + * a leading period in the string must match + * a period in the pattern explicitly (sp[-1] is read only when sp > str) + */ + if ((!no_first_period && sp == str) || + (sp > str && HIO_FNMAT_IS_SEP(sp[-1]) && (flags & HIO_FNMAT_PATHNAME))) + { + if (pc != '.') return 0; + sp++; pp++; + continue; + } + } + else if (HIO_FNMAT_IS_SEP(sc) && (flags & HIO_FNMAT_PATHNAME)) + { + while (pc == '*') + { + if ((++pp) >= pe) return 0; + pc = *pp; + } + + /* a path separator must be matched explicitly */ + if (!HIO_FNMAT_IS_SEP(pc)) return 0; + sp++; pp++; + continue; + } + + /* the handling of special pattern characters begins here */ + if (pc == '?') + { + /* match any single character */ + sp++; pp++; + } + else if (pc == '*') + { + /* match zero or more characters */ + + /* compact asterisks */ + do { pp++; } while (pp < pe && *pp == '*'); + + if (pp >= pe) + { + /* + * if the last character in the pattern is an asterisk, + * the string should not have any directory separators + * when HIO_FNMAT_PATHNAME is set.
+ */ + if (flags & HIO_FNMAT_PATHNAME) + { + const hio_bch_t* s = sp; + for (s = sp; s < se; s++) + { + if (HIO_FNMAT_IS_SEP(*s)) return 0; + } + } + return 1; + } + else + { + do + { + if (hio_fnmat_bchars_i(sp, se - sp, pp, pe - pp, flags, 1)) return 1; + if (HIO_FNMAT_IS_SEP(*sp) && (flags & HIO_FNMAT_PATHNAME)) break; + sp++; + } + while (sp < se); + + return 0; + } + } + else if (pc == '[') + { + /* match range */ + int negate = 0; + int matched = 0; + + if ((++pp) >= pe) return 0; + if (*pp == '!') { negate = 1; pp++; } + + while (pp < pe && *pp != ']') + { + if (*pp == '[') + { + hio_oow_t pl = pe - pp; + + if (pl >= 9) /* assumption that [:class:] is at least 9 in match_bch_class */ + { + int cm = 0, x = match_bch_class(pp, sc, &cm); + if (x > 0) + { + if (cm) matched = 1; pp += x; /* OR the class result in so a hit from an earlier item of the set is kept */ + continue; + } + } + + /* + * characters in an invalid class name are + * just treated as normal characters + */ + } + + if (HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) pp++; + else if (*pp == ']') break; + + if (pp >= pe) break; + + pc = *pp; + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + sc = hio_to_bch_lower(sc); + pc = hio_to_bch_lower(pc); + } + + if (pp + 1 < pe && pp[1] == '-') + { + pp += 2; /* move to the character next to the dash */ + + if (pp >= pe) + { + if (sc >= pc) matched = 1; + break; + } + + if (HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) + { + if ((++pp) >= pe) + { + if (sc >= pc) matched = 1; + break; + } + } + else if (*pp == ']') + { + if (sc >= pc) matched = 1; + break; + } + + pc2 = *pp; + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + pc2 = hio_to_bch_lower(pc2); + + if (sc >= pc && sc <= pc2) matched = 1; + pp++; + } + else + { + if (sc == pc) matched = 1; + pp++; + } + } + + if (negate) matched = !matched; + if (!matched) return 0; + sp++; if (pp < pe) pp++; + } + else + { + /* a normal character */ + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + sc = hio_to_bch_lower(sc); + pc = hio_to_bch_lower(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + } + }
+ + /* will never reach here. but make some immature compilers happy... */ + return 0; +} + diff --git a/hio/lib/utl-str.c.m4 b/hio/lib/utl-str.c.m4 index 3ea4ba3..66a122d 100644 --- a/hio/lib/utl-str.c.m4 +++ b/hio/lib/utl-str.c.m4 @@ -31,6 +31,129 @@ #include "hio-prv.h" #include + +static int match_uch_class (const hio_uch_t* pp, hio_uch_t sc, int* matched) +{ + if (hio_comp_ucstr_bcstr_limited(pp, "[:upper:]", 9, 0) == 0) + { + *matched = hio_is_uch_upper(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:lower:]", 9, 0) == 0) + { + *matched = hio_is_uch_lower(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:alpha:]", 9, 0) == 0) + { + *matched = hio_is_uch_alpha(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:digit:]", 9, 0) == 0) + { + *matched = hio_is_uch_digit(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:xdigit:]", 10, 0) == 0) + { + *matched = hio_is_uch_xdigit(sc); + return 10; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:alnum:]", 9, 0) == 0) + { + *matched = hio_is_uch_alnum(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:space:]", 9, 0) == 0) + { + *matched = hio_is_uch_space(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:print:]", 9, 0) == 0) + { + *matched = hio_is_uch_print(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:graph:]", 9, 0) == 0) + { + *matched = hio_is_uch_graph(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:cntrl:]", 9, 0) == 0) + { + *matched = hio_is_uch_cntrl(sc); + return 9; + } + else if (hio_comp_ucstr_bcstr_limited(pp, "[:punct:]", 9, 0) == 0) + { + *matched = hio_is_uch_punct(sc); + return 9; + } + + return 0; +} + +static int match_bch_class (const hio_bch_t* pp, hio_bch_t sc, int* matched) +{ + if (hio_comp_bcstr_limited(pp, "[:upper:]", 9, 0) == 0) + { + *matched = hio_is_bch_upper(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:lower:]", 9, 0) 
== 0) + { + *matched = hio_is_bch_lower(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:alpha:]", 9, 0) == 0) + { + *matched = hio_is_bch_alpha(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:digit:]", 9, 0) == 0) + { + *matched = hio_is_bch_digit(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:xdigit:]", 10, 0) == 0) + { + *matched = hio_is_bch_xdigit(sc); + return 10; + } + else if (hio_comp_bcstr_limited(pp, "[:alnum:]", 9, 0) == 0) + { + *matched = hio_is_bch_alnum(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:space:]", 9, 0) == 0) + { + *matched = hio_is_bch_space(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:print:]", 9, 0) == 0) + { + *matched = hio_is_bch_print(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:graph:]", 9, 0) == 0) + { + *matched = hio_is_bch_graph(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:cntrl:]", 9, 0) == 0) + { + *matched = hio_is_bch_cntrl(sc); + return 9; + } + else if (hio_comp_bcstr_limited(pp, "[:punct:]", 9, 0) == 0) + { + *matched = hio_is_bch_punct(sc); + return 9; + } + + return 0; +} + dnl dnl --------------------------------------------------------------------------- include(`utl-str.m4')dnl @@ -136,3 +259,6 @@ fn_chars_to_int(hio_bchars_to_intmax, hio_bch_t, hio_intmax_t, hio_is_bch_space, dnl -- fn_chars_to_uint(hio_uchars_to_uintmax, hio_uch_t, hio_uintmax_t, hio_is_uch_space, HIO_UCHARS_TO_UINTMAX) fn_chars_to_uint(hio_bchars_to_uintmax, hio_bch_t, hio_uintmax_t, hio_is_bch_space, HIO_BCHARS_TO_UINTMAX) +dnl -- +fn_fnmat(hio_fnmat_uchars_i, hio_uch_t, hio_to_uch_lower, match_uch_class) +fn_fnmat(hio_fnmat_bchars_i, hio_bch_t, hio_to_bch_lower, match_bch_class) diff --git a/hio/lib/utl-str.m4 b/hio/lib/utl-str.m4 index d213292..562d4c8 100644 --- a/hio/lib/utl-str.m4 +++ b/hio/lib/utl-str.m4 @@ -1507,3 +1507,238 @@ _int_type_ _fn_name_ (const _char_type_* str, hio_oow_t len, int option, const _ } 
popdef([[_fn_name_]])popdef([[_char_type_]])popdef([[_int_type_]])popdef([[_is_space_]])popdef([[_prefix_]])dnl ]]) +dnl --------------------------------------------------------------------------- +define([[fn_fnmat]], [[pushdef([[_fn_name_]], $1)pushdef([[_char_type_]], $2)pushdef([[_to_lower_]], $3)pushdef([[_match_ch_class_]], $4)dnl +int _fn_name_ (const _char_type_* str, hio_oow_t slen, const _char_type_* ptn, hio_oow_t plen, int flags, int no_first_period) +{ + const _char_type_* sp = str; + const _char_type_* pp = ptn; + const _char_type_* se = str + slen; + const _char_type_* pe = ptn + plen; + _char_type_ sc, pc, pc2; + + while (1) + { + if (pp < pe && HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) + { + /* pattern is escaped and escaping is allowed. */ + + if ((++pp) >= pe) + { + /* + * the last character of the pattern is an WCS_ESC. + * matching is performed as if the end of the pattern is + * reached just without an WCS_ESC. + */ + if (sp < se) return 0; + return 1; + } + + if (sp >= se) return 0; /* premature string termination */ + + sc = *sp; pc = *pp; /* pc is just a normal character */ + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + /* make characters to lower-case */ + sc = _to_lower_()(sc); + pc = _to_lower_()(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + continue; + } + if (pp >= pe) + { + /* + * the end of the pattern has been reached. + * the string must terminate too. + */ + return sp >= se; + } + + if (sp >= se) + { + /* the string terminats prematurely */ + while (pp < pe && *pp == '*') pp++; + return pp >= pe; + } + + sc = *sp; pc = *pp; + + if (sc == '.' 
&& (flags & HIO_FNMAT_PERIOD)) + { + /* + * a leading period in the string must match + * a period in the pattern explicitly (sp[-1] is read only when sp > str) + */ + if ((!no_first_period && sp == str) || + (sp > str && HIO_FNMAT_IS_SEP(sp[-1]) && (flags & HIO_FNMAT_PATHNAME))) + { + if (pc != '.') return 0; + sp++; pp++; + continue; + } + } + else if (HIO_FNMAT_IS_SEP(sc) && (flags & HIO_FNMAT_PATHNAME)) + { + while (pc == '*') + { + if ((++pp) >= pe) return 0; + pc = *pp; + } + + /* a path separator must be matched explicitly */ + if (!HIO_FNMAT_IS_SEP(pc)) return 0; + sp++; pp++; + continue; + } + + /* the handling of special pattern characters begins here */ + if (pc == '?') + { + /* match any single character */ + sp++; pp++; + } + else if (pc == '*') + { + /* match zero or more characters */ + + /* compact asterisks */ + do { pp++; } while (pp < pe && *pp == '*'); + + if (pp >= pe) + { + /* + * if the last character in the pattern is an asterisk, + * the string should not have any directory separators + * when HIO_FNMAT_PATHNAME is set.
+ */ + if (flags & HIO_FNMAT_PATHNAME) + { + const _char_type_* s = sp; + for (s = sp; s < se; s++) + { + if (HIO_FNMAT_IS_SEP(*s)) return 0; + } + } + return 1; + } + else + { + do + { + if (_fn_name_()(sp, se - sp, pp, pe - pp, flags, 1)) return 1; + if (HIO_FNMAT_IS_SEP(*sp) && (flags & HIO_FNMAT_PATHNAME)) break; + sp++; + } + while (sp < se); + + return 0; + } + } + else if (pc == '[') + { + /* match range */ + int negate = 0; + int matched = 0; + + if ((++pp) >= pe) return 0; + if (*pp == '!') { negate = 1; pp++; } + + while (pp < pe && *pp != ']') + { + if (*pp == '[') + { + hio_oow_t pl = pe - pp; + + if (pl >= 9) /* assumption that [:class:] is at least 9 in _match_ch_class_ */ + { + int cm = 0, x = _match_ch_class_()(pp, sc, &cm); + if (x > 0) + { + if (cm) matched = 1; pp += x; /* OR the class result in so a hit from an earlier item of the set is kept */ + continue; + } + } + + /* + * characters in an invalid class name are + * just treated as normal characters + */ + } + + if (HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) pp++; + else if (*pp == ']') break; + + if (pp >= pe) break; + + pc = *pp; + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + sc = _to_lower_()(sc); + pc = _to_lower_()(pc); + } + + if (pp + 1 < pe && pp[1] == '-') + { + pp += 2; /* move to the character next to the dash */ + + if (pp >= pe) + { + if (sc >= pc) matched = 1; + break; + } + + if (HIO_FNMAT_IS_ESC(*pp) && !(flags & HIO_FNMAT_NOESCAPE)) + { + if ((++pp) >= pe) + { + if (sc >= pc) matched = 1; + break; + } + } + else if (*pp == ']') + { + if (sc >= pc) matched = 1; + break; + } + + pc2 = *pp; + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + pc2 = _to_lower_()(pc2); + + if (sc >= pc && sc <= pc2) matched = 1; + pp++; + } + else + { + if (sc == pc) matched = 1; + pp++; + } + } + + if (negate) matched = !matched; + if (!matched) return 0; + sp++; if (pp < pe) pp++; + } + else + { + /* a normal character */ + if ((flags & HIO_FNMAT_IGNORECASE) != 0) + { + sc = _to_lower_()(sc); + pc = _to_lower_()(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + } + } + + /* will never
reach here. but make some immature compilers happy... */ + return 0; +} +popdef([[_fn_name_]])popdef([[_char_type_]])popdef([[_to_lower_]])popdef([[_match_ch_class_]])dnl +]]) + diff --git a/hio/lib/utl.c b/hio/lib/utl.c index 5d5c7b6..912a91b 100644 --- a/hio/lib/utl.c +++ b/hio/lib/utl.c @@ -51,6 +51,33 @@ int hio_comp_ucstr_bcstr (const hio_uch_t* str1, const hio_bch_t* str2, int igno } } +int hio_comp_ucstr_bcstr_limited (const hio_uch_t* str1, const hio_bch_t* str2, hio_oow_t maxlen, int ignorecase) +{ /* compares at most maxlen leading characters of str1 and str2; returns 0 when they are equal, 1 or -1 otherwise */ + if (maxlen == 0) return 0; /* nothing to compare - reported as equal */ + + if (ignorecase) + { + while (hio_to_uch_lower(*str1) == hio_to_bch_lower(*str2)) + { + if (*str1 == '\0' || maxlen == 1) return 0; /* both strings ended, or the limit was reached while still equal */ + + str1++; str2++; maxlen--; + } + + return ((hio_uchu_t)hio_to_uch_lower(*str1) > (hio_bchu_t)hio_to_bch_lower(*str2))? 1: -1; + } + else + { + while (*str1 == *str2) + { + if (*str1 == '\0' || maxlen == 1) return 0; /* both strings ended, or the limit was reached while still equal */ + str1++; str2++; maxlen--; + } + + return ((hio_uchu_t)*str1 > (hio_bchu_t)*str2)? 1: -1; + } +} + int hio_comp_uchars_bcstr (const hio_uch_t* str1, hio_oow_t len, const hio_bch_t* str2, int ignorecase) { if (ignorecase)