qse/lib/cmn/chr.c

514 lines
11 KiB
C
Raw Normal View History

2008-08-21 02:24:36 +00:00
/*
 * $Id$
 *
    Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2008-12-21 21:35:07 +00:00
#include <qse/cmn/chr.h>
#include <qse/cmn/str.h>
2008-08-21 02:24:36 +00:00
#include <ctype.h>
#if defined(QSE_ENABLE_BUNDLED_UNICODE)
# include <qse/cmn/uni.h>
#else
# include <wctype.h>
#endif
2008-08-21 02:24:36 +00:00
2011-08-25 00:54:19 +00:00
/* Forwards c to isalpha() from <ctype.h> and returns its result. */
static QSE_INLINE int is_malpha (qse_mcint_t c)
{
	return isalpha(c);
}
/* Forwards c to isalnum() from <ctype.h> and returns its result. */
static QSE_INLINE int is_malnum (qse_mcint_t c)
{
	return isalnum(c);
}
/*
 * Blank test for a qse_mcint_t character.
 * Uses isblank() when HAVE_ISBLANK is defined; otherwise only a space
 * and a horizontal tab are treated as blank.
 */
static QSE_INLINE int is_mblank (qse_mcint_t c)
{
#if !defined(HAVE_ISBLANK)
	if (c == QSE_MT(' ')) return 1;
	return c == QSE_MT('\t');
#else
	return isblank(c);
#endif
}
2011-08-25 00:54:19 +00:00
/* Forwards c to iscntrl() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mcntrl (qse_mcint_t c)
{
	return iscntrl(c);
}
/* Forwards c to isdigit() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mdigit (qse_mcint_t c)
{
	return isdigit(c);
}
/* Forwards c to isgraph() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mgraph (qse_mcint_t c)
{
	return isgraph(c);
}
/* Forwards c to islower() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mlower (qse_mcint_t c)
{
	return islower(c);
}
/* Forwards c to isprint() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mprint (qse_mcint_t c)
{
	return isprint(c);
}
/* Forwards c to ispunct() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mpunct (qse_mcint_t c)
{
	return ispunct(c);
}
/* Forwards c to isspace() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mspace (qse_mcint_t c)
{
	return isspace(c);
}
/* Forwards c to isupper() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mupper (qse_mcint_t c)
{
	return isupper(c);
}
/* Forwards c to isxdigit() from <ctype.h> and returns its result. */
static QSE_INLINE int is_mxdigit (qse_mcint_t c)
{
	return isxdigit(c);
}
/*
 * Alphabetic test for a qse_wcint_t character: iswalpha() by default,
 * qse_isunialpha() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_walpha (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswalpha(c);
#else
	return qse_isunialpha (c);
#endif
}
/*
 * Alphanumeric test for a qse_wcint_t character: iswalnum() by default,
 * qse_isunialnum() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_walnum (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswalnum(c);
#else
	return qse_isunialnum (c);
#endif
}
2011-08-25 00:54:19 +00:00
/*
 * Blank test for a qse_wcint_t character. Implementation choice, in
 * order of preference:
 *  - qse_isuniblank() when QSE_ENABLE_BUNDLED_UNICODE is defined,
 *  - iswblank() when HAVE_ISWBLANK is defined,
 *  - otherwise a plain comparison against space and horizontal tab.
 */
static QSE_INLINE int is_wblank (qse_wcint_t c)
{
#if defined(QSE_ENABLE_BUNDLED_UNICODE)
	return qse_isuniblank (c);
#elif defined(HAVE_ISWBLANK)
	return iswblank(c);
#else
	return c == QSE_WT(' ') || c == QSE_WT('\t');
#endif
}
/*
 * Control-character test for a qse_wcint_t character: iswcntrl() by
 * default, qse_isunicntrl() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wcntrl (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswcntrl(c);
#else
	return qse_isunicntrl (c);
#endif
}
/*
 * Decimal-digit test for a qse_wcint_t character: iswdigit() by
 * default, qse_isunidigit() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wdigit (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswdigit(c);
#else
	return qse_isunidigit (c);
#endif
}
/*
 * Graphic-character test for a qse_wcint_t character: iswgraph() by
 * default, qse_isunigraph() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wgraph (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswgraph(c);
#else
	return qse_isunigraph (c);
#endif
}
/*
 * Lower-case test for a qse_wcint_t character: iswlower() by default,
 * qse_isunilower() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wlower (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswlower(c);
#else
	return qse_isunilower (c);
#endif
}
/*
 * Printable-character test for a qse_wcint_t character: iswprint() by
 * default, qse_isuniprint() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wprint (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswprint(c);
#else
	return qse_isuniprint (c);
#endif
}
/*
 * Punctuation test for a qse_wcint_t character: iswpunct() by default,
 * qse_isunipunct() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wpunct (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswpunct(c);
#else
	return qse_isunipunct (c);
#endif
}
/*
 * White-space test for a qse_wcint_t character: iswspace() by default,
 * qse_isunispace() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wspace (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswspace(c);
#else
	return qse_isunispace (c);
#endif
}
/*
 * Upper-case test for a qse_wcint_t character: iswupper() by default,
 * qse_isuniupper() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wupper (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswupper(c);
#else
	return qse_isuniupper (c);
#endif
}
/*
 * Hexadecimal-digit test for a qse_wcint_t character: iswxdigit() by
 * default, qse_isunixdigit() when QSE_ENABLE_BUNDLED_UNICODE is defined.
 */
static QSE_INLINE int is_wxdigit (qse_wcint_t c)
{
#if !defined(QSE_ENABLE_BUNDLED_UNICODE)
	return iswxdigit(c);
#else
	return qse_isunixdigit (c);
#endif
}
2011-08-25 00:54:19 +00:00
/*
 * Tests whether the character c belongs to the class selected by type.
 *
 * The class is resolved through a table of the file-local is_m*()
 * predicates indexed with type - 1, so type has to lie between
 * QSE_MCTYPE_ALNUM and QSE_MCTYPE_XDIGIT. The range is verified with
 * QSE_ASSERTX only; no other check guards the table access.
 *
 * Returns the value produced by the selected predicate for c.
 */
int qse_ismctype (qse_mcint_t c, qse_mctype_t type)
{
	/* TODO: use GetStringTypeW/A for WIN32 to implement these */

	/* NOTE(review): the entry order presumably mirrors the numeric order
	 * of the qse_mctype_t values, as the table is indexed with type - 1 */
	static int (*f[]) (qse_mcint_t) =
	{
		is_malnum,
		is_malpha,
		is_mblank,
		is_mcntrl,
		is_mdigit,
		is_mgraph,
		is_mlower,
		is_mprint,
		is_mpunct,
		is_mspace,
		is_mupper,
		is_mxdigit
	};

	/* check against the qse_mctype_t bounds, not the wide-character ones */
	QSE_ASSERTX (type >= QSE_MCTYPE_ALNUM && type <= QSE_MCTYPE_XDIGIT,
		"The character type should be one of qse_mctype_t values");
	return f[type-1] (c);
}
2011-08-25 00:54:19 +00:00
/*
 * Converts the case of c as requested by type.
 *
 * QSE_MCTYPE_UPPER yields toupper(c) and QSE_MCTYPE_LOWER yields
 * tolower(c). Any other type trips the assertion; if the assertion
 * does not stop execution, c is returned unchanged.
 */
qse_mcint_t qse_tomctype (qse_mcint_t c, qse_mctype_t type)
{
	QSE_ASSERTX (type == QSE_MCTYPE_UPPER || type == QSE_MCTYPE_LOWER,
		"The character type should be one of QSE_MCTYPE_UPPER and QSE_MCTYPE_LOWER");

	switch (type)
	{
		case QSE_MCTYPE_UPPER:
			return toupper(c);

		case QSE_MCTYPE_LOWER:
			return tolower(c);

		default:
			return c;
	}
}
2011-08-25 00:54:19 +00:00
/*
 * Tests whether the wide character c belongs to the class selected by
 * type.
 *
 * The class is resolved through a table of the file-local is_w*()
 * predicates indexed with type - 1, so type has to lie between
 * QSE_WCTYPE_ALNUM and QSE_WCTYPE_XDIGIT. The range is verified with
 * QSE_ASSERTX only; no other check guards the table access.
 *
 * Returns the value produced by the selected predicate for c.
 */
int qse_iswctype (qse_wcint_t c, qse_wctype_t type)
{
	/* Dispatch through the is_w*() wrappers rather than through the
	 * <wctype.h> or qse_isuni*() routines directly: each wrapper is
	 * declared as int (qse_wcint_t), so every entry matches the element
	 * type of the table whatever parameter type the underlying routine
	 * uses. The wrappers select the bundled or the system implementation
	 * on their own.
	 * NOTE(review): the entry order presumably mirrors the numeric order
	 * of the qse_wctype_t values, as the table is indexed with type - 1 */
	static int (*f[]) (qse_wcint_t) =
	{
		is_walnum,
		is_walpha,
		is_wblank,
		is_wcntrl,
		is_wdigit,
		is_wgraph,
		is_wlower,
		is_wprint,
		is_wpunct,
		is_wspace,
		is_wupper,
		is_wxdigit
	};

	QSE_ASSERTX (type >= QSE_WCTYPE_ALNUM && type <= QSE_WCTYPE_XDIGIT,
		"The character type should be one of qse_wctype_t values");
	return f[type-1] (c);
}
2011-08-25 00:54:19 +00:00
/*
 * Converts the case of the wide character c as requested by type.
 *
 * QSE_WCTYPE_UPPER and QSE_WCTYPE_LOWER map to qse_touniupper() and
 * qse_tounilower() when QSE_ENABLE_BUNDLED_UNICODE is defined, and to
 * towupper() and towlower() otherwise. Any other type trips the
 * assertion; if the assertion does not stop execution, c is returned
 * unchanged.
 */
qse_wcint_t qse_towctype (qse_wcint_t c, qse_wctype_t type)
{
	QSE_ASSERTX (type == QSE_WCTYPE_UPPER || type == QSE_WCTYPE_LOWER,
		"The type should be one of QSE_WCTYPE_UPPER and QSE_WCTYPE_LOWER");

	switch (type)
	{
		case QSE_WCTYPE_UPPER:
		#if defined(QSE_ENABLE_BUNDLED_UNICODE)
			return qse_touniupper(c);
		#else
			return towupper(c);
		#endif

		case QSE_WCTYPE_LOWER:
		#if defined(QSE_ENABLE_BUNDLED_UNICODE)
			return qse_tounilower(c);
		#else
			return towlower(c);
		#endif

		default:
			return c;
	}
}
static struct wtab_t
{
const qse_wchar_t* name;
int class;
} wtab[] =
{
2011-08-25 00:54:19 +00:00
{ QSE_WT("alnum"), QSE_WCTYPE_ALNUM },
{ QSE_WT("alpha"), QSE_WCTYPE_ALPHA },
{ QSE_WT("blank"), QSE_WCTYPE_BLANK },
{ QSE_WT("cntrl"), QSE_WCTYPE_CNTRL },
{ QSE_WT("digit"), QSE_WCTYPE_DIGIT },
{ QSE_WT("graph"), QSE_WCTYPE_GRAPH },
{ QSE_WT("lower"), QSE_WCTYPE_LOWER },
{ QSE_WT("print"), QSE_WCTYPE_PRINT },
{ QSE_WT("punct"), QSE_WCTYPE_PUNCT },
{ QSE_WT("space"), QSE_WCTYPE_SPACE },
{ QSE_WT("upper"), QSE_WCTYPE_UPPER },
{ QSE_WT("xdigit"), QSE_WCTYPE_XDIGIT }
};
2012-02-27 15:44:57 +00:00
/*
 * Looks up the wide-character class called name in wtab.
 *
 * name is compared against each probed entry with qse_wcscmp(). On a
 * match the class value is stored in *id and 0 is returned. When no
 * entry matches, -1 is returned and *id is left untouched.
 */
int qse_wcstoctype (const qse_wchar_t* name, qse_wctype_t* id)
{
	/* the bounds are signed so that right can drop to -1 and end the
	 * loop; with qse_size_t an explicit mid == 0 check would be needed */
	int left = 0, right = QSE_COUNTOF(wtab) - 1, mid;

	while (left <= right)
	{
		int n;
		struct wtab_t* kwp;

		/* written this way instead of (left + right) / 2 */
		mid = left + (right - left) / 2;
		kwp = &wtab[mid];

		/* qse_wcscmp() is taken to follow the strcmp() sign convention,
		 * the same orientation qse_wcsntoctype() relies on: a negative
		 * result means name sorts before the probed entry */
		n = qse_wcscmp(name, kwp->name);
		if (n < 0)
		{
			/* continue in the lower half */
			right = mid - 1;
		}
		else if (n > 0)
		{
			/* continue in the upper half */
			left = mid + 1;
		}
		else
		{
			*id = kwp->class;
			return 0;
		}
	}

	return -1;
}
2012-02-27 15:44:57 +00:00
/*
 * Looks up the wide-character class named by the name/len pair in wtab.
 *
 * The pair is compared against each probed entry with qse_wcsxcmp().
 * On a match the class value is stored in *id and 0 is returned. When
 * no entry matches, -1 is returned and *id is left untouched.
 */
int qse_wcsntoctype (const qse_wchar_t* name, qse_size_t len, qse_wctype_t* id)
{
	/* signed bounds: hi may become -1, which simply ends the loop */
	int lo = 0;
	int hi = QSE_COUNTOF(wtab) - 1;

	while (lo <= hi)
	{
		int pivot = lo + (hi - lo) / 2;
		struct wtab_t* ent = &wtab[pivot];
		int diff = qse_wcsxcmp(name, len, ent->name);

		if (diff == 0)
		{
			*id = ent->class;
			return 0;
		}

		/* a negative result continues below the pivot,
		 * a positive one above it */
		if (diff < 0) hi = pivot - 1;
		else lo = pivot + 1;
	}

	return -1;
}
static struct mtab_t
{
const qse_mchar_t* name;
int class;
} mtab[] =
{
2011-08-25 00:54:19 +00:00
{ QSE_MT("alnum"), QSE_MCTYPE_ALNUM },
{ QSE_MT("alpha"), QSE_MCTYPE_ALPHA },
{ QSE_MT("blank"), QSE_MCTYPE_BLANK },
{ QSE_MT("cntrl"), QSE_MCTYPE_CNTRL },
{ QSE_MT("digit"), QSE_MCTYPE_DIGIT },
{ QSE_MT("graph"), QSE_MCTYPE_GRAPH },
{ QSE_MT("lower"), QSE_MCTYPE_LOWER },
{ QSE_MT("print"), QSE_MCTYPE_PRINT },
{ QSE_MT("punct"), QSE_MCTYPE_PUNCT },
{ QSE_MT("space"), QSE_MCTYPE_SPACE },
{ QSE_MT("upper"), QSE_MCTYPE_UPPER },
2011-09-01 09:43:46 +00:00
{ QSE_MT("xdigit"), QSE_MCTYPE_XDIGIT }
};
2012-02-27 15:44:57 +00:00
/*
 * Looks up the character class called name in mtab.
 *
 * name is compared against each probed entry with qse_mbscmp(). On a
 * match the class value is stored in *id and 0 is returned. When no
 * entry matches, -1 is returned and *id is left untouched.
 */
int qse_mbstoctype (const qse_mchar_t* name, qse_mctype_t* id)
{
	/* the bounds are signed so that right can drop to -1 and end the
	 * loop; with qse_size_t an explicit mid == 0 check would be needed */
	int left = 0, right = QSE_COUNTOF(mtab) - 1, mid;

	while (left <= right)
	{
		int n;
		struct mtab_t* kwp;

		/* written this way instead of (left + right) / 2 */
		mid = left + (right - left) / 2;
		kwp = &mtab[mid];

		/* qse_mbscmp() is taken to follow the strcmp() sign convention,
		 * the same orientation qse_mbsntoctype() relies on: a negative
		 * result means name sorts before the probed entry */
		n = qse_mbscmp(name, kwp->name);
		if (n < 0)
		{
			/* continue in the lower half */
			right = mid - 1;
		}
		else if (n > 0)
		{
			/* continue in the upper half */
			left = mid + 1;
		}
		else
		{
			*id = kwp->class;
			return 0;
		}
	}

	return -1;
}
2012-02-27 15:44:57 +00:00
/*
 * Looks up the character class named by the name/len pair in mtab.
 *
 * The pair is compared against each probed entry with qse_mbsxcmp().
 * On a match the class value is stored in *id and 0 is returned. When
 * no entry matches, -1 is returned and *id is left untouched.
 */
int qse_mbsntoctype (const qse_mchar_t* name, qse_size_t len, qse_mctype_t* id)
{
	/* signed bounds: hi may become -1, which simply ends the loop */
	int lo = 0;
	int hi = QSE_COUNTOF(mtab) - 1;

	while (lo <= hi)
	{
		int pivot = lo + (hi - lo) / 2;
		struct mtab_t* ent = &mtab[pivot];
		int diff = qse_mbsxcmp(name, len, ent->name);

		if (diff == 0)
		{
			*id = ent->class;
			return 0;
		}

		/* a negative result continues below the pivot,
		 * a positive one above it */
		if (diff < 0) hi = pivot - 1;
		else lo = pivot + 1;
	}

	return -1;
}