separated slmb from mbwc

This commit is contained in:
2012-01-03 14:41:15 +00:00
parent dd02292cc4
commit 42431d2642
39 changed files with 890 additions and 331 deletions

View File

@ -38,7 +38,6 @@ libqsecmn_la_SOURCES = \
fs-move.c \
main.c \
mbwc.c \
mbwc-chr.c \
mbwc-str.c \
mem.c \
oht.c \
@ -51,6 +50,7 @@ libqsecmn_la_SOURCES = \
rex.c \
sio.c \
sll.c \
slmb.c \
stdio.c \
str-beg.c \
str-cat.c \

View File

@ -78,9 +78,9 @@ libqsecmn_la_DEPENDENCIES =
am_libqsecmn_la_OBJECTS = alg-rand.lo alg-search.lo alg-sort.lo \
assert.lo chr.lo dll.lo env.lo gdl.lo htb.lo lda.lo fio.lo \
fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo main.lo mbwc.lo \
mbwc-chr.lo mbwc-str.lo mem.lo oht.lo opt.lo path-basename.lo \
mbwc-str.lo mem.lo oht.lo opt.lo path-basename.lo \
path-canon.lo pio.lo pma.lo rbt.lo rex.lo sio.lo sll.lo \
stdio.lo str-beg.lo str-cat.lo str-chr.lo str-cnv.lo \
slmb.lo stdio.lo str-beg.lo str-cat.lo str-chr.lo str-cnv.lo \
str-cmp.lo str-cpy.lo str-del.lo str-dup.lo str-dynm.lo \
str-dynw.lo str-end.lo str-excl.lo str-fcpy.lo str-fnmat.lo \
str-incl.lo str-len.lo str-pac.lo str-pbrk.lo str-put.lo \
@ -308,7 +308,6 @@ libqsecmn_la_SOURCES = \
fs-move.c \
main.c \
mbwc.c \
mbwc-chr.c \
mbwc-str.c \
mem.c \
oht.c \
@ -321,6 +320,7 @@ libqsecmn_la_SOURCES = \
rex.c \
sio.c \
sll.c \
slmb.c \
stdio.c \
str-beg.c \
str-cat.c \
@ -466,7 +466,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-chr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@
@ -480,6 +479,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sio.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sll.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slmb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stdio.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-beg.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-cat.Plo@am__quote@

View File

@ -352,9 +352,9 @@ int qse_env_insertm (
qse_wchar_t* namedup, * valuedup;
int n;
namedup = qse_mbstowcsdup (name, env->mmgr);
namedup = qse_mbstowcsdup (name, env->mmgr); /* TODO: ignore mbwcerr */
if (namedup == QSE_NULL) return -1;
valuedup = qse_mbstowcsdup (value, env->mmgr);
valuedup = qse_mbstowcsdup (value, env->mmgr); /* TODO: ignore mbwcerr */
if (valuedup == QSE_NULL)
{
QSE_MMGR_FREE (env->mmgr, namedup);
@ -398,7 +398,7 @@ int qse_env_deletem (qse_env_t* env, const qse_mchar_t* name)
qse_wchar_t* namedup;
int n;
namedup = qse_mbstowcsdup (name, env->mmgr);
namedup = qse_mbstowcsdup (name, env->mmgr); /* TODO: ignore mbwcerr */
if (namedup == QSE_NULL) return -1;
n = deletew (env, namedup);
@ -466,7 +466,7 @@ static qse_wchar_t* get_env (qse_env_t* env, const qse_wchar_t* name, int* free)
qse_wchar_t* dup;
qse_wchar_t* eq;
dup = qse_mbstowcsdup (*p, env->mmgr);
dup = qse_mbstowcsdup (*p, env->mmgr); /* TODO: ignore mbwcerr */
if (dup == QSE_NULL) return QSE_NULL;
eq = qse_wcsbeg (dup, name);
@ -544,7 +544,7 @@ int qse_env_insertsysm (qse_env_t* env, const qse_mchar_t* name)
qse_wchar_t* namedup;
int ret = -1;
namedup = qse_mbstowcsdup (name, env->mmgr);
namedup = qse_mbstowcsdup (name, env->mmgr); /* TODO: ignore mbwcerr */
if (namedup)
{
ret = qse_env_insertsysw (env, namedup);
@ -627,7 +627,7 @@ done:
qse_wchar_t* dup;
int n;
dup = qse_mbstowcsdup (*p, env->mmgr);
dup = qse_mbstowcsdup (*p, env->mmgr); /* TODO: ignore mbwcerr */
if (dup == QSE_NULL) return -1;
n = add_envstrw (env, dup);
QSE_MMGR_FREE (env->mmgr, dup);

View File

@ -20,15 +20,11 @@
#include <qse/cmn/main.h>
#include <qse/cmn/str.h>
#include <locale.h>
#include "mem.h"
int qse_runmain (
int argc, qse_achar_t* argv[], qse_runmain_handler_t handler)
{
/* TODO: remove dependency on setlocale */
setlocale (LC_ALL, "");
#if (defined(QSE_ACHAR_IS_MCHAR) && defined(QSE_CHAR_IS_MCHAR)) || \
(defined(QSE_ACHAR_IS_WCHAR) && defined(QSE_CHAR_IS_WCHAR))
{
@ -75,8 +71,6 @@ int qse_runmainwithenv (
int argc, qse_achar_t* argv[],
qse_achar_t* envp[], qse_runmainwithenv_handler_t handler)
{
setlocale (LC_ALL, ""); /* TODO: remove dependency on setlocale */
#if (defined(QSE_ACHAR_IS_MCHAR) && defined(QSE_CHAR_IS_MCHAR)) || \
(defined(QSE_ACHAR_IS_WCHAR) && defined(QSE_CHAR_IS_WCHAR))
{

View File

@ -19,13 +19,21 @@
*/
#include <qse/cmn/mbwc.h>
#include <qse/cmn/slmb.h>
#include <qse/cmn/utf8.h>
/* TODO: there is no guarantee that slwc is a Unicode character or vice versa.
* the ctype handling functions should be made wide-character
* dependent.
*/
/* TODO: binary cmgr -> simply expands a byte to wchar and vice versa. */
static qse_cmgr_t builtin_cmgr[] =
{
{
qse_mbtowc,
qse_wctomb
qse_slmbtoslwc,
qse_slwctoslmb
},
{
@ -34,7 +42,7 @@ static qse_cmgr_t builtin_cmgr[] =
}
};
qse_cmgr_t* qse_loccmgr = &builtin_cmgr[0];
qse_cmgr_t* qse_slmbcmgr = &builtin_cmgr[0];
qse_cmgr_t* qse_utf8cmgr = &builtin_cmgr[1];
static qse_cmgr_t* dfl_cmgr = &builtin_cmgr[0];

View File

@ -1,5 +1,5 @@
/*
* $Id: chr-cnv.c 556 2011-08-31 15:43:46Z hyunghwan.chung $
* $Id$
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -18,8 +18,7 @@
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/mbwc.h>
#include <qse/cmn/utf8.h>
#include <qse/cmn/slmb.h>
#include "mem.h"
#if !defined(QSE_HAVE_CONFIG_H)
@ -39,7 +38,7 @@
# include <stdlib.h>
#endif
qse_size_t qse_mbrlen (
qse_size_t qse_slmbrlen (
const qse_mchar_t* mb, qse_size_t mbl, qse_mbstate_t* state)
{
#if defined(HAVE_MBRLEN)
@ -64,7 +63,7 @@ qse_size_t qse_mbrlen (
#endif
}
qse_size_t qse_mbrtowc (
qse_size_t qse_slmbrtoslwc (
const qse_mchar_t* mb, qse_size_t mbl,
qse_wchar_t* wc, qse_mbstate_t* state)
{
@ -86,7 +85,7 @@ qse_size_t qse_mbrtowc (
#endif
}
qse_size_t qse_wcrtomb (
qse_size_t qse_slwcrtoslmb (
qse_wchar_t wc, qse_mchar_t* mb,
qse_size_t mbl, qse_mbstate_t* state)
{
@ -132,26 +131,25 @@ qse_size_t qse_wcrtomb (
* mbrtowc(3) do produce non-initial states when interrupted in the middle
* of a character.
*/
qse_size_t qse_mblen (const qse_mchar_t* mb, qse_size_t mbl)
qse_size_t qse_slmblen (const qse_mchar_t* mb, qse_size_t mbl)
{
qse_mbstate_t state = { { 0, } };
return qse_mbrlen (mb, mbl, &state);
return qse_slmbrlen (mb, mbl, &state);
}
qse_size_t qse_mbtowc (const qse_mchar_t* mb, qse_size_t mbl, qse_wchar_t* wc)
qse_size_t qse_slmbtoslwc (const qse_mchar_t* mb, qse_size_t mbl, qse_wchar_t* wc)
{
qse_mbstate_t state = { { 0, } };
return qse_mbrtowc (mb, mbl, wc, &state);
return qse_slmbrtoslwc (mb, mbl, wc, &state);
}
qse_size_t qse_wctomb (qse_wchar_t wc, qse_mchar_t* mb, qse_size_t mbl)
qse_size_t qse_slwctoslmb (qse_wchar_t wc, qse_mchar_t* mb, qse_size_t mbl)
{
qse_mbstate_t state = { { 0, } };
return qse_wcrtomb (wc, mb, mbl, &state);
return qse_slwcrtoslmb (wc, mb, mbl, &state);
}
int qse_mbcurmax (void)
int qse_slmblenmax (void)
{
/* TODO: consider other encodings */
return (QSE_UTF8LEN_MAX > MB_CUR_MAX)? QSE_UTF8LEN_MAX: MB_CUR_MAX;
return MB_CUR_MAX;
}

View File

@ -19,7 +19,7 @@
*/
#include <qse/cmn/tio.h>
#include <qse/cmn/utf8.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
QSE_IMPLEMENT_COMMON_FUNCTIONS (tio)
@ -52,18 +52,10 @@ int qse_tio_close (qse_tio_t* tio)
int qse_tio_init (qse_tio_t* tio, qse_mmgr_t* mmgr, int flags)
{
/* TODO: set this default_cmgr differently depending on
* build options and platforms */
static qse_cmgr_t default_cmgr =
{
qse_utf8touc,
qse_uctoutf8
};
QSE_MEMSET (tio, 0, QSE_SIZEOF(*tio));
tio->mmgr = mmgr;
tio->cmgr = &default_cmgr;
tio->cmgr = qse_getdflcmgr();
/* mask off internal bits when storing the flags for safety */
tio->flags = flags & ~(QSE_TIO_DYNINBUF | QSE_TIO_DYNOUTBUF);

View File

@ -2083,6 +2083,9 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
/* If in eight bit mode, compute a table of characters that can be the
first character of a match. */
tnfa->first_char = -1;
/* QSE: deleted */
/*
if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
{
int count = 0;
@ -2125,6 +2128,8 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
}
else
tnfa->firstpos_chars = NULL;
*/
/* END QSE */
p = tree->firstpos;
@ -2273,8 +2278,12 @@ void tre_free (regex_t *preg)
if (tnfa->tag_directions)
xfree(preg->mmgr,tnfa->tag_directions);
/* QSE: deleted */
/*
if (tnfa->firstpos_chars)
xfree(preg->mmgr,tnfa->firstpos_chars);
*/
/* END QSE */
if (tnfa->minimal_tags)
xfree(preg->mmgr,tnfa->minimal_tags);
xfree(preg->mmgr,tnfa);

View File

@ -166,6 +166,7 @@ tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
}
#if defined(QSE_CHAR_IS_MCHAR)
/* Expands a character class to character ranges. */
static reg_errcode_t
tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
@ -174,7 +175,9 @@ tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
reg_errcode_t status = REG_OK;
tre_cint_t c;
int j, min = -1, max = 0;
assert(TRE_MB_CUR_MAX == 1);
/* QSE: deleted */
/*assert(TRE_MB_CUR_MAX == 1);*/
/* END QSE */
DPRINT((" expanding class to character ranges\n"));
for (j = 0; (j < 256) && (status == REG_OK); j++)
@ -198,6 +201,7 @@ tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
status = tre_new_item(mem, min, max, i, max_i, items);
return status;
}
#endif
static int
@ -294,13 +298,20 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
if (qse_getctypebyxname (re + 2, len, &class) <= -1) status = REG_ECTYPE;
/* Optimize character classes for 8 bit character sets. */
if (status == REG_OK && TRE_MB_CUR_MAX == 1)
#if defined(QSE_CHAR_IS_MCHAR)
/* QSE: not possible to count on MB_CUR_MAX since
* this library is designed to support per-object
* or per-context character encoding using qse_cmgr_t */
/* if (status == REG_OK && TRE_MB_CUR_MAX == 1) */
/* END QSE */
if (status == REG_OK)
{
status = tre_expand_ctype(ctx->mem, class, items,
&i, &max_i, ctx->cflags);
class = (tre_ctype_t)0;
skip = 1;
}
#endif
re = endptr + 2;
}
}

View File

@ -135,10 +135,9 @@ SUBMATCH[4] = [defg]
#ifdef QSE_CHAR_IS_WCHAR
# define TRE_WCHAR
/*
# define TRE_MULTIBYTE
# define TRE_MBSTATE
*/
/*# define TRE_MULTIBYTE*/
/*# define TRE_MBSTATE*/
#endif
#define TRE_REGEX_T_FIELD value
@ -261,14 +260,16 @@ typedef qse_pma_t* tre_mem_t;
/* Define the character types and functions. */
#ifdef TRE_WCHAR
# define TRE_CHAR_MAX QSE_TYPE_MAX(qse_wchar_t)
/*
# ifdef TRE_MULTIBYTE
# define TRE_MB_CUR_MAX (qse_getmbcurmax())
# else /* !TRE_MULTIBYTE */
# else
# define TRE_MB_CUR_MAX 1
# endif /* !TRE_MULTIBYTE */
# endif
*/
#else /* !TRE_WCHAR */
# define TRE_CHAR_MAX 255
# define TRE_MB_CUR_MAX 1
/*# define TRE_MB_CUR_MAX 1*/
#endif /* !TRE_WCHAR */
#define DPRINT(msg)
@ -394,7 +395,9 @@ struct tnfa
tre_tnfa_transition_t *initial;
tre_tnfa_transition_t *final;
tre_submatch_data_t *submatch_data;
#if 0
char *firstpos_chars;
#endif
int first_char;
unsigned int num_submatches;
tre_tag_direction_t *tag_directions;