enhanced lda with binary heap functions

added more wide character handling functions
This commit is contained in:
2010-04-06 06:50:01 +00:00
parent aaa1097128
commit 2a045b7ff0
22 changed files with 846 additions and 458 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: parse.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
* $Id: parse.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -226,8 +226,6 @@ static int skip_spaces (qse_awk_t* awk);
static int skip_comment (qse_awk_t* awk);
static int classify_ident (
qse_awk_t* awk, const qse_char_t* name, qse_size_t len);
static int is_plain_var (qse_awk_nde_t* nde);
static int is_var (qse_awk_nde_t* nde);
static int deparse (qse_awk_t* awk);
static qse_map_walk_t deparse_func (
@ -400,6 +398,26 @@ static global_t gtab[] =
#define SETERR_ARG(awk,code,ep,el) SETERR_ARG_LOC(awk,code,ep,el,QSE_NULL)
/*
 * Tells whether a parse-tree node is a non-indexed variable reference.
 * Returns 1 when the node type is one of GBL, LCL, ARG or NAMED and
 * 0 for every other node type.
 */
static QSE_INLINE int is_plain_var (qse_awk_nde_t* nde)
{
	switch (nde->type)
	{
		case QSE_AWK_NDE_GBL:
		case QSE_AWK_NDE_LCL:
		case QSE_AWK_NDE_ARG:
		case QSE_AWK_NDE_NAMED:
			return 1;

		default:
			return 0;
	}
}
/*
 * Tells whether a parse-tree node is a variable reference of any form.
 * Returns 1 for the four non-indexed variable node types (GBL, LCL, ARG,
 * NAMED) as well as their indexed counterparts (GBLIDX, LCLIDX, ARGIDX,
 * NAMEDIDX), and 0 for every other node type.
 */
static QSE_INLINE int is_var (qse_awk_nde_t* nde)
{
	switch (nde->type)
	{
		/* non-indexed variables */
		case QSE_AWK_NDE_GBL:
		case QSE_AWK_NDE_LCL:
		case QSE_AWK_NDE_ARG:
		case QSE_AWK_NDE_NAMED:
		/* indexed variables */
		case QSE_AWK_NDE_GBLIDX:
		case QSE_AWK_NDE_LCLIDX:
		case QSE_AWK_NDE_ARGIDX:
		case QSE_AWK_NDE_NAMEDIDX:
			return 1;

		default:
			return 0;
	}
}
static int get_char (qse_awk_t* awk)
{
qse_ssize_t n;
@ -5762,26 +5780,6 @@ static int classify_ident (
return TOK_IDENT;
}
/*
 * Returns 1 if the node refers to a variable without an index
 * (node type GBL, LCL, ARG or NAMED); returns 0 otherwise.
 */
static int is_plain_var (qse_awk_nde_t* nde)
{
	if (nde->type == QSE_AWK_NDE_GBL) return 1;
	if (nde->type == QSE_AWK_NDE_LCL) return 1;
	if (nde->type == QSE_AWK_NDE_ARG) return 1;
	if (nde->type == QSE_AWK_NDE_NAMED) return 1;
	return 0;
}
/*
 * Returns 1 if the node refers to a variable, indexed or not
 * (node type GBL, LCL, ARG, NAMED, GBLIDX, LCLIDX, ARGIDX or NAMEDIDX);
 * returns 0 otherwise.
 */
static int is_var (qse_awk_nde_t* nde)
{
	/* non-indexed forms */
	if (nde->type == QSE_AWK_NDE_GBL) return 1;
	if (nde->type == QSE_AWK_NDE_LCL) return 1;
	if (nde->type == QSE_AWK_NDE_ARG) return 1;
	if (nde->type == QSE_AWK_NDE_NAMED) return 1;

	/* indexed forms */
	if (nde->type == QSE_AWK_NDE_GBLIDX) return 1;
	if (nde->type == QSE_AWK_NDE_LCLIDX) return 1;
	if (nde->type == QSE_AWK_NDE_ARGIDX) return 1;
	if (nde->type == QSE_AWK_NDE_NAMEDIDX) return 1;

	return 0;
}
struct deparse_func_t
{
qse_awk_t* awk;

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
* $Id: run.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -273,17 +273,17 @@ static qse_cstr_t* xstr_to_cstr (qse_xstr_t* xstr)
}
#endif
qse_size_t qse_awk_rtx_getnargs (qse_awk_rtx_t* run)
/*
 * Returns the value of STACK_NARGS(run) converted to qse_size_t.
 * NOTE(review): presumably this is the argument count of the function
 * call currently being executed - confirm against the STACK_NARGS macro.
 */
QSE_INLINE qse_size_t qse_awk_rtx_getnargs (qse_awk_rtx_t* run)
{
return (qse_size_t) STACK_NARGS (run);
}
qse_awk_val_t* qse_awk_rtx_getarg (qse_awk_rtx_t* run, qse_size_t idx)
/*
 * Returns the value produced by STACK_ARG(run, idx).
 * No range check is performed on idx here.
 * NOTE(review): presumably idx is a 0-based argument position that must
 * be below qse_awk_rtx_getnargs(run) - confirm against the STACK_ARG macro.
 */
QSE_INLINE qse_awk_val_t* qse_awk_rtx_getarg (qse_awk_rtx_t* run, qse_size_t idx)
{
return STACK_ARG (run, idx);
}
qse_awk_val_t* qse_awk_rtx_getgbl (qse_awk_rtx_t* run, int id)
QSE_INLINE qse_awk_val_t* qse_awk_rtx_getgbl (qse_awk_rtx_t* run, int id)
{
QSE_ASSERT (id >= 0 && id < (int)QSE_LDA_SIZE(run->awk->parse.gbls));
return STACK_GBL (run, id);
@ -618,7 +618,7 @@ static int set_global (
return 0;
}
void qse_awk_rtx_setretval (qse_awk_rtx_t* rtx, qse_awk_val_t* val)
QSE_INLINE void qse_awk_rtx_setretval (qse_awk_rtx_t* rtx, qse_awk_val_t* val)
{
qse_awk_rtx_refdownval (rtx, STACK_RETVAL(rtx));
STACK_RETVAL(rtx) = val;
@ -626,7 +626,7 @@ void qse_awk_rtx_setretval (qse_awk_rtx_t* rtx, qse_awk_val_t* val)
qse_awk_rtx_refupval (rtx, val);
}
int qse_awk_rtx_setgbl (qse_awk_rtx_t* rtx, int id, qse_awk_val_t* val)
QSE_INLINE int qse_awk_rtx_setgbl (qse_awk_rtx_t* rtx, int id, qse_awk_val_t* val)
{
QSE_ASSERT (id >= 0 && id < (int)QSE_LDA_SIZE(rtx->awk->parse.gbls));
return set_global (rtx, (qse_size_t)id, QSE_NULL, val);

View File

@ -1,5 +1,5 @@
/*
* $Id: chr_cnv.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: chr_cnv.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -41,6 +41,13 @@ qse_size_t qse_mblen (const qse_mchar_t* mb, qse_size_t mblen)
if (n == (size_t)-2) return mblen + 1; /* incomplete sequence */
return (qse_size_t)n;
#if 0
n = mblen (mb, mblen, &mbs);
if (n == 0) return 1; /* a null character */
if (n == (size_t)-1) return 0; /* invalid or incomplete sequence */
return (qse_size_t)n;
#endif
#else
#error #### NOT SUPPORTED ####
#endif

View File

@ -1,5 +1,5 @@
/*
* $Id: fio.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: fio.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -213,16 +213,16 @@ qse_fio_t* qse_fio_init (
qse_tio_t* tio;
tio = qse_tio_open (fio->mmgr, 0);
if (tio == QSE_NULL) QSE_ERR_THROW (tio);
if (tio == QSE_NULL) QSE_THROW_ERR (tio);
if (qse_tio_attachin (tio, fio_input, fio) == -1 ||
qse_tio_attachout (tio, fio_output, fio) == -1)
{
qse_tio_close (tio);
QSE_ERR_THROW (tio);
QSE_THROW_ERR (tio);
}
QSE_ERR_CATCH (tio)
QSE_CATCH_ERR (tio)
{
#ifdef _WIN32
CloseHandle (handle);

View File

@ -1,5 +1,5 @@
/*
* $Id: lda.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: lda.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -39,7 +39,7 @@ QSE_IMPLEMENT_COMMON_FUNCTIONS (lda)
#define DPTR(node) ((node)->dptr)
#define DLEN(node) ((node)->dlen)
static int comp_data (lda_t* lda,
static int default_comparator (lda_t* lda,
const void* dptr1, size_t dlen1,
const void* dptr2, size_t dlen2)
{
@ -58,7 +58,7 @@ static int comp_data (lda_t* lda,
return n;
}
static node_t* alloc_node (lda_t* lda, void* dptr, size_t dlen)
static QSE_INLINE node_t* alloc_node (lda_t* lda, void* dptr, size_t dlen)
{
node_t* n;
@ -136,7 +136,7 @@ lda_t* qse_lda_init (lda_t* lda, mmgr_t* mmgr, size_t capa)
lda->node = QSE_NULL;
lda->copier = QSE_LDA_COPIER_SIMPLE;
lda->comper = comp_data;
lda->comper = default_comparator;
if (qse_lda_setcapa (lda, capa) == QSE_NULL) return QSE_NULL;
return lda;
@ -162,7 +162,8 @@ int qse_lda_getscale (lda_t* lda)
void qse_lda_setscale (lda_t* lda, int scale)
{
QSE_ASSERTX (scale > 0 && scale <= QSE_TYPE_MAX(qse_byte_t),
"The scale should be larger than 0 and less than or equal to the maximum value that the qse_byte_t type can hold");
"The scale should be larger than 0 and less than or "
"equal to the maximum value that the qse_byte_t type can hold");
if (scale <= 0) scale = 1;
if (scale > QSE_TYPE_MAX(qse_byte_t)) scale = QSE_TYPE_MAX(qse_byte_t);
@ -198,7 +199,7 @@ comper_t qse_lda_getcomper (lda_t* lda)
void qse_lda_setcomper (lda_t* lda, comper_t comper)
{
if (comper == QSE_NULL) comper = comp_data;
if (comper == QSE_NULL) comper = default_comparator;
lda->comper = comper;
}
@ -236,8 +237,11 @@ lda_t* qse_lda_setcapa (lda_t* lda, size_t capa)
{
void* tmp;
if (capa == lda->capa) return lda;
if (lda->size > capa)
{
/* to trigger freeers on the items truncated */
qse_lda_delete (lda, capa, lda->size - capa);
QSE_ASSERT (lda->size <= capa);
}
@ -246,13 +250,14 @@ lda_t* qse_lda_setcapa (lda_t* lda, size_t capa)
{
if (lda->mmgr->realloc != QSE_NULL && lda->node != QSE_NULL)
{
tmp = (qse_lda_node_t**)QSE_MMGR_REALLOC (
lda->mmgr, lda->node, QSE_SIZEOF(*lda->node)*capa);
tmp = (node_t**) QSE_MMGR_REALLOC (
lda->mmgr, lda->node,
QSE_SIZEOF(*lda->node)*capa);
if (tmp == QSE_NULL) return QSE_NULL;
}
else
{
tmp = (qse_lda_node_t**) QSE_MMGR_ALLOC (
tmp = (node_t**) QSE_MMGR_ALLOC (
lda->mmgr, QSE_SIZEOF(*lda->node)*capa);
if (tmp == QSE_NULL) return QSE_NULL;
@ -261,7 +266,7 @@ lda_t* qse_lda_setcapa (lda_t* lda, size_t capa)
size_t x;
x = (capa > lda->capa)? lda->capa: capa;
QSE_MEMCPY (tmp, lda->node,
QSE_SIZEOF(*lda->node) * x);
QSE_SIZEOF(*lda->node)*x);
QSE_MMGR_FREE (lda->mmgr, lda->node);
}
}
@ -514,6 +519,8 @@ void qse_lda_walk (lda_t* lda, walker_t walker, void* arg)
qse_lda_walk_t w = QSE_LDA_WALK_FORWARD;
size_t i = 0;
if (lda->size <= 0) return;
while (1)
{
if (lda->node[i] != QSE_NULL)
@ -562,3 +569,108 @@ void qse_lda_rwalk (lda_t* lda, walker_t walker, void* arg)
}
}
/*
 * Pushes an item onto the array used as a stack by inserting it at the
 * position just past the current last item.
 * Returns whatever qse_lda_insert() returns for that insertion.
 */
size_t qse_lda_pushstack (lda_t* lda, void* dptr, size_t dlen)
{
	size_t top;

	/* the new item goes right after the current last one */
	top = lda->size;
	return qse_lda_insert (lda, top, dptr, dlen);
}
/*
 * Pops the most recently pushed item off the array used as a stack by
 * deleting the item at the last position.
 * Calling this on an empty array is a programming error caught by the
 * assertion; when the assertion is inactive the call is a no-op.
 */
void qse_lda_popstack (lda_t* lda)
{
	QSE_ASSERT (lda->size > 0);

	/* without this guard, 'lda->size - 1' would wrap around on an empty
	 * array and a bogus index would be handed to qse_lda_delete() */
	if (lda->size <= 0) return;

	qse_lda_delete (lda, lda->size - 1, 1);
}
/*
 * Index arithmetic for a binary heap laid out in a 0-based array:
 * the children of the item at index x sit at 2x+1 and 2x+2, and the
 * parent of the item at index x sits at (x-1)/2.
 * HEAP_PARENT is only meaningful for x > 0; the heap functions below
 * apply it to non-zero indices only.
 */
#define HEAP_PARENT(x) (((x)-1) / 2)
#define HEAP_LEFT(x) ((x)*2 + 1)
#define HEAP_RIGHT(x) ((x)*2 + 2)
/*
 * Adds an item to the array maintained as a binary heap.
 *
 * The item is appended at the end and then moved up towards the root
 * for as long as the comparator reports it to be greater than its
 * parent, so the greatest item (according to lda->comper) stays at
 * index 0.
 *
 * Returns QSE_LDA_NIL if the insertion fails; otherwise returns the
 * number of items held after the insertion.
 */
size_t qse_lda_pushheap (lda_t* lda, void* dptr, size_t dlen)
{
	size_t child;

	/* append the new item at the bottom of the heap */
	child = lda->size;
	if (qse_lda_insert (lda, child, dptr, dlen) == QSE_LDA_NIL)
	{
		return QSE_LDA_NIL;
	}

	/* sift the new item up until its parent is not smaller */
	while (child != 0)
	{
		size_t parent = HEAP_PARENT(child);
		node_t* moved;
		int order;

		order = lda->comper (lda,
			DPTR(lda->node[child]), DLEN(lda->node[child]),
			DPTR(lda->node[parent]), DLEN(lda->node[parent]));
		if (order <= 0) break; /* heap property holds */

		/* exchange the item with its parent and continue from there */
		moved = lda->node[child];
		lda->node[child] = lda->node[parent];
		lda->node[parent] = moved;

		child = parent;
	}

	return lda->size;
}
/*
 * Removes the top item (index 0) from the array maintained as a binary
 * heap and restores the heap property.
 *
 * The top node is released (through lda->freeer if one is set, then
 * through the memory manager), the last item is moved to the top, the
 * size is decreased by one, and the moved item is sifted down, always
 * towards the greater child according to lda->comper.
 *
 * Calling this on an empty array is a programming error caught by the
 * assertion; when the assertion is inactive the call is a no-op.
 */
void qse_lda_popheap (lda_t* lda)
{
	size_t cur, child;
	node_t* tmp;

	QSE_ASSERT (lda->size > 0);

	/* without this guard an empty array would have node[0] dereferenced
	 * and the unsigned size decremented below zero */
	if (lda->size <= 0) return;

	/* destroy the top */
	tmp = lda->node[0];
	if (lda->freeer) lda->freeer (lda, DPTR(tmp), DLEN(tmp));
	QSE_MMGR_FREE (lda->mmgr, tmp);

	/* move the last item to the top position and shrink the size */
	lda->size--;
	lda->node[0] = lda->node[lda->size];

	/* clear the vacated slot so that neither a duplicate of the moved
	 * pointer nor (when the last item was popped) a pointer to the node
	 * just freed lingers beyond the logical size */
	lda->node[lda->size] = QSE_NULL;

	if (lda->size <= 1) return; /* at most 1 item left. nothing to reorder */

	for (cur = 0; cur < lda->size; cur = child)
	{
		size_t left, right;
		int n;

		left = HEAP_LEFT(cur);
		right = HEAP_RIGHT(cur);

		if (left >= lda->size)
		{
			/* the left child does not exist.
			 * reached the bottom. abort exchange */
			break;
		}

		if (right >= lda->size)
		{
			/* the right child does not exist. only the left */
			child = left;
		}
		else
		{
			/* get the larger child of the two */
			n = lda->comper (lda,
				DPTR(lda->node[left]), DLEN(lda->node[left]),
				DPTR(lda->node[right]), DLEN(lda->node[right]));
			child = (n > 0)? left: right;
		}

		/* compare the current one with the child */
		n = lda->comper (lda,
			DPTR(lda->node[cur]), DLEN(lda->node[cur]),
			DPTR(lda->node[child]), DLEN(lda->node[child]));
		if (n > 0) break; /* current one is larger. stop exchange */

		/* swap the current with the child */
		tmp = lda->node[cur];
		lda->node[cur] = lda->node[child];
		lda->node[child] = tmp;
	}
}

View File

@ -19,10 +19,9 @@
*/
#include <qse/cmn/main.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <qse/cmn/str.h>
#include <locale.h>
#include "mem.h"
#if defined(_WIN32) && !defined(__MINGW32__)
@ -37,62 +36,62 @@ int qse_runmain (int argc, qse_achar_t* argv[], int(*mf) (int,qse_char_t*[]))
{
int i, ret;
qse_char_t** v;
qse_mmgr_t* mmgr = QSE_MMGR_GETDFL ();
setlocale (LC_ALL, "");
v = (qse_char_t**) malloc (argc * QSE_SIZEOF(qse_char_t*));
if (v == NULL) return -1;
v = (qse_char_t**) QSE_MMGR_ALLOC (
mmgr, argc * QSE_SIZEOF(qse_char_t*));
if (v == QSE_NULL) return -1;
for (i = 0; i < argc; i++) v[i] = QSE_NULL;
for (i = 0; i < argc; i++) v[i] = NULL;
for (i = 0; i < argc; i++)
{
qse_size_t n, len, rem;
char* p = argv[i];
qse_size_t n, len, nlen;
qse_size_t mbslen;
len = 0; rem = strlen (p);
while (*p != '\0')
mbslen = qse_mbslen (argv[i]);
n = qse_mbstowcslen (argv[i], &len);
if (n < mbslen)
{
int x = mblen (p, rem);
if (x == -1)
{
ret = -1;
goto exit_main;
}
if (x == 0) break;
p += x; rem -= x; len++;
ret = -1; goto oops;
}
#if (defined(vms) || defined(__vms)) && (QSE_SIZEOF_VOID_P >= 8)
v[i] = (qse_char_t*) _malloc32 ((len+1)*QSE_SIZEOF(qse_char_t));
#else
v[i] = (qse_char_t*) malloc ((len+1)*QSE_SIZEOF(qse_char_t));
#endif
if (v[i] == NULL)
len++; /* include the terminating null */
v[i] = (qse_char_t*) QSE_MMGR_ALLOC (
mmgr, len*QSE_SIZEOF(qse_char_t));
if (v[i] == QSE_NULL)
{
ret = -1;
goto exit_main;
ret = -1; goto oops;
}
n = mbstowcs (v[i], argv[i], len);
if (n == (size_t)-1)
nlen = len;
n = qse_mbstowcs (argv[i], v[i], &nlen);
if (nlen >= len)
{
/* error */
return -1;
/* no null-termination */
ret = -1; goto oops;
}
if (argv[i][n] != '\0')
{
/* partial processing */
ret = -1; goto oops;
}
if (n == len) v[i][len] = QSE_T('\0');
}
/* TODO: envp... */
//ret = mf (argc, v, NULL);
//ret = mf (argc, v, QSE_NULL);
ret = mf (argc, v);
exit_main:
oops:
for (i = 0; i < argc; i++)
{
if (v[i] != NULL) free (v[i]);
if (v[i] != QSE_NULL) QSE_MMGR_FREE (mmgr, v[i]);
}
free (v);
QSE_MMGR_FREE (mmgr, v);
return ret;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: str_bas.c 320 2009-12-21 12:29:52Z hyunghwan.chung $
* $Id: str_bas.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -36,6 +36,20 @@ qse_size_t qse_strbytes (const qse_char_t* str)
return (p - str) * QSE_SIZEOF(qse_char_t);
}
/*
 * Returns the number of qse_mchar_t units in the null-terminated
 * multibyte string 'mbs', not counting the terminating null.
 */
qse_size_t qse_mbslen (const qse_mchar_t* mbs)
{
	const qse_mchar_t* p = mbs;

	/* compare against a multibyte null literal; the default-character
	 * literal macro is not tied to qse_mchar_t */
	while (*p != '\0') p++;

	return p - mbs;
}
/*
 * Returns the number of qse_wchar_t units in the null-terminated
 * wide-character string 'wcs', not counting the terminating null.
 */
qse_size_t qse_wcslen (const qse_wchar_t* wcs)
{
	const qse_wchar_t* p = wcs;

	/* compare against a wide null literal; the default-character
	 * literal macro is not tied to qse_wchar_t */
	while (*p != L'\0') p++;

	return p - wcs;
}
qse_size_t qse_strcpy (qse_char_t* buf, const qse_char_t* str)
{
qse_char_t* org = buf;

View File

@ -1,5 +1,5 @@
/*
* $Id: str_cnv.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: str_cnv.c 323 2010-04-05 12:50:01Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -133,6 +133,45 @@ qse_ulong_t qse_strxtoulong (const qse_char_t* str, qse_size_t len)
return v;
}
/*
 * Counts how many wide characters the null-terminated multibyte string
 * 'mcs' would convert to, without storing the converted characters.
 *
 * Conversion stops at the first sequence for which qse_mbtowc() returns
 * 0 or a length greater than the remaining input (an invalid or
 * insufficient sequence).
 *
 * If 'wcslen' is not null, the number of wide characters counted is
 * stored there. The return value is the number of multibyte units
 * consumed; it equals the length of 'mcs' only when the whole string
 * was convertible.
 */
qse_size_t qse_mbstowcslen (const qse_mchar_t* mcs, qse_size_t* wcslen)
{
	qse_wchar_t wc;
	const qse_mchar_t* cur;
	qse_size_t remaining, consumed, count;

	/* qse_mbtowc() needs the number of units available, so measure
	 * the input first */
	for (cur = mcs; *cur != '\0'; cur++) /* nothing */;
	remaining = cur - mcs;

	count = 0;
	cur = mcs;
	while (remaining > 0)
	{
		consumed = qse_mbtowc (cur, remaining, &wc);

		/* insufficient input or wrong sequence */
		if (consumed == 0 || consumed > remaining) break;

		count++;
		cur += consumed;
		remaining -= consumed;
	}

	if (wcslen) *wcslen = count;
	return cur - mcs;
}
/*
 * Counts how many wide characters the first 'mcslen' units of the
 * multibyte string 'mcs' would convert to, without storing the
 * converted characters.
 *
 * Conversion stops at the first sequence for which qse_mbtowc() returns
 * 0 or a length greater than the remaining input (an invalid or
 * insufficient sequence).
 *
 * If 'wcslen' is not null, the number of wide characters counted is
 * stored there. The return value is the number of multibyte units
 * consumed.
 */
qse_size_t qse_mbsntowcsnlen (const qse_mchar_t* mcs, qse_size_t mcslen, qse_size_t* wcslen)
{
	qse_wchar_t wc;
	const qse_mchar_t* cur = mcs;
	qse_size_t remaining = mcslen;
	qse_size_t count = 0;

	while (remaining > 0)
	{
		qse_size_t consumed = qse_mbtowc (cur, remaining, &wc);

		/* insufficient or invalid sequence */
		if (consumed == 0 || consumed > remaining) break;

		count++;
		cur += consumed;
		remaining -= consumed;
	}

	if (wcslen) *wcslen = count;
	return mcslen - remaining;
}
qse_size_t qse_mbstowcs (
const qse_mchar_t* mbs, qse_wchar_t* wcs, qse_size_t* wcslen)
{
@ -141,24 +180,26 @@ qse_size_t qse_mbstowcs (
/* get the length of mbs and pass it to qse_mbsntowcsn as
* qse_mbtowc called by qse_mbsntowcsn needs it. */
wlen = *wcslen;
if (wlen <= 0)
{
/* buffer too small. also cannot null-terminate it */
*wcslen = 0;
return 0; /* 0 byte processed */
}
for (mp = mbs; *mp != '\0'; mp++);
if (*wcslen <= 0)
{
/* buffer too small. cannot null-terminate it */
return 0;
}
if (*wcslen == 1)
{
wcs[0] = L'\0';
return 0;
}
wlen = *wcslen - 1;
mlen = qse_mbsntowcsn (mbs, mp - mbs, wcs, &wlen);
if (wlen < *wcslen)
{
/* null-terminate wcs if it is large enough. */
wcs[wlen] = L'\0';
}
wcs[wlen] = L'\0';
*wcslen = wlen;
/* if null-terminated properly, the output wcslen must be less than
* the input wcslen. (the input length includes the terminating null
* while the output length excludes the terminating null) */
*wcslen = wlen;
return mlen;
}
@ -305,6 +346,29 @@ qse_size_t qse_wcsntombsn (
return p - wcs;
}
/*
 * Converts the null-terminated multibyte string 'mbs' into the buffer
 * 'wcs' of 'wcslen' wide characters via qse_mbstowcs() and reports
 * whether the conversion was complete.
 *
 * Returns 0 only if the result fit in the buffer together with its
 * terminating null and every unit of 'mbs' was consumed.
 * Returns -1 if the buffer was too small to be null-terminated, or if
 * conversion stopped before the end of 'mbs' (incomplete or invalid
 * sequence).
 */
int qse_mbstowcs_strict (
	const qse_mchar_t* mbs, qse_wchar_t* wcs, qse_size_t wcslen)
{
	qse_size_t stored = wcslen;
	qse_size_t consumed;

	consumed = qse_mbstowcs (mbs, wcs, &stored);

	/* a properly null-terminated result always reports fewer wide
	 * characters than the buffer can hold. this check comes first so
	 * that 'mbs' is not inspected when the buffer was unusable */
	if (stored >= wcslen) return -1;

	/* 'consumed' is the number of units processed; anything other than
	 * the terminating null at that position means an early stop */
	return (mbs[consumed] != QSE_MT('\0'))? -1: 0;
}
int qse_wcstombs_strict (
const qse_wchar_t* wcs, qse_mchar_t* mbs, qse_size_t mbslen)
{
@ -312,13 +376,6 @@ int qse_wcstombs_strict (
qse_size_t mn = mbslen;
n = qse_wcstombs (wcs, mbs, &mn);
if (wcs[n] != QSE_WT('\0'))
{
/* if qse_wcstombs() processed all wide characters,
* the character at position 'n' should be a null character
* as 'n' is the number of wide characters processed. */
return -1;
}
if (mn >= mbslen)
{
/* mbs not big enough to be null-terminated.
@ -326,6 +383,14 @@ int qse_wcstombs_strict (
* mn should be less than mbslen. */
return -1;
}
if (wcs[n] != QSE_WT('\0'))
{
/* if qse_wcstombs() processed all wide characters,
* the character at position 'n' should be a null character
* as 'n' is the number of wide characters processed. */
return -1;
}
return 0;
}