enhanced string cache for awk

This commit is contained in:
hyung-hwan 2009-09-19 22:28:49 +00:00
parent ae7b0a5bdd
commit bc43362962
9 changed files with 234 additions and 167 deletions

View File

@ -804,7 +804,7 @@ HTML_ALIGN_MEMBERS = YES
# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
HTML_DYNAMIC_SECTIONS = YES
HTML_DYNAMIC_SECTIONS = NO
# If the GENERATE_DOCSET tag is set to YES, additional index files
# will be generated that can be used as input for Apple's Xcode 3

View File

@ -88,6 +88,24 @@ The return statement is valid in BEGIN blocks, END blocks, and pattern-action
blocks as well as in functions. The execution of a calling block is aborted
once the return statement is executed.
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
@code
function getarray() {
local a;
a["one"] = 1;
a["two"] = 2;
a["three"] = 3;
return a;
}
BEGIN {
local x;
x = getarray();
for (i in x) print i, x[i];
}
@endcode
@subsection awk_ext_comment COMMENT
You can use the C-style comment as well as the pound comment.
@ -110,4 +128,19 @@ BEGIN {
}
@endcode
@subsection awk_ext_binnum BINARY NUMBER
Use 0b to begin a binary number sequence.
@code
BEGIN { print 0b1101; }
@endcode
@subsection awk_ext_unicode UNICODE ESCAPE SEQUENCE
If QSE is compiled for #QSE_CHAR_IS_WCHAR, you can use \\u and \\U in a
string to specify a character by its Unicode code point.
@code
BEGIN { print "string=>[\uB313\U0000B313]"; }
@endcode
*/

View File

@ -1,5 +1,5 @@
/*
* $Id: str.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
* $Id: str.h 290 2009-09-19 04:28:49Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -32,11 +32,11 @@
* dealing with a string pointer and length.
*/
#define QSE_STR_LEN(s) ((s)->len)
#define QSE_STR_PTR(s) ((s)->ptr)
#define QSE_STR_CAPA(s) ((s)->capa)
#define QSE_STR_CHAR(s,idx) ((s)->ptr[idx])
#define QSE_STR_SIZER(s) ((s)->sizer)
#define QSE_STR_LEN(s) ((s)->len) /**< string length */
#define QSE_STR_PTR(s) ((s)->ptr) /**< string/buffer pointer */
#define QSE_STR_CAPA(s) ((s)->capa) /**< buffer capacity */
#define QSE_STR_CHAR(s,idx) ((s)->ptr[idx]) /**< character at given position */
#define QSE_STR_SIZER(s) ((s)->sizer) /**< buffer resizer function */
typedef struct qse_str_t qse_str_t;
typedef qse_size_t (*qse_str_sizer_t) (qse_str_t* data, qse_size_t hint);
@ -47,10 +47,10 @@ typedef qse_size_t (*qse_str_sizer_t) (qse_str_t* data, qse_size_t hint);
struct qse_str_t
{
QSE_DEFINE_COMMON_FIELDS (str)
qse_str_sizer_t sizer;
qse_char_t* ptr;
qse_size_t len;
qse_size_t capa;
qse_str_sizer_t sizer; /**< buffer resizer function */
qse_char_t* ptr; /**< buffer/string pointer */
qse_size_t len; /**< string length */
qse_size_t capa; /**< buffer capacity */
};
/* int qse_chartonum (qse_char_t c, int base) */
@ -291,7 +291,7 @@ qse_char_t* qse_strxdup2 (
/**
* The qse_strstr() function searches a string @a str for the first occurrence
* of a substring @a sub
* of a substring @a sub.
* @return pointer to the first occurrence in @a str if @a sub is found,
* QSE_NULL if not.
*/
@ -313,9 +313,27 @@ qse_char_t* qse_strxnstr (
qse_size_t subsz
);
qse_char_t* qse_strcasestr (
const qse_char_t* str,
const qse_char_t* sub
);
qse_char_t* qse_strxcasestr (
const qse_char_t* str,
qse_size_t size,
const qse_char_t* sub
);
qse_char_t* qse_strxncasestr (
const qse_char_t* str,
qse_size_t strsz,
const qse_char_t* sub,
qse_size_t subsz
);
/**
* The qse_strstr() function searchs a string @a str for the last occurrence
* of a substring @a sub
* The qse_strrstr() function searches a string @a str for the last occurrence
* of a substring @a sub.
* @return pointer to the last occurrence in @a str if @a sub is found,
* QSE_NULL if not.
*/

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: awk.h 290 2009-09-19 04:28:49Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -42,6 +42,11 @@ typedef struct qse_awk_tree_t qse_awk_tree_t;
#include "err.h"
#include "misc.h"
#define ENABLE_FEATURE_SCACHE
#define FEATURE_SCACHE_NUM_BLOCKS 16
#define FEATURE_SCACHE_BLOCK_UNIT 16
#define FEATURE_SCACHE_BLOCK_SIZE 128
#define QSE_AWK_MAX_GBLS 9999
#define QSE_AWK_MAX_LCLS 9999
#define QSE_AWK_MAX_PARAMS 9999
@ -256,11 +261,13 @@ struct qse_awk_rtx_t
int exit_level;
qse_awk_val_ref_t* fcache[128];
/*qse_awk_val_str_t* scache32[128];
qse_awk_val_str_t* scache64[128];*/
qse_size_t fcache_count;
/*qse_size_t scache32_count;
qse_size_t scache64_count;*/
#ifdef ENABLE_FEATURE_SCACHE
qse_awk_val_str_t* scache
[FEATURE_SCACHE_NUM_BLOCKS][FEATURE_SCACHE_BLOCK_SIZE];
qse_size_t scache_count[FEATURE_SCACHE_NUM_BLOCKS];
#endif
struct
{

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
* $Id: fnc.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -491,12 +491,12 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
}
}
/* TODO: ignorecase... */
if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
ptr = (start > len0 || start <= 0)?
QSE_NULL:
ptr = (start > len0 || start <= 0)? QSE_NULL:
(rtx->gbl.ignorecase)?
qse_strxncasestr (&str0[start-1], len0-start+1, str1, len1):
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 288 2009-09-15 14:03:15Z hyunghwan.chung $
* $Id: run.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -801,9 +801,6 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
rtx->exit_level = EXIT_NONE;
rtx->fcache_count = 0;
/*rtx->scache32_count = 0;
rtx->scache64_count = 0;*/
rtx->vmgr.ichunk = QSE_NULL;
rtx->vmgr.ifree = QSE_NULL;
rtx->vmgr.rchunk = QSE_NULL;
@ -910,6 +907,7 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
{
if (rtx->pattern_range_state != QSE_NULL)
QSE_AWK_FREE (rtx->awk, rtx->pattern_range_state);
@ -1012,17 +1010,21 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
qse_awk_rtx_freeval (rtx, (qse_awk_val_t*)tmp, QSE_FALSE);
}
/*while (rtx->scache32_count > 0)
#ifdef ENABLE_FEATURE_SCACHE
{
qse_awk_val_str_t* tmp = rtx->scache32[--rtx->scache32_count];
qse_awk_rtx_freeval (rtx, (qse_awk_val_t*)tmp, QSE_FALSE);
int i;
for (i = 0; i < QSE_COUNTOF(rtx->scache_count); i++)
{
while (rtx->scache_count[i] > 0)
{
qse_awk_val_str_t* t =
rtx->scache[i][--rtx->scache_count[i]];
qse_awk_rtx_freeval (
rtx, (qse_awk_val_t*)t, QSE_FALSE);
}
while (rtx->scache64_count > 0)
{
qse_awk_val_str_t* tmp = rtx->scache64[--rtx->scache64_count];
qse_awk_rtx_freeval (rtx, (qse_awk_val_t*)tmp, QSE_FALSE);
}*/
}
}
#endif
qse_awk_rtx_freevalchunk (rtx, rtx->vmgr.ichunk);
qse_awk_rtx_freevalchunk (rtx, rtx->vmgr.rchunk);

View File

@ -1,5 +1,5 @@
/*
* $Id: val.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: val.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -72,23 +72,6 @@ qse_awk_val_t* qse_awk_rtx_makeintval (qse_awk_rtx_t* rtx, qse_long_t v)
return (qse_awk_val_t*)&awk_int[v-awk_int[0].val];
}
/*
if (run->icache_count > 0)
{
val = run->icache[--run->icache_count];
}
else
{
val = (qse_awk_val_int_t*) QSE_AWK_ALLOC (
run->awk, QSE_SIZEOF(qse_awk_val_int_t));
if (val == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return QSE_NULL;
}
}
*/
if (rtx->vmgr.ifree == QSE_NULL)
{
qse_awk_val_ichunk_t* c;
@ -148,23 +131,6 @@ qse_awk_val_t* qse_awk_rtx_makerealval (qse_awk_rtx_t* rtx, qse_real_t v)
{
qse_awk_val_real_t* val;
/*
if (run->rcache_count > 0)
{
val = run->rcache[--run->rcache_count];
}
else
{
val = (qse_awk_val_real_t*) QSE_AWK_ALLOC (
run->awk, QSE_SIZEOF(qse_awk_val_real_t));
if (val == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return QSE_NULL;
}
}
*/
if (rtx->vmgr.rfree == QSE_NULL)
{
qse_awk_val_rchunk_t* c;
@ -225,27 +191,23 @@ qse_awk_val_t* qse_awk_rtx_makestrval0 (
qse_awk_val_t* qse_awk_rtx_makestrval (
qse_awk_rtx_t* rtx, const qse_char_t* str, qse_size_t len)
{
qse_awk_val_str_t* val;
qse_awk_val_str_t* val = QSE_NULL;
qse_size_t rlen = len;
/*if (rlen <= 32)
#ifdef ENABLE_FEATURE_SCACHE
qse_size_t i;
i = rlen / FEATURE_SCACHE_BLOCK_UNIT;
if (i < QSE_COUNTOF(rtx->scache_count))
{
if (run->scache32_count > 0)
rlen = (i + 1) * FEATURE_SCACHE_BLOCK_UNIT - 1;
if (rtx->scache_count[i] > 0)
{
val = run->scache32[--run->scache32_count];
val = rtx->scache[i][--rtx->scache_count[i]];
goto init;
}
rlen = 32;
}
else if (rlen <= 64)
{
if (run->scache64_count > 0)
{
val = run->scache64[--run->scache64_count];
goto init;
}
rlen = 64;
}*/
#endif
val = (qse_awk_val_str_t*) QSE_AWK_ALLOC (
rtx->awk,
@ -256,15 +218,15 @@ qse_awk_val_t* qse_awk_rtx_makestrval (
qse_awk_rtx_seterrnum (rtx, QSE_AWK_ENOMEM, QSE_NULL);
return QSE_NULL;
}
/*
#ifdef ENABLE_FEATURE_SCACHE
init:
*/
#endif
val->type = QSE_AWK_VAL_STR;
val->ref = 0;
val->nstr = 0;
val->len = len;
val->ptr = (qse_char_t*)(val + 1);
/*qse_strxncpy (val->ptr, len+1, str, len);*/
qse_strncpy (val->ptr, str, len);
#ifdef DEBUG_VAL
@ -302,24 +264,20 @@ qse_awk_val_t* qse_awk_rtx_makestrval2 (
qse_awk_val_str_t* val;
qse_size_t rlen = len1 + len2;
/*if (rlen <= 32)
#ifdef ENABLE_FEATURE_SCACHE
int i;
i = rlen / FEATURE_SCACHE_BLOCK_UNIT;
if (i < QSE_COUNTOF(rtx->scache_count))
{
if (run->scache32_count > 0)
rlen = (i + 1) * FEATURE_SCACHE_BLOCK_UNIT - 1;
if (rtx->scache_count[i] > 0)
{
val = run->scache32[--run->scache32_count];
val = rtx->scache[i][--rtx->scache_count[i]];
goto init;
}
rlen = 32;
}
else if (rlen <= 64)
{
if (run->scache64_count > 0)
{
val = run->scache64[--run->scache64_count];
goto init;
}
rlen = 64;
}*/
#endif
val = (qse_awk_val_str_t*) QSE_AWK_ALLOC (
rtx->awk,
@ -331,16 +289,14 @@ qse_awk_val_t* qse_awk_rtx_makestrval2 (
return QSE_NULL;
}
/*
#ifdef ENABLE_FEATURE_SCACHE
init:
*/
#endif
val->type = QSE_AWK_VAL_STR;
val->ref = 0;
val->nstr = 0;
val->len = len1 + len2;
val->ptr = (qse_char_t*)(val + 1);
/*qse_strxncpy (val->ptr, len1+1, str1, len1);
qse_strxncpy (val->ptr[len1], len2+1, str2, len2);*/
qse_strncpy (val->ptr, str1, len1);
qse_strncpy (&val->ptr[len1], str2, len2);
@ -413,15 +369,15 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
static void free_mapval (qse_map_t* map, void* dptr, qse_size_t dlen)
{
qse_awk_rtx_t* run = *(qse_awk_rtx_t**)QSE_XTN(map);
qse_awk_rtx_t* rtx = *(qse_awk_rtx_t**)QSE_XTN(map);
#ifdef DEBUG_VAL
qse_dprintf (QSE_T("refdown in map free..."));
qse_awk_dprintval (run, dptr);
qse_awk_dprintval (rtx, dptr);
qse_dprintf (QSE_T("\n"));
#endif
qse_awk_rtx_refdownval (run, dptr);
qse_awk_rtx_refdownval (rtx, dptr);
}
static void same_mapval (qse_map_t* map, void* dptr, qse_size_t dlen)
@ -545,7 +501,8 @@ qse_bool_t qse_awk_rtx_isstaticval (qse_awk_rtx_t* rtx, qse_awk_val_t* val)
return IS_STATICVAL(val);
}
void qse_awk_rtx_freeval (qse_awk_rtx_t* rtx, qse_awk_val_t* val, qse_bool_t cache)
void qse_awk_rtx_freeval (
qse_awk_rtx_t* rtx, qse_awk_val_t* val, qse_bool_t cache)
{
if (IS_STATICVAL(val)) return;
@ -561,52 +518,36 @@ void qse_awk_rtx_freeval (qse_awk_rtx_t* rtx, qse_awk_val_t* val, qse_bool_t cac
}
else if (val->type == QSE_AWK_VAL_INT)
{
/*
if (cache && rtx->icache_count < QSE_COUNTOF(rtx->icache))
{
rtx->icache[rtx->icache_count++] =
(qse_awk_val_int_t*)val;
}
else QSE_AWK_FREE (rtx->awk, val);
*/
((qse_awk_val_int_t*)val)->nde = (qse_awk_nde_int_t*)rtx->vmgr.ifree;
((qse_awk_val_int_t*)val)->nde =
(qse_awk_nde_int_t*)rtx->vmgr.ifree;
rtx->vmgr.ifree = (qse_awk_val_int_t*)val;
}
else if (val->type == QSE_AWK_VAL_REAL)
{
/*
if (cache && rtx->rcache_count < QSE_COUNTOF(rtx->rcache))
{
rtx->rcache[rtx->rcache_count++] =
(qse_awk_val_real_t*)val;
}
else QSE_AWK_FREE (rtx->awk, val);
*/
((qse_awk_val_real_t*)val)->nde = (qse_awk_nde_real_t*)rtx->vmgr.rfree;
((qse_awk_val_real_t*)val)->nde =
(qse_awk_nde_real_t*)rtx->vmgr.rfree;
rtx->vmgr.rfree = (qse_awk_val_real_t*)val;
}
else if (val->type == QSE_AWK_VAL_STR)
{
/*
#ifdef ENABLE_FEATURE_SCACHE
if (cache)
{
qse_awk_val_str_t* v = (qse_awk_val_str_t*)val;
if (v->len <= 32 &&
rtx->scache32_count<QSE_COUNTOF(rtx->scache32))
int i;
i = v->len / FEATURE_SCACHE_BLOCK_UNIT;
if (i < QSE_COUNTOF(rtx->scache_count) &&
rtx->scache_count[i] < QSE_COUNTOF(rtx->scache[i]))
{
rtx->scache32[rtx->scache32_count++] = v;
v->nstr = 0;
}
else if (v->len <= 64 &&
rtx->scache64_count<QSE_COUNTOF(rtx->scache64))
{
rtx->scache64[rtx->scache64_count++] = v;
rtx->scache[i][rtx->scache_count[i]++] = v;
v->nstr = 0;
}
else QSE_AWK_FREE (rtx->awk, val);
}
else*/ QSE_AWK_FREE (rtx->awk, val);
else
#endif
QSE_AWK_FREE (rtx->awk, val);
}
else if (val->type == QSE_AWK_VAL_REX)
{

View File

@ -1,5 +1,5 @@
/*
* $Id: str_bas.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
* $Id: str_bas.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -543,6 +543,72 @@ qse_char_t* qse_strxnstr (
return QSE_NULL;
}
qse_char_t* qse_strcasestr (const qse_char_t* str, const qse_char_t* sub)
{
const qse_char_t* x, * y;
y = sub;
if (*y == QSE_T('\0')) return (qse_char_t*)str;
while (*str != QSE_T('\0'))
{
if (QSE_TOUPPER(*str) != QSE_TOUPPER(*y))
{
str++;
continue;
}
x = str;
while (1)
{
if (*y == QSE_T('\0')) return (qse_char_t*)str;
if (QSE_TOUPPER(*x) != QSE_TOUPPER(*y)) break;
x++; y++;
}
y = sub;
str++;
}
return QSE_NULL;
}
/*
 * Case-insensitive search for the null-terminated string 'sub' within the
 * first 'size' characters of 'str'. The length of 'sub' is measured with
 * qse_strlen() and the actual search is delegated to qse_strxncasestr().
 */
qse_char_t* qse_strxcasestr (
	const qse_char_t* str, qse_size_t size, const qse_char_t* sub)
{
	qse_size_t sublen = qse_strlen (sub);
	return qse_strxncasestr (str, size, sub, sublen);
}
/*
 * Case-insensitive search for the 'subsz' characters at 'sub' within the
 * 'strsz' characters at 'str'. Neither buffer needs to be null-terminated;
 * only the given lengths are honored. Characters are compared through
 * QSE_TOUPPER().
 *
 * Returns a pointer to the first match inside 'str', 'str' itself if
 * 'subsz' is 0, or QSE_NULL if 'sub' does not occur (including the case
 * where 'sub' is longer than 'str').
 */
qse_char_t* qse_strxncasestr (
	const qse_char_t* str, qse_size_t strsz,
	const qse_char_t* sub, qse_size_t subsz)
{
	qse_size_t start, last;

	if (subsz == 0) return (qse_char_t*)str;
	if (subsz > strsz) return QSE_NULL;

	/* the last offset at which a full-length match can still begin */
	last = strsz - subsz;

	for (start = 0; start <= last; start++)
	{
		qse_size_t k = 0;

		/* count how many leading characters agree at this offset */
		while (k < subsz &&
		       QSE_TOUPPER(str[start + k]) == QSE_TOUPPER(sub[k]))
		{
			k++;
		}

		if (k >= subsz) return (qse_char_t*)&str[start];
	}

	return QSE_NULL;
}
qse_char_t* qse_strrstr (const qse_char_t* str, const qse_char_t* sub)
{
return qse_strxnrstr (str, qse_strlen(str), sub, qse_strlen(sub));

View File

@ -1133,14 +1133,14 @@ BEGIN {
}
}
1
2
3
-------------------
4
5
6
-------------------
1
2
3
-------------------
(1,2,3) in a ==> 20
(4,5) not in a
--------------------------------------------------------------------------------
@ -2076,31 +2076,31 @@ int main ()
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk -f wordfreq.awk wordfreq.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------
distinctions 1
list 1
0 3
1 1
end 1
punctuation 1
blank 2
remove 2
freq 3
for 2
_ 2
a 2
in 1
i 4
word 4
nf 1
of 1
gsub 2
tolower 1
alnum 2
wordfreq 1
awk 1
print 2
frequencies 1
case 1
tolower 1
i 4
freq 3
distinctions 1
frequencies 1
list 1
alnum 2
nf 1
punctuation 1
remove 2
awk 1
end 1
gsub 2
of 1
word 4
wordfreq 1
for 2
in 1
0 3
1 1
blank 2
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk -f hanoi.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------