enhanced index and match

This commit is contained in:
hyung-hwan 2009-09-17 00:35:29 +00:00
parent 63c12720cb
commit ae7b0a5bdd
9 changed files with 222 additions and 37 deletions

View File

@ -13,10 +13,10 @@ into an application written in C and/or C++. A hosting application can
- change language features supported by setting options.
The interpreter implements the language described in the book
The AWK Programming Language (http://cm.bell-labs.com/cm/cs/awkbook/) with
The AWK Programming Language(http://cm.bell-labs.com/cm/cs/awkbook/) with
some extensions.
@section awk_ext EXTENSION
@section awk_ext EXTENSIONS
Some language extensions are implemented and they can be enabled by setting the
corresponding options.
@ -83,13 +83,30 @@ BEGIN {
}
@endcode
@subsectin awk_ext_fnc EXTENDED FUNCTIONS
index() and match() can accept the third parameter indicating the position
where the search should begin. The negative position enables backward search.
@subsection awk_ext_return RETURN
The return statement is valid in BEGIN blocks, END blocks, and pattern-action
blocks as well as in functions. The execution of a calling block is aborted
once the return statement is executed.
@subsection awk_ext_comment COMMENT
You can use the C-style comment as well as the pound comment.
@subsection awk_ext_fnc EXTENDED FUNCTIONS
index() and match() can accept the third parameter indicating the position
where the search begins. A negative value indicates a position from the back.
@code
BEGIN {
xstr = "abcdefabcdefabcdef";
xsub = "abc";
xlen = length(xsub);
i = 1;
while ((i = index(xstr, xsub, i)) > 0)
{
print i, substr(xstr, i, xlen);
i += xlen;
}
}
@endcode

View File

@ -1,5 +1,5 @@
/*
* $Id: str.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: str.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -250,7 +250,7 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
* represented by its beginning pointer and length.
*
* For two strings to be equal, they need to have the same length and all
* characters in the first string should be equal to their counterpart in the
* characters in the first string must be equal to their counterpart in the
* second string.
*
* The following code snippet compares "foo" and "FoO" case-insensitively.
@ -258,31 +258,84 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
* qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3);
* @endcode
*
* @return
* The qse_strxncasecmp() returns 0 if two strings are equal, a positive
* number if the first string is larger, -1 if the second string is larger.
* @return 0 if two strings are equal,
* a positive number if the first string is larger,
* -1 if the second string is larger.
*
*/
int qse_strxncasecmp (
const qse_char_t* s1 /* the pointer to the first string */,
qse_size_t len1 /* the length of the first string */,
const qse_char_t* s2 /* the pointer to the second string */,
qse_size_t len2 /* the length of the second string */
const qse_char_t* s1, /**< pointer to the first string */
qse_size_t len1, /**< length of the first string */
const qse_char_t* s2, /**< pointer to the second string */
qse_size_t len2 /**< length of the second string */
);
qse_char_t* qse_strdup (const qse_char_t* str, qse_mmgr_t* mmgr);
qse_char_t* qse_strxdup (
const qse_char_t* str, qse_size_t len, qse_mmgr_t* mmgr);
qse_char_t* qse_strxdup2 (
const qse_char_t* str1, qse_size_t len1,
const qse_char_t* str2, qse_size_t len2, qse_mmgr_t* mmgr);
qse_char_t* qse_strdup (
const qse_char_t* str,
qse_mmgr_t* mmgr
);
qse_char_t* qse_strxdup (
const qse_char_t* str,
qse_size_t len,
qse_mmgr_t* mmgr
);
qse_char_t* qse_strxdup2 (
const qse_char_t* str1,
qse_size_t len1,
const qse_char_t* str2,
qse_size_t len2,
qse_mmgr_t* mmgr
);
/**
* The qse_strstr() function searches a string @a str for the first occurrence
* of a substring @a sub.
* @return pointer to the first occurrence in @a str if @a sub is found,
* QSE_NULL if not.
*/
qse_char_t* qse_strstr (
const qse_char_t* str,
const qse_char_t* sub
);
qse_char_t* qse_strstr (const qse_char_t* str, const qse_char_t* sub);
qse_char_t* qse_strxstr (
const qse_char_t* str, qse_size_t size, const qse_char_t* sub);
const qse_char_t* str,
qse_size_t size,
const qse_char_t* sub
);
qse_char_t* qse_strxnstr (
const qse_char_t* str, qse_size_t strsz,
const qse_char_t* sub, qse_size_t subsz);
const qse_char_t* str,
qse_size_t strsz,
const qse_char_t* sub,
qse_size_t subsz
);
/**
* The qse_strrstr() function searches a string @a str for the last occurrence
* of a substring @a sub.
* @return pointer to the last occurrence in @a str if @a sub is found,
* QSE_NULL if not.
*/
qse_char_t* qse_strrstr (
const qse_char_t* str,
const qse_char_t* sub
);
qse_char_t* qse_strxrstr (
const qse_char_t* str,
qse_size_t size,
const qse_char_t* sub
);
qse_char_t* qse_strxnrstr (
const qse_char_t* str,
qse_size_t strsz,
const qse_char_t* sub,
qse_size_t subsz
);
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c);
qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c);

View File

@ -1,5 +1,5 @@
/*
* $Id: macros.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: macros.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -45,8 +45,19 @@
# define QSE_NULL ((void*)0)
#endif
/**
* The QSE_MCHAR_EOF macro defines an EOF character.
*/
#define QSE_MCHAR_EOF ((qse_mcint_t)-1)
/**
* The QSE_WCHAR_EOF macro defines an EOF character.
*/
#define QSE_WCHAR_EOF ((qse_wcint_t)-1)
/**
* The QSE_CHAR_EOF macro defines an EOF character.
*/
#define QSE_CHAR_EOF ((qse_cint_t)-1)
/**

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 288 2009-09-15 14:03:15Z hyunghwan.chung $
* $Id: fnc.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -462,7 +462,6 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv;
if (start < 1) start = 1;
}
if (a0->type == QSE_AWK_VAL_STR)
@ -492,9 +491,14 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
}
}
ptr = (start > len0)?
/* TODO: ignorecase... */
if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
ptr = (start > len0 || start <= 0)?
QSE_NULL:
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
@ -1305,10 +1309,8 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv;
if (start < 1) start = 1;
}
if (a0->type == QSE_AWK_VAL_STR)
{
str0 = ((qse_awk_val_str_t*)a0)->ptr;
@ -1356,7 +1358,10 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
}
if (start > len0) n = 0;
if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
if (start > len0 || start <= 0) n = 0;
else
{
n = QSE_AWK_MATCHREX (

View File

@ -1,5 +1,5 @@
/*
* $Id: map.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: map.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -57,17 +57,20 @@ static int reorganize (map_t* map);
static size_t hash_key (map_t* map, const void* kptr, size_t klen)
{
size_t n = 0;
/*size_t h = 2166136261;*/
/*size_t h = 0;*/
size_t h = 5381;
const byte_t* p = (const byte_t*)kptr;
const byte_t* bound = p + klen;
while (p < bound)
{
n = n * 31 + *p++;
p++;
/*h = (h * 16777619) ^ *p++;*/
/*h = h * 31 + *p++;*/
h = ((h << 5) + h) + *p++;
}
return n;
return h ;
}
static int comp_key (map_t* map,

View File

@ -1,5 +1,5 @@
/*
* $Id: str_bas.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: str_bas.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -543,6 +543,47 @@ qse_char_t* qse_strxnstr (
return QSE_NULL;
}
qse_char_t* qse_strrstr (const qse_char_t* str, const qse_char_t* sub)
{
return qse_strxnrstr (str, qse_strlen(str), sub, qse_strlen(sub));
}
/*
 * Finds the last occurrence of the null-terminated string sub within the
 * first size characters of str. The length of sub is measured with
 * qse_strlen() and the search itself is delegated to qse_strxnrstr().
 */
qse_char_t* qse_strxrstr (
	const qse_char_t* str, qse_size_t size, const qse_char_t* sub)
{
	qse_size_t sub_len = qse_strlen (sub);

	return qse_strxnrstr (str, size, sub, sub_len);
}
/*
 * Searches the strsz characters starting at str for the last occurrence of
 * the subsz characters starting at sub.
 *
 * Returns a pointer to the beginning of the last occurrence inside str,
 * str + strsz if subsz is 0 (an empty substring matches at the very end),
 * or QSE_NULL if sub does not occur or is longer than str.
 */
qse_char_t* qse_strxnrstr (
	const qse_char_t* str, qse_size_t strsz,
	const qse_char_t* sub, qse_size_t subsz)
{
	qse_size_t pos;

	if (subsz == 0) return (qse_char_t*)(str + strsz);
	if (strsz < subsz) return QSE_NULL;

	/*
	 * pos is kept one greater than the candidate start offset so that the
	 * unsigned counter can terminate at 0 without wrapping around and so
	 * that no pointer preceding str is ever formed. Decrementing a pointer
	 * below the start of the array and comparing it against str is
	 * undefined behaviour in C.
	 */
	for (pos = strsz - subsz + 1; pos > 0; pos--)
	{
		const qse_char_t* cand = str + (pos - 1);
		qse_size_t i = 0;

		/* compare the candidate window against sub, character by character */
		while (i < subsz && cand[i] == sub[i]) i++;

		if (i >= subsz) return (qse_char_t*)cand;
	}

	return QSE_NULL;
}
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c)
{
while (*str != QSE_T('\0'))

View File

@ -0,0 +1,25 @@
#
# test the third parameter(starting position) of index and match
#
BEGIN {
# subject string and the substring searched for in both loops below
xstr = "abcdefabcdefabcdef";
xsub = "abc";
xlen = length(xsub);
# walk through every occurrence using index() with a starting position;
# the search resumes right after the previous hit
i = 1;
while ((i = index(xstr, xsub, i)) > 0)
{
print i, substr(xstr, i, xlen);
i += xlen;
}
print "----------------";
# same walk using match() with a starting position; the location and
# length of each hit are read back from RSTART and RLENGTH
i = 1;
while (match(xstr, xsub, i) > 0)
{
print RSTART, substr(xstr, RSTART, RLENGTH);
i = RSTART + RLENGTH;
}
}

View File

@ -1990,6 +1990,35 @@ pq...r AAA2
kbs ddd
dif cccc
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-038.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------
BEGIN {
xstr = "abcdefabcdefabcdef";
xsub = "abc";
xlen = length (xsub);
i = 1;
while ((i = index (xstr,xsub,i) > 0))
{
print i,substr (xstr,i,xlen);
i += xlen;
}
print "----------------";
i = 1;
while ((match (xstr,xsub,i) > 0))
{
print RSTART,substr (xstr,RSTART,RLENGTH);
i = (RSTART + RLENGTH);
}
}
1 abc
7 abc
13 abc
----------------
1 abc
7 abc
13 abc
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------

View File

@ -148,6 +148,7 @@ PROGS="
lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035
lang-036.awk/lang-036.dat//--newline=on -o-
lang-037.awk/lang-037.dat//--newline=on -o-
lang-038.awk///--newline=on -o-
quicksort.awk/quicksort.dat//
quicksort2.awk/quicksort2.dat//