enhanced index and match

This commit is contained in:
hyung-hwan 2009-09-17 00:35:29 +00:00
parent 63c12720cb
commit ae7b0a5bdd
9 changed files with 222 additions and 37 deletions

View File

@ -16,7 +16,7 @@ The interpreter implements the language described in the book
The AWK Programming Language(http://cm.bell-labs.com/cm/cs/awkbook/) with The AWK Programming Language(http://cm.bell-labs.com/cm/cs/awkbook/) with
some extensions. some extensions.
@section awk_ext EXTENSION @section awk_ext EXTENSIONS
Some language extensions are implemented and they can be enabled by setting the Some language extensions are implemented and they can be enabled by setting the
corresponding options. corresponding options.
@ -83,13 +83,30 @@ BEGIN {
} }
@endcode @endcode
@subsectin awk_ext_fnc EXTENDED FUNCTIONS @subsection awk_ext_return RETURN
The return statement is valid in BEGIN blocks, END blocks, and pattern-action
blocks as well as in functions. The execution of a calling block is aborted
once the return statement is executed.
@subsection awk_ext_comment COMMENT
You can use the C-style comment as well as the pound comment.
@subsection awk_ext_fnc EXTENDED FUNCTIONS
index() and match() can accept the third parameter indicating the position index() and match() can accept the third parameter indicating the position
where the search should begin. The negative position enables backward search. where the search begins. A negative value indicates a position from the back.
@code @code
BEGIN { BEGIN {
xstr = "abcdefabcdefabcdef";
xsub = "abc";
xlen = length(xsub);
i = 1;
while ((i = index(xstr, xsub, i)) > 0)
{
print i, substr(xstr, i, xlen);
i += xlen;
}
} }
@endcode @endcode

View File

@ -1,5 +1,5 @@
/* /*
* $Id: str.h 287 2009-09-15 10:01:02Z hyunghwan.chung $ * $Id: str.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -250,7 +250,7 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
* represented by its beginning pointer and length. * represented by its beginning pointer and length.
* *
* For two strings to be equal, they need to have the same length and all * For two strings to be equal, they need to have the same length and all
* characters in the first string should be equal to their counterpart in the * characters in the first string must be equal to their counterpart in the
* second string. * second string.
* *
* The following code snippet compares "foo" and "FoO" case-insensitively. * The following code snippet compares "foo" and "FoO" case-insensitively.
@ -258,31 +258,84 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
* qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3); * qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3);
* @endcode * @endcode
* *
* @return * @return 0 if two strings are equal,
* The qse_strxncasecmp() returns 0 if two strings are equal, a positive * a positive number if the first string is larger,
* number if the first string is larger, -1 if the second string is larger. * -1 if the second string is larger.
* *
*/ */
int qse_strxncasecmp ( int qse_strxncasecmp (
const qse_char_t* s1 /* the pointer to the first string */, const qse_char_t* s1, /**< pointer to the first string */
qse_size_t len1 /* the length of the first string */, qse_size_t len1, /**< length of the first string */
const qse_char_t* s2 /* the pointer to the second string */, const qse_char_t* s2, /**< pointer to the second string */
qse_size_t len2 /* the length of the second string */ qse_size_t len2 /**< length of the second string */
); );
qse_char_t* qse_strdup (const qse_char_t* str, qse_mmgr_t* mmgr); qse_char_t* qse_strdup (
qse_char_t* qse_strxdup ( const qse_char_t* str,
const qse_char_t* str, qse_size_t len, qse_mmgr_t* mmgr); qse_mmgr_t* mmgr
qse_char_t* qse_strxdup2 ( );
const qse_char_t* str1, qse_size_t len1,
const qse_char_t* str2, qse_size_t len2, qse_mmgr_t* mmgr); qse_char_t* qse_strxdup (
const qse_char_t* str,
qse_size_t len,
qse_mmgr_t* mmgr
);
qse_char_t* qse_strxdup2 (
const qse_char_t* str1,
qse_size_t len1,
const qse_char_t* str2,
qse_size_t len2,
qse_mmgr_t* mmgr
);
/**
* The qse_strstr() function searches a string @a str for the first occurrence
* of a substring @a sub.
* @return pointer to the first occurrence in @a str if @a sub is found,
* QSE_NULL if not.
*/
qse_char_t* qse_strstr (
const qse_char_t* str,
const qse_char_t* sub
);
qse_char_t* qse_strstr (const qse_char_t* str, const qse_char_t* sub);
qse_char_t* qse_strxstr ( qse_char_t* qse_strxstr (
const qse_char_t* str, qse_size_t size, const qse_char_t* sub); const qse_char_t* str,
qse_size_t size,
const qse_char_t* sub
);
qse_char_t* qse_strxnstr ( qse_char_t* qse_strxnstr (
const qse_char_t* str, qse_size_t strsz, const qse_char_t* str,
const qse_char_t* sub, qse_size_t subsz); qse_size_t strsz,
const qse_char_t* sub,
qse_size_t subsz
);
/**
* The qse_strrstr() function searches a string @a str for the last occurrence
* of a substring @a sub.
* @return pointer to the last occurrence in @a str if @a sub is found,
* QSE_NULL if not.
*/
qse_char_t* qse_strrstr (
const qse_char_t* str,
const qse_char_t* sub
);
qse_char_t* qse_strxrstr (
const qse_char_t* str,
qse_size_t size,
const qse_char_t* sub
);
qse_char_t* qse_strxnrstr (
const qse_char_t* str,
qse_size_t strsz,
const qse_char_t* sub,
qse_size_t subsz
);
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c); qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c);
qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c); qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c);

View File

@ -1,5 +1,5 @@
/* /*
* $Id: macros.h 287 2009-09-15 10:01:02Z hyunghwan.chung $ * $Id: macros.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -45,8 +45,19 @@
# define QSE_NULL ((void*)0) # define QSE_NULL ((void*)0)
#endif #endif
/**
* The QSE_MCHAR_EOF macro defines an EOF character.
*/
#define QSE_MCHAR_EOF ((qse_mcint_t)-1) #define QSE_MCHAR_EOF ((qse_mcint_t)-1)
/**
* The QSE_WCHAR_EOF macro defines an EOF character.
*/
#define QSE_WCHAR_EOF ((qse_wcint_t)-1) #define QSE_WCHAR_EOF ((qse_wcint_t)-1)
/**
* The QSE_CHAR_EOF macro defines an EOF character.
*/
#define QSE_CHAR_EOF ((qse_cint_t)-1) #define QSE_CHAR_EOF ((qse_cint_t)-1)
/** /**

View File

@ -1,5 +1,5 @@
/* /*
* $Id: fnc.c 288 2009-09-15 14:03:15Z hyunghwan.chung $ * $Id: fnc.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -462,7 +462,6 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv); n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1; if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv; if (n >= 1) start = (qse_long_t)rv;
if (start < 1) start = 1;
} }
if (a0->type == QSE_AWK_VAL_STR) if (a0->type == QSE_AWK_VAL_STR)
@ -492,9 +491,14 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
} }
} }
ptr = (start > len0)? /* TODO: ignorecase... */
if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
ptr = (start > len0 || start <= 0)?
QSE_NULL: QSE_NULL:
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1); qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1); idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
@ -1305,10 +1309,8 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv); n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1; if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv; if (n >= 1) start = (qse_long_t)rv;
if (start < 1) start = 1;
} }
if (a0->type == QSE_AWK_VAL_STR) if (a0->type == QSE_AWK_VAL_STR)
{ {
str0 = ((qse_awk_val_str_t*)a0)->ptr; str0 = ((qse_awk_val_str_t*)a0)->ptr;
@ -1356,7 +1358,10 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1); if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
} }
if (start > len0) n = 0; if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
if (start > len0 || start <= 0) n = 0;
else else
{ {
n = QSE_AWK_MATCHREX ( n = QSE_AWK_MATCHREX (

View File

@ -1,5 +1,5 @@
/* /*
* $Id: map.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ * $Id: map.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -57,17 +57,20 @@ static int reorganize (map_t* map);
static size_t hash_key (map_t* map, const void* kptr, size_t klen) static size_t hash_key (map_t* map, const void* kptr, size_t klen)
{ {
size_t n = 0; /*size_t h = 2166136261;*/
/*size_t h = 0;*/
size_t h = 5381;
const byte_t* p = (const byte_t*)kptr; const byte_t* p = (const byte_t*)kptr;
const byte_t* bound = p + klen; const byte_t* bound = p + klen;
while (p < bound) while (p < bound)
{ {
n = n * 31 + *p++; /*h = (h * 16777619) ^ *p++;*/
p++; /*h = h * 31 + *p++;*/
h = ((h << 5) + h) + *p++;
} }
return n; return h ;
} }
static int comp_key (map_t* map, static int comp_key (map_t* map,

View File

@ -1,5 +1,5 @@
/* /*
* $Id: str_bas.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ * $Id: str_bas.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -543,6 +543,47 @@ qse_char_t* qse_strxnstr (
return QSE_NULL; return QSE_NULL;
} }
/*
 * Searches str for the last occurrence of sub.
 * Both lengths are measured with qse_strlen() and the search itself is
 * delegated to qse_strxnrstr(), whose result is returned unchanged.
 */
qse_char_t* qse_strrstr (const qse_char_t* str, const qse_char_t* sub)
{
	return qse_strxnrstr (
		str, qse_strlen(str),
		sub, qse_strlen(sub)
	);
}
/*
 * Searches the first size characters of str for the last occurrence of sub.
 * The length of sub is measured with qse_strlen(); the search itself is
 * delegated to qse_strxnrstr(), whose result is returned unchanged.
 */
qse_char_t* qse_strxrstr (
	const qse_char_t* str, qse_size_t size, const qse_char_t* sub)
{
	qse_size_t sublen = qse_strlen(sub);

	return qse_strxnrstr (str, size, sub, sublen);
}
/*
 * Searches the strsz characters at str for the last occurrence of the
 * subsz characters at sub.
 *
 * Returns a pointer to the start of the last occurrence inside str,
 * str + strsz if subsz is 0 (an empty substring matches at the end),
 * or QSE_NULL if sub is longer than str or does not occur in it.
 */
qse_char_t* qse_strxnrstr (
	const qse_char_t* str, qse_size_t strsz,
	const qse_char_t* sub, qse_size_t subsz)
{
	qse_size_t remain;

	if (subsz == 0) return (qse_char_t*)(str + strsz);
	if (strsz < subsz) return QSE_NULL;

	/*
	 * Walk the candidate start offsets from the rightmost one down to 0.
	 * 'remain' is the number of candidates still to try, so the offset
	 * under test is remain - 1. Counting offsets instead of decrementing
	 * a pointer keeps every pointer inside [str, str + strsz]; stepping a
	 * pointer below str to end the loop would be undefined behaviour.
	 */
	for (remain = strsz - subsz + 1; remain > 0; remain--)
	{
		const qse_char_t* cand = str + (remain - 1);
		qse_size_t i = 0;

		while (i < subsz && cand[i] == sub[i]) i++;

		/* all subsz characters matched at this offset */
		if (i >= subsz) return (qse_char_t*)cand;
	}

	return QSE_NULL;
}
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c) qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c)
{ {
while (*str != QSE_T('\0')) while (*str != QSE_T('\0'))

View File

@ -0,0 +1,25 @@
#
# test the third parameter(starting position) of index and match
#
# Both loops look for every occurrence of xsub in xstr by restarting the
# search just past the previous hit.
#
BEGIN {
xstr = "abcdefabcdefabcdef";
xsub = "abc";
xlen = length(xsub);
# index(): i is both the starting position passed in and the position
# of the hit returned; the loop ends when index() returns 0.
i = 1;
while ((i = index(xstr, xsub, i)) > 0)
{
print i, substr(xstr, i, xlen);
i += xlen;
}
print "----------------";
# match(): the hit is read from RSTART and RLENGTH, and i is moved to
# the position right after it.
i = 1;
while (match(xstr, xsub, i) > 0)
{
print RSTART, substr(xstr, RSTART, RLENGTH);
i = RSTART + RLENGTH;
}
}

View File

@ -1990,6 +1990,35 @@ pq...r AAA2
kbs ddd kbs ddd
dif cccc dif cccc
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-038.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------
BEGIN {
xstr = "abcdefabcdefabcdef";
xsub = "abc";
xlen = length (xsub);
i = 1;
while ((i = index (xstr,xsub,i) > 0))
{
print i,substr (xstr,i,xlen);
i += xlen;
}
print "----------------";
i = 1;
while ((match (xstr,xsub,i) > 0))
{
print RSTART,substr (xstr,RSTART,RLENGTH);
i = (RSTART + RLENGTH);
}
}
1 abc
7 abc
13 abc
----------------
1 abc
7 abc
13 abc
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1 ../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------

View File

@ -148,6 +148,7 @@ PROGS="
lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035 lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035
lang-036.awk/lang-036.dat//--newline=on -o- lang-036.awk/lang-036.dat//--newline=on -o-
lang-037.awk/lang-037.dat//--newline=on -o- lang-037.awk/lang-037.dat//--newline=on -o-
lang-038.awk///--newline=on -o-
quicksort.awk/quicksort.dat// quicksort.awk/quicksort.dat//
quicksort2.awk/quicksort2.dat// quicksort2.awk/quicksort2.dat//