enhanced index and match
This commit is contained in:
parent
63c12720cb
commit
ae7b0a5bdd
@ -13,10 +13,10 @@ into an application written in C and/or C++. A hosting application can
|
||||
- change language features supported by setting options.
|
||||
|
||||
The interpreter implements the language described in the book
|
||||
The AWK Programming Language (http://cm.bell-labs.com/cm/cs/awkbook/) with
|
||||
The AWK Programming Language(http://cm.bell-labs.com/cm/cs/awkbook/) with
|
||||
some extensions.
|
||||
|
||||
@section awk_ext EXTENSION
|
||||
@section awk_ext EXTENSIONS
|
||||
Some language extensions are implemented and they can be enabled by setting the
|
||||
corresponding options.
|
||||
|
||||
@ -83,13 +83,30 @@ BEGIN {
|
||||
}
|
||||
@endcode
|
||||
|
||||
@subsection awk_ext_fnc EXTENDED FUNCTIONS
|
||||
index() and match() can accept the third parameter indicating the position
|
||||
where the search should begin. The negative position enables backward search.
|
||||
@subsection awk_ext_return RETURN
|
||||
The return statement is valid in BEGIN blocks, END blocks, and pattern-action
|
||||
blocks as well as in functions. The execution of a calling block is aborted
|
||||
once the return statement is executed.
|
||||
|
||||
@subsection awk_ext_comment COMMENT
|
||||
You can use the C-style comment as well as the pound comment.
|
||||
|
||||
@subsection awk_ext_fnc EXTENDED FUNCTIONS
|
||||
index() and match() can accept a third parameter indicating the position
|
||||
where the search begins. A negative value indicates a position from the back.
|
||||
|
||||
@code
|
||||
BEGIN {
|
||||
xstr = "abcdefabcdefabcdef";
|
||||
xsub = "abc";
|
||||
xlen = length(xsub);
|
||||
|
||||
i = 1;
|
||||
while ((i = index(xstr, xsub, i)) > 0)
|
||||
{
|
||||
print i, substr(xstr, i, xlen);
|
||||
i += xlen;
|
||||
}
|
||||
}
|
||||
@endcode
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: str.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
||||
* $Id: str.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -250,7 +250,7 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
|
||||
* represented by its beginning pointer and length.
|
||||
*
|
||||
* For two strings to be equal, they need to have the same length and all
|
||||
* characters in the first string should be equal to their counterpart in the
|
||||
* characters in the first string must be equal to their counterpart in the
|
||||
* second string.
|
||||
*
|
||||
* The following code snippet compares "foo" and "FoO" case-insensitively.
|
||||
@ -258,31 +258,84 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
|
||||
* qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3);
|
||||
* @endcode
|
||||
*
|
||||
* @return
|
||||
* The qse_strxncasecmp() returns 0 if two strings are equal, a positive
|
||||
* number if the first string is larger, -1 if the second string is larger.
|
||||
* @return 0 if two strings are equal,
|
||||
* a positive number if the first string is larger,
|
||||
* -1 if the second string is larger.
|
||||
*
|
||||
*/
|
||||
int qse_strxncasecmp (
|
||||
const qse_char_t* s1 /* the pointer to the first string */,
|
||||
qse_size_t len1 /* the length of the first string */,
|
||||
const qse_char_t* s2 /* the pointer to the second string */,
|
||||
qse_size_t len2 /* the length of the second string */
|
||||
const qse_char_t* s1, /**< pointer to the first string */
|
||||
qse_size_t len1, /**< length of the first string */
|
||||
const qse_char_t* s2, /**< pointer to the second string */
|
||||
qse_size_t len2 /**< length of the second string */
|
||||
);
|
||||
|
||||
qse_char_t* qse_strdup (const qse_char_t* str, qse_mmgr_t* mmgr);
|
||||
qse_char_t* qse_strxdup (
|
||||
const qse_char_t* str, qse_size_t len, qse_mmgr_t* mmgr);
|
||||
qse_char_t* qse_strxdup2 (
|
||||
const qse_char_t* str1, qse_size_t len1,
|
||||
const qse_char_t* str2, qse_size_t len2, qse_mmgr_t* mmgr);
|
||||
qse_char_t* qse_strdup (
|
||||
const qse_char_t* str,
|
||||
qse_mmgr_t* mmgr
|
||||
);
|
||||
|
||||
qse_char_t* qse_strxdup (
|
||||
const qse_char_t* str,
|
||||
qse_size_t len,
|
||||
qse_mmgr_t* mmgr
|
||||
);
|
||||
|
||||
qse_char_t* qse_strxdup2 (
|
||||
const qse_char_t* str1,
|
||||
qse_size_t len1,
|
||||
const qse_char_t* str2,
|
||||
qse_size_t len2,
|
||||
qse_mmgr_t* mmgr
|
||||
);
|
||||
|
||||
/**
|
||||
* The qse_strstr() function searches a string @a str for the first occurrence
|
||||
* of a substring @a sub
|
||||
* @return pointer to the first occurrence in @a str if @a sub is found,
|
||||
* QSE_NULL if not.
|
||||
*/
|
||||
qse_char_t* qse_strstr (
|
||||
const qse_char_t* str,
|
||||
const qse_char_t* sub
|
||||
);
|
||||
|
||||
qse_char_t* qse_strstr (const qse_char_t* str, const qse_char_t* sub);
|
||||
qse_char_t* qse_strxstr (
|
||||
const qse_char_t* str, qse_size_t size, const qse_char_t* sub);
|
||||
const qse_char_t* str,
|
||||
qse_size_t size,
|
||||
const qse_char_t* sub
|
||||
);
|
||||
|
||||
qse_char_t* qse_strxnstr (
|
||||
const qse_char_t* str, qse_size_t strsz,
|
||||
const qse_char_t* sub, qse_size_t subsz);
|
||||
const qse_char_t* str,
|
||||
qse_size_t strsz,
|
||||
const qse_char_t* sub,
|
||||
qse_size_t subsz
|
||||
);
|
||||
|
||||
/**
|
||||
* The qse_strrstr() function searches a string @a str for the last occurrence
|
||||
* of a substring @a sub
|
||||
* @return pointer to the last occurrence in @a str if @a sub is found,
|
||||
* QSE_NULL if not.
|
||||
*/
|
||||
qse_char_t* qse_strrstr (
|
||||
const qse_char_t* str,
|
||||
const qse_char_t* sub
|
||||
);
|
||||
|
||||
qse_char_t* qse_strxrstr (
|
||||
const qse_char_t* str,
|
||||
qse_size_t size,
|
||||
const qse_char_t* sub
|
||||
);
|
||||
|
||||
qse_char_t* qse_strxnrstr (
|
||||
const qse_char_t* str,
|
||||
qse_size_t strsz,
|
||||
const qse_char_t* sub,
|
||||
qse_size_t subsz
|
||||
);
|
||||
|
||||
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c);
|
||||
qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: macros.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
||||
* $Id: macros.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -45,8 +45,19 @@
|
||||
# define QSE_NULL ((void*)0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* The QSE_MCHAR_EOF macro defines an EOF character.
|
||||
*/
|
||||
#define QSE_MCHAR_EOF ((qse_mcint_t)-1)
|
||||
|
||||
/**
|
||||
* The QSE_WCHAR_EOF macro defines an EOF character.
|
||||
*/
|
||||
#define QSE_WCHAR_EOF ((qse_wcint_t)-1)
|
||||
|
||||
/**
|
||||
* The QSE_CHAR_EOF macro defines an EOF character.
|
||||
*/
|
||||
#define QSE_CHAR_EOF ((qse_cint_t)-1)
|
||||
|
||||
/**
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: fnc.c 288 2009-09-15 14:03:15Z hyunghwan.chung $
|
||||
* $Id: fnc.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -462,7 +462,6 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
||||
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
||||
if (n <= -1) return -1;
|
||||
if (n >= 1) start = (qse_long_t)rv;
|
||||
if (start < 1) start = 1;
|
||||
}
|
||||
|
||||
if (a0->type == QSE_AWK_VAL_STR)
|
||||
@ -492,9 +491,14 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
||||
}
|
||||
}
|
||||
|
||||
ptr = (start > len0)?
|
||||
/* TODO: ignorecase... */
|
||||
if (start == 0) start = 1;
|
||||
else if (start < 0) start = len0 + start + 1;
|
||||
|
||||
ptr = (start > len0 || start <= 0)?
|
||||
QSE_NULL:
|
||||
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
|
||||
|
||||
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);
|
||||
|
||||
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
|
||||
@ -1305,10 +1309,8 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
||||
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
||||
if (n <= -1) return -1;
|
||||
if (n >= 1) start = (qse_long_t)rv;
|
||||
if (start < 1) start = 1;
|
||||
}
|
||||
|
||||
|
||||
if (a0->type == QSE_AWK_VAL_STR)
|
||||
{
|
||||
str0 = ((qse_awk_val_str_t*)a0)->ptr;
|
||||
@ -1356,7 +1358,10 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
||||
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
|
||||
}
|
||||
|
||||
if (start > len0) n = 0;
|
||||
if (start == 0) start = 1;
|
||||
else if (start < 0) start = len0 + start + 1;
|
||||
|
||||
if (start > len0 || start <= 0) n = 0;
|
||||
else
|
||||
{
|
||||
n = QSE_AWK_MATCHREX (
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: map.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
||||
* $Id: map.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -57,17 +57,20 @@ static int reorganize (map_t* map);
|
||||
|
||||
static size_t hash_key (map_t* map, const void* kptr, size_t klen)
|
||||
{
|
||||
size_t n = 0;
|
||||
/*size_t h = 2166136261;*/
|
||||
/*size_t h = 0;*/
|
||||
size_t h = 5381;
|
||||
const byte_t* p = (const byte_t*)kptr;
|
||||
const byte_t* bound = p + klen;
|
||||
|
||||
while (p < bound)
|
||||
{
|
||||
n = n * 31 + *p++;
|
||||
p++;
|
||||
/*h = (h * 16777619) ^ *p++;*/
|
||||
/*h = h * 31 + *p++;*/
|
||||
h = ((h << 5) + h) + *p++;
|
||||
}
|
||||
|
||||
return n;
|
||||
return h ;
|
||||
}
|
||||
|
||||
static int comp_key (map_t* map,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: str_bas.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
||||
* $Id: str_bas.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -543,6 +543,47 @@ qse_char_t* qse_strxnstr (
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
qse_char_t* qse_strrstr (const qse_char_t* str, const qse_char_t* sub)
|
||||
{
|
||||
return qse_strxnrstr (str, qse_strlen(str), sub, qse_strlen(sub));
|
||||
}
|
||||
|
||||
/*
 * Searches the first size characters of str for the last occurrence of
 * sub. The length of sub is measured with qse_strlen() and the search is
 * delegated to qse_strxnrstr(), whose result is returned unchanged.
 */
qse_char_t* qse_strxrstr (
	const qse_char_t* str, qse_size_t size, const qse_char_t* sub)
{
	qse_size_t subsz = qse_strlen (sub);

	return qse_strxnrstr (str, size, sub, subsz);
}
|
||||
|
||||
/*
 * Searches the strsz characters starting at str for the last occurrence
 * of the subsz characters starting at sub.
 *
 * Returns a pointer to the beginning of the last occurrence inside str.
 * When subsz is 0 the result is str + strsz (the end of the string).
 * Returns QSE_NULL when sub is longer than str or does not occur in it.
 */
qse_char_t* qse_strxnrstr (
	const qse_char_t* str, qse_size_t strsz,
	const qse_char_t* sub, qse_size_t subsz)
{
	qse_size_t pos;

	if (subsz == 0) return (qse_char_t*)(str + strsz);
	if (strsz < subsz) return QSE_NULL;

	/*
	 * pos is kept one greater than the candidate start offset so that
	 * the unsigned counter can terminate at 0 without wrapping, and no
	 * pointer before the beginning of str is ever formed.
	 */
	for (pos = strsz - subsz + 1; pos > 0; pos--)
	{
		const qse_char_t* cand = str + (pos - 1);
		qse_size_t i = 0;

		/* cand + subsz never exceeds str + strsz, so the
		 * comparison stays inside both buffers */
		while (i < subsz && cand[i] == sub[i]) i++;

		if (i >= subsz) return (qse_char_t*)cand;
	}

	return QSE_NULL;
}
|
||||
|
||||
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c)
|
||||
{
|
||||
while (*str != QSE_T('\0'))
|
||||
|
25
qse/regress/awk/lang-038.awk
Normal file
25
qse/regress/awk/lang-038.awk
Normal file
@ -0,0 +1,25 @@
|
||||
#
|
||||
# test the third parameter(starting position) of index and match
|
||||
#
|
||||
|
||||
# Walks over every occurrence of xsub in xstr twice: first with index(),
# then with match(), each time passing the position to start from as the
# third argument.
BEGIN {
|
||||
xstr = "abcdefabcdefabcdef";
|
||||
xsub = "abc";
|
||||
xlen = length(xsub);
|
||||
|
||||
# start searching at the first character
i = 1;
|
||||
while ((i = index(xstr, xsub, i)) > 0)
|
||||
{
|
||||
print i, substr(xstr, i, xlen);
|
||||
# resume just past the occurrence that was found
i += xlen;
|
||||
}
|
||||
|
||||
print "----------------";
|
||||
|
||||
# same walk, this time driven by match() and RSTART/RLENGTH
i = 1;
|
||||
while (match(xstr, xsub, i) > 0)
|
||||
{
|
||||
print RSTART, substr(xstr, RSTART, RLENGTH);
|
||||
# resume just past the matched text
i = RSTART + RLENGTH;
|
||||
}
|
||||
}
|
@ -1990,6 +1990,35 @@ pq...r AAA2
|
||||
|
||||
kbs ddd
|
||||
dif cccc
|
||||
--------------------------------------------------------------------------------
|
||||
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-038.awk </dev/stdin 2>&1
|
||||
--------------------------------------------------------------------------------
|
||||
BEGIN {
|
||||
xstr = "abcdefabcdefabcdef";
|
||||
xsub = "abc";
|
||||
xlen = length (xsub);
|
||||
i = 1;
|
||||
while ((i = index (xstr,xsub,i) > 0))
|
||||
{
|
||||
print i,substr (xstr,i,xlen);
|
||||
i += xlen;
|
||||
}
|
||||
print "----------------";
|
||||
i = 1;
|
||||
while ((match (xstr,xsub,i) > 0))
|
||||
{
|
||||
print RSTART,substr (xstr,RSTART,RLENGTH);
|
||||
i = (RSTART + RLENGTH);
|
||||
}
|
||||
}
|
||||
|
||||
1 abc
|
||||
7 abc
|
||||
13 abc
|
||||
----------------
|
||||
1 abc
|
||||
7 abc
|
||||
13 abc
|
||||
--------------------------------------------------------------------------------
|
||||
../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1
|
||||
--------------------------------------------------------------------------------
|
||||
|
@ -148,6 +148,7 @@ PROGS="
|
||||
lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035
|
||||
lang-036.awk/lang-036.dat//--newline=on -o-
|
||||
lang-037.awk/lang-037.dat//--newline=on -o-
|
||||
lang-038.awk///--newline=on -o-
|
||||
|
||||
quicksort.awk/quicksort.dat//
|
||||
quicksort2.awk/quicksort2.dat//
|
||||
|
Loading…
Reference in New Issue
Block a user