enhanced index and match
This commit is contained in:
parent
63c12720cb
commit
ae7b0a5bdd
@ -13,10 +13,10 @@ into an application written in C and/or C++. A hosting application can
|
|||||||
- change language features supported by setting options.
|
- change language features supported by setting options.
|
||||||
|
|
||||||
The interpreter implements the language described in the book
|
The interpreter implements the language described in the book
|
||||||
The AWK Programming Language (http://cm.bell-labs.com/cm/cs/awkbook/) with
|
The AWK Programming Language(http://cm.bell-labs.com/cm/cs/awkbook/) with
|
||||||
some extensions.
|
some extensions.
|
||||||
|
|
||||||
@section awk_ext EXTENSION
|
@section awk_ext EXTENSIONS
|
||||||
Some language extensions are implemented and they can be enabled by setting the
|
Some language extensions are implemented and they can be enabled by setting the
|
||||||
corresponding options.
|
corresponding options.
|
||||||
|
|
||||||
@ -83,13 +83,30 @@ BEGIN {
|
|||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
@subsectin awk_ext_fnc EXTENDED FUNCTIONS
|
@subsection awk_ext_return RETURN
|
||||||
|
The return statement is valid in BEGIN blocks, END blocks, and pattern-action
|
||||||
|
blocks as well as in functions. The execution of a calling block is aborted
|
||||||
|
once the return statement is executed.
|
||||||
|
|
||||||
|
@subsection awk_ext_comment COMMENT
|
||||||
|
You can use the C-style comment as well as the pound comment.
|
||||||
|
|
||||||
|
@subsection awk_ext_fnc EXTENDED FUNCTIONS
|
||||||
index() and match() can accept the third parameter indicating the position
|
index() and match() can accept the third parameter indicating the position
|
||||||
where the search should begin. The negative position enables backward search.
|
where the search begins. A negative value indicates a position from the back.
|
||||||
|
|
||||||
@code
|
@code
|
||||||
BEGIN {
|
BEGIN {
|
||||||
|
xstr = "abcdefabcdefabcdef";
|
||||||
|
xsub = "abc";
|
||||||
|
xlen = length(xsub);
|
||||||
|
|
||||||
|
i = 1;
|
||||||
|
while ((i = index(xstr, xsub, i)) > 0)
|
||||||
|
{
|
||||||
|
print i, substr(xstr, i, xlen);
|
||||||
|
i += xlen;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@endcode
|
@endcode
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: str.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
* $Id: str.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -250,7 +250,7 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
|
|||||||
* represented by its beginning pointer and length.
|
* represented by its beginning pointer and length.
|
||||||
*
|
*
|
||||||
* For two strings to be equal, they need to have the same length and all
|
* For two strings to be equal, they need to have the same length and all
|
||||||
* characters in the first string should be equal to their counterpart in the
|
* characters in the first string must be equal to their counterpart in the
|
||||||
* second string.
|
* second string.
|
||||||
*
|
*
|
||||||
* The following code snippet compares "foo" and "FoO" case-insensitively.
|
* The following code snippet compares "foo" and "FoO" case-insensitively.
|
||||||
@ -258,31 +258,84 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2);
|
|||||||
* qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3);
|
* qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3);
|
||||||
* @endcode
|
* @endcode
|
||||||
*
|
*
|
||||||
* @return
|
* @return 0 if two strings are equal,
|
||||||
* The qse_strxncasecmp() returns 0 if two strings are equal, a positive
|
* a positive number if the first string is larger,
|
||||||
* number if the first string is larger, -1 if the second string is larger.
|
* -1 if the second string is larger.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
int qse_strxncasecmp (
|
int qse_strxncasecmp (
|
||||||
const qse_char_t* s1 /* the pointer to the first string */,
|
const qse_char_t* s1, /**< pointer to the first string */
|
||||||
qse_size_t len1 /* the length of the first string */,
|
qse_size_t len1, /**< length of the first string */
|
||||||
const qse_char_t* s2 /* the pointer to the second string */,
|
const qse_char_t* s2, /**< pointer to the second string */
|
||||||
qse_size_t len2 /* the length of the second string */
|
qse_size_t len2 /**< length of the second string */
|
||||||
);
|
);
|
||||||
|
|
||||||
qse_char_t* qse_strdup (const qse_char_t* str, qse_mmgr_t* mmgr);
|
qse_char_t* qse_strdup (
|
||||||
qse_char_t* qse_strxdup (
|
const qse_char_t* str,
|
||||||
const qse_char_t* str, qse_size_t len, qse_mmgr_t* mmgr);
|
qse_mmgr_t* mmgr
|
||||||
qse_char_t* qse_strxdup2 (
|
);
|
||||||
const qse_char_t* str1, qse_size_t len1,
|
|
||||||
const qse_char_t* str2, qse_size_t len2, qse_mmgr_t* mmgr);
|
qse_char_t* qse_strxdup (
|
||||||
|
const qse_char_t* str,
|
||||||
|
qse_size_t len,
|
||||||
|
qse_mmgr_t* mmgr
|
||||||
|
);
|
||||||
|
|
||||||
|
qse_char_t* qse_strxdup2 (
|
||||||
|
const qse_char_t* str1,
|
||||||
|
qse_size_t len1,
|
||||||
|
const qse_char_t* str2,
|
||||||
|
qse_size_t len2,
|
||||||
|
qse_mmgr_t* mmgr
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The qse_strstr() function searches a string @a str for the first occurrence
|
||||||
|
* of a substring @a sub.
|
||||||
|
* @return pointer to the first occurrence in @a str if @a sub is found,
|
||||||
|
* QSE_NULL if not.
|
||||||
|
*/
|
||||||
|
qse_char_t* qse_strstr (
|
||||||
|
const qse_char_t* str,
|
||||||
|
const qse_char_t* sub
|
||||||
|
);
|
||||||
|
|
||||||
qse_char_t* qse_strstr (const qse_char_t* str, const qse_char_t* sub);
|
|
||||||
qse_char_t* qse_strxstr (
|
qse_char_t* qse_strxstr (
|
||||||
const qse_char_t* str, qse_size_t size, const qse_char_t* sub);
|
const qse_char_t* str,
|
||||||
|
qse_size_t size,
|
||||||
|
const qse_char_t* sub
|
||||||
|
);
|
||||||
|
|
||||||
qse_char_t* qse_strxnstr (
|
qse_char_t* qse_strxnstr (
|
||||||
const qse_char_t* str, qse_size_t strsz,
|
const qse_char_t* str,
|
||||||
const qse_char_t* sub, qse_size_t subsz);
|
qse_size_t strsz,
|
||||||
|
const qse_char_t* sub,
|
||||||
|
qse_size_t subsz
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The qse_strrstr() function searches a string @a str for the last occurrence
|
||||||
|
* of a substring @a sub.
|
||||||
|
* @return pointer to the last occurrence in @a str if @a sub is found,
|
||||||
|
* QSE_NULL if not.
|
||||||
|
*/
|
||||||
|
qse_char_t* qse_strrstr (
|
||||||
|
const qse_char_t* str,
|
||||||
|
const qse_char_t* sub
|
||||||
|
);
|
||||||
|
|
||||||
|
qse_char_t* qse_strxrstr (
|
||||||
|
const qse_char_t* str,
|
||||||
|
qse_size_t size,
|
||||||
|
const qse_char_t* sub
|
||||||
|
);
|
||||||
|
|
||||||
|
qse_char_t* qse_strxnrstr (
|
||||||
|
const qse_char_t* str,
|
||||||
|
qse_size_t strsz,
|
||||||
|
const qse_char_t* sub,
|
||||||
|
qse_size_t subsz
|
||||||
|
);
|
||||||
|
|
||||||
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c);
|
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c);
|
||||||
qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c);
|
qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: macros.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
* $Id: macros.h 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -45,8 +45,19 @@
|
|||||||
# define QSE_NULL ((void*)0)
|
# define QSE_NULL ((void*)0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The QSE_MCHAR_EOF macro defines an EOF character.
|
||||||
|
*/
|
||||||
#define QSE_MCHAR_EOF ((qse_mcint_t)-1)
|
#define QSE_MCHAR_EOF ((qse_mcint_t)-1)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The QSE_WCHAR_EOF macro defines an EOF character.
|
||||||
|
*/
|
||||||
#define QSE_WCHAR_EOF ((qse_wcint_t)-1)
|
#define QSE_WCHAR_EOF ((qse_wcint_t)-1)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The QSE_CHAR_EOF macro defines an EOF character.
|
||||||
|
*/
|
||||||
#define QSE_CHAR_EOF ((qse_cint_t)-1)
|
#define QSE_CHAR_EOF ((qse_cint_t)-1)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: fnc.c 288 2009-09-15 14:03:15Z hyunghwan.chung $
|
* $Id: fnc.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -462,7 +462,6 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
|||||||
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
if (n >= 1) start = (qse_long_t)rv;
|
if (n >= 1) start = (qse_long_t)rv;
|
||||||
if (start < 1) start = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (a0->type == QSE_AWK_VAL_STR)
|
if (a0->type == QSE_AWK_VAL_STR)
|
||||||
@ -492,9 +491,14 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr = (start > len0)?
|
/* TODO: ignorecase... */
|
||||||
|
if (start == 0) start = 1;
|
||||||
|
else if (start < 0) start = len0 + start + 1;
|
||||||
|
|
||||||
|
ptr = (start > len0 || start <= 0)?
|
||||||
QSE_NULL:
|
QSE_NULL:
|
||||||
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
|
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
|
||||||
|
|
||||||
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);
|
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);
|
||||||
|
|
||||||
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
|
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
|
||||||
@ -1305,10 +1309,8 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
|||||||
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
if (n >= 1) start = (qse_long_t)rv;
|
if (n >= 1) start = (qse_long_t)rv;
|
||||||
if (start < 1) start = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (a0->type == QSE_AWK_VAL_STR)
|
if (a0->type == QSE_AWK_VAL_STR)
|
||||||
{
|
{
|
||||||
str0 = ((qse_awk_val_str_t*)a0)->ptr;
|
str0 = ((qse_awk_val_str_t*)a0)->ptr;
|
||||||
@ -1356,7 +1358,10 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
|||||||
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
|
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (start > len0) n = 0;
|
if (start == 0) start = 1;
|
||||||
|
else if (start < 0) start = len0 + start + 1;
|
||||||
|
|
||||||
|
if (start > len0 || start <= 0) n = 0;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
n = QSE_AWK_MATCHREX (
|
n = QSE_AWK_MATCHREX (
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: map.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
* $Id: map.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -57,17 +57,20 @@ static int reorganize (map_t* map);
|
|||||||
|
|
||||||
static size_t hash_key (map_t* map, const void* kptr, size_t klen)
|
static size_t hash_key (map_t* map, const void* kptr, size_t klen)
|
||||||
{
|
{
|
||||||
size_t n = 0;
|
/*size_t h = 2166136261;*/
|
||||||
|
/*size_t h = 0;*/
|
||||||
|
size_t h = 5381;
|
||||||
const byte_t* p = (const byte_t*)kptr;
|
const byte_t* p = (const byte_t*)kptr;
|
||||||
const byte_t* bound = p + klen;
|
const byte_t* bound = p + klen;
|
||||||
|
|
||||||
while (p < bound)
|
while (p < bound)
|
||||||
{
|
{
|
||||||
n = n * 31 + *p++;
|
/*h = (h * 16777619) ^ *p++;*/
|
||||||
p++;
|
/*h = h * 31 + *p++;*/
|
||||||
|
h = ((h << 5) + h) + *p++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
return h ;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int comp_key (map_t* map,
|
static int comp_key (map_t* map,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: str_bas.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
* $Id: str_bas.c 289 2009-09-16 06:35:29Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -543,6 +543,47 @@ qse_char_t* qse_strxnstr (
|
|||||||
return QSE_NULL;
|
return QSE_NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qse_char_t* qse_strrstr (const qse_char_t* str, const qse_char_t* sub)
|
||||||
|
{
|
||||||
|
return qse_strxnrstr (str, qse_strlen(str), sub, qse_strlen(sub));
|
||||||
|
}
|
||||||
|
|
||||||
|
qse_char_t* qse_strxrstr (
|
||||||
|
const qse_char_t* str, qse_size_t size, const qse_char_t* sub)
|
||||||
|
{
|
||||||
|
return qse_strxnrstr (str, size, sub, qse_strlen(sub));
|
||||||
|
}
|
||||||
|
|
||||||
|
qse_char_t* qse_strxnrstr (
|
||||||
|
const qse_char_t* str, qse_size_t strsz,
|
||||||
|
const qse_char_t* sub, qse_size_t subsz)
|
||||||
|
{
|
||||||
|
const qse_char_t* p = str + strsz;
|
||||||
|
const qse_char_t* subp = sub + subsz;
|
||||||
|
|
||||||
|
if (subsz == 0) return (qse_char_t*)p;
|
||||||
|
if (strsz < subsz) return QSE_NULL;
|
||||||
|
|
||||||
|
p = p - subsz;
|
||||||
|
|
||||||
|
while (p >= str)
|
||||||
|
{
|
||||||
|
const qse_char_t* x = p;
|
||||||
|
const qse_char_t* y = sub;
|
||||||
|
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
if (y >= subp) return (qse_char_t*)p;
|
||||||
|
if (*x != *y) break;
|
||||||
|
x++; y++;
|
||||||
|
}
|
||||||
|
|
||||||
|
p--;
|
||||||
|
}
|
||||||
|
|
||||||
|
return QSE_NULL;
|
||||||
|
}
|
||||||
|
|
||||||
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c)
|
qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c)
|
||||||
{
|
{
|
||||||
while (*str != QSE_T('\0'))
|
while (*str != QSE_T('\0'))
|
||||||
|
25
qse/regress/awk/lang-038.awk
Normal file
25
qse/regress/awk/lang-038.awk
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#
|
||||||
|
# test the third parameter (starting position) of index and match
|
||||||
|
#
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
xstr = "abcdefabcdefabcdef";
|
||||||
|
xsub = "abc";
|
||||||
|
xlen = length(xsub);
|
||||||
|
|
||||||
|
i = 1;
|
||||||
|
while ((i = index(xstr, xsub, i)) > 0)
|
||||||
|
{
|
||||||
|
print i, substr(xstr, i, xlen);
|
||||||
|
i += xlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
print "----------------";
|
||||||
|
|
||||||
|
i = 1;
|
||||||
|
while (match(xstr, xsub, i) > 0)
|
||||||
|
{
|
||||||
|
print RSTART, substr(xstr, RSTART, RLENGTH);
|
||||||
|
i = RSTART + RLENGTH;
|
||||||
|
}
|
||||||
|
}
|
@ -1990,6 +1990,35 @@ pq...r AAA2
|
|||||||
|
|
||||||
kbs ddd
|
kbs ddd
|
||||||
dif cccc
|
dif cccc
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-038.awk </dev/stdin 2>&1
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
BEGIN {
|
||||||
|
xstr = "abcdefabcdefabcdef";
|
||||||
|
xsub = "abc";
|
||||||
|
xlen = length (xsub);
|
||||||
|
i = 1;
|
||||||
|
while ((i = index (xstr,xsub,i) > 0))
|
||||||
|
{
|
||||||
|
print i,substr (xstr,i,xlen);
|
||||||
|
i += xlen;
|
||||||
|
}
|
||||||
|
print "----------------";
|
||||||
|
i = 1;
|
||||||
|
while ((match (xstr,xsub,i) > 0))
|
||||||
|
{
|
||||||
|
print RSTART,substr (xstr,RSTART,RLENGTH);
|
||||||
|
i = (RSTART + RLENGTH);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
1 abc
|
||||||
|
7 abc
|
||||||
|
13 abc
|
||||||
|
----------------
|
||||||
|
1 abc
|
||||||
|
7 abc
|
||||||
|
13 abc
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1
|
../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
@ -148,6 +148,7 @@ PROGS="
|
|||||||
lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035
|
lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035
|
||||||
lang-036.awk/lang-036.dat//--newline=on -o-
|
lang-036.awk/lang-036.dat//--newline=on -o-
|
||||||
lang-037.awk/lang-037.dat//--newline=on -o-
|
lang-037.awk/lang-037.dat//--newline=on -o-
|
||||||
|
lang-038.awk///--newline=on -o-
|
||||||
|
|
||||||
quicksort.awk/quicksort.dat//
|
quicksort.awk/quicksort.dat//
|
||||||
quicksort2.awk/quicksort2.dat//
|
quicksort2.awk/quicksort2.dat//
|
||||||
|
Loading…
Reference in New Issue
Block a user