diff --git a/qse/doc/page/awk.doc b/qse/doc/page/awk.doc index 46f2e596..8f457334 100644 --- a/qse/doc/page/awk.doc +++ b/qse/doc/page/awk.doc @@ -13,10 +13,10 @@ into an application written in C and/or C++. A hosting application can - change language features supported by setting options. The interpreter implements the language described in the book -The AWK Programming Language (http://cm.bell-labs.com/cm/cs/awkbook/) with +The AWK Programming Language(http://cm.bell-labs.com/cm/cs/awkbook/) with some extensions. -@section awk_ext EXTENSION +@section awk_ext EXTENSIONS Some language extensions are implemented and they can be enabled by setting the corresponding options. @@ -83,13 +83,30 @@ BEGIN { } @endcode -@subsectin awk_ext_fnc EXTENDED FUNCTIONS -index() and match() can accept the third parameter indicating the position -where the search should begin. The negative position enables backward search. +@subsection awk_ext_return RETURN +The return statement is valid in BEGIN blocks, END blocks, and pattern-action +blocks as well as in functions. The execution of a calling block is aborted +once the return statement is executed. + +@subsection awk_ext_comment COMMENT +You can use the C-style comment as well as the pound comment. + +@subsection awk_ext_fnc EXTENDED FUNCTIONS +index() and match() can accept the third parameter indicating the position +where the search begins. A negative value indicates a position from the back. @code BEGIN { + xstr = "abcdefabcdefabcdef"; + xsub = "abc"; + xlen = length(xsub); + i = 1; + while ((i = index(xstr, xsub, i)) > 0) + { + print i, substr(xstr, i, xlen); + i += xlen; + } } @endcode diff --git a/qse/include/qse/cmn/str.h b/qse/include/qse/cmn/str.h index 1e87010c..9766acca 100644 --- a/qse/include/qse/cmn/str.h +++ b/qse/include/qse/cmn/str.h @@ -1,5 +1,5 @@ /* - * $Id: str.h 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: str.h 289 2009-09-16 06:35:29Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. 
This file is part of QSE. @@ -250,7 +250,7 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2); * represented by its beginning pointer and length. * * For two strings to be equal, they need to have the same length and all - * characters in the first string should be equal to their counterpart in the + * characters in the first string must be equal to their counterpart in the * second string. * * The following code snippet compares "foo" and "FoO" case-insenstively. @@ -258,31 +258,84 @@ int qse_strcasecmp (const qse_char_t* s1, const qse_char_t* s2); * qse_strxncasecmp (QSE_T("foo"), 3, QSE_T("FoO"), 3); * @endcode * - * @return - * The qse_strxncasecmp() returns 0 if two strings are equal, a positive - * number if the first string is larger, -1 if the second string is larger. + * @return 0 if two strings are equal, + * a positive number if the first string is larger, + * -1 if the second string is larger. * */ int qse_strxncasecmp ( - const qse_char_t* s1 /* the pointer to the first string */, - qse_size_t len1 /* the length of the first string */, - const qse_char_t* s2 /* the pointer to the second string */, - qse_size_t len2 /* the length of the second string */ + const qse_char_t* s1, /**< pointer to the first string */ + qse_size_t len1, /**< length of the first string */ + const qse_char_t* s2, /**< pointer to the second string */ + qse_size_t len2 /**< length of the second string */ ); -qse_char_t* qse_strdup (const qse_char_t* str, qse_mmgr_t* mmgr); -qse_char_t* qse_strxdup ( - const qse_char_t* str, qse_size_t len, qse_mmgr_t* mmgr); -qse_char_t* qse_strxdup2 ( - const qse_char_t* str1, qse_size_t len1, - const qse_char_t* str2, qse_size_t len2, qse_mmgr_t* mmgr); +qse_char_t* qse_strdup ( + const qse_char_t* str, + qse_mmgr_t* mmgr +); + +qse_char_t* qse_strxdup ( + const qse_char_t* str, + qse_size_t len, + qse_mmgr_t* mmgr +); + +qse_char_t* qse_strxdup2 ( + const qse_char_t* str1, + qse_size_t len1, + const qse_char_t* str2, + 
qse_size_t len2, + qse_mmgr_t* mmgr +); + +/** + * The qse_strstr() function searches a string @a str for the first occurrence + * of a substring @a sub. + * @return pointer to the first occurrence in @a str if @a sub is found, + * QSE_NULL if not. + */ +qse_char_t* qse_strstr ( + const qse_char_t* str, + const qse_char_t* sub +); -qse_char_t* qse_strstr (const qse_char_t* str, const qse_char_t* sub); qse_char_t* qse_strxstr ( - const qse_char_t* str, qse_size_t size, const qse_char_t* sub); + const qse_char_t* str, + qse_size_t size, + const qse_char_t* sub +); + qse_char_t* qse_strxnstr ( - const qse_char_t* str, qse_size_t strsz, - const qse_char_t* sub, qse_size_t subsz); + const qse_char_t* str, + qse_size_t strsz, + const qse_char_t* sub, + qse_size_t subsz +); + +/** + * The qse_strrstr() function searches a string @a str for the last occurrence + * of a substring @a sub. + * @return pointer to the last occurrence in @a str if @a sub is found, + * QSE_NULL if not. + */ +qse_char_t* qse_strrstr ( + const qse_char_t* str, + const qse_char_t* sub +); + +qse_char_t* qse_strxrstr ( + const qse_char_t* str, + qse_size_t size, + const qse_char_t* sub +); + +qse_char_t* qse_strxnrstr ( + const qse_char_t* str, + qse_size_t strsz, + const qse_char_t* sub, + qse_size_t subsz +); qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c); qse_char_t* qse_strxchr (const qse_char_t* str, qse_size_t len, qse_cint_t c); diff --git a/qse/include/qse/macros.h b/qse/include/qse/macros.h index edfffe2a..584a0097 100644 --- a/qse/include/qse/macros.h +++ b/qse/include/qse/macros.h @@ -1,5 +1,5 @@ /* - * $Id: macros.h 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: macros.h 289 2009-09-16 06:35:29Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -45,8 +45,19 @@ # define QSE_NULL ((void*)0) #endif +/** + * The QSE_MCHAR_EOF macro defines an EOF character. 
+ */ #define QSE_MCHAR_EOF ((qse_mcint_t)-1) + +/** + * The QSE_WCHAR_EOF macro defines an EOF character. + */ #define QSE_WCHAR_EOF ((qse_wcint_t)-1) + +/** + * The QSE_CHAR_EOF macro defines an EOF character. + */ #define QSE_CHAR_EOF ((qse_cint_t)-1) /** diff --git a/qse/lib/awk/fnc.c b/qse/lib/awk/fnc.c index 373c1b5d..a2799d53 100644 --- a/qse/lib/awk/fnc.c +++ b/qse/lib/awk/fnc.c @@ -1,5 +1,5 @@ /* - * $Id: fnc.c 288 2009-09-15 14:03:15Z hyunghwan.chung $ + * $Id: fnc.c 289 2009-09-16 06:35:29Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -462,7 +462,6 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm) n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv); if (n <= -1) return -1; if (n >= 1) start = (qse_long_t)rv; - if (start < 1) start = 1; } if (a0->type == QSE_AWK_VAL_STR) @@ -492,9 +491,14 @@ static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm) } } - ptr = (start > len0)? +/* TODO: ignorecase... */ + if (start == 0) start = 1; + else if (start < 0) start = len0 + start + 1; + + ptr = (start > len0 || start <= 0)? QSE_NULL: qse_strxnstr (&str0[start-1], len0-start+1, str1, len1); + idx = (ptr == QSE_NULL)? 
0: ((qse_long_t)(ptr-str0) + 1); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0); @@ -1305,10 +1309,8 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm) n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv); if (n <= -1) return -1; if (n >= 1) start = (qse_long_t)rv; - if (start < 1) start = 1; } - if (a0->type == QSE_AWK_VAL_STR) { str0 = ((qse_awk_val_str_t*)a0)->ptr; @@ -1356,7 +1358,10 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm) if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1); } - if (start > len0) n = 0; + if (start == 0) start = 1; + else if (start < 0) start = len0 + start + 1; + + if (start > len0 || start <= 0) n = 0; else { n = QSE_AWK_MATCHREX ( diff --git a/qse/lib/cmn/map.c b/qse/lib/cmn/map.c index dd98c961..7473b13a 100644 --- a/qse/lib/cmn/map.c +++ b/qse/lib/cmn/map.c @@ -1,5 +1,5 @@ /* - * $Id: map.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: map.c 289 2009-09-16 06:35:29Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -57,17 +57,20 @@ static int reorganize (map_t* map); static size_t hash_key (map_t* map, const void* kptr, size_t klen) { - size_t n = 0; + /*size_t h = 2166136261;*/ + /*size_t h = 0;*/ + size_t h = 5381; const byte_t* p = (const byte_t*)kptr; const byte_t* bound = p + klen; while (p < bound) { - n = n * 31 + *p++; - p++; + /*h = (h * 16777619) ^ *p++;*/ + /*h = h * 31 + *p++;*/ + h = ((h << 5) + h) + *p++; } - return n; + return h ; } static int comp_key (map_t* map, diff --git a/qse/lib/cmn/str_bas.c b/qse/lib/cmn/str_bas.c index d4cb84ab..e8163827 100644 --- a/qse/lib/cmn/str_bas.c +++ b/qse/lib/cmn/str_bas.c @@ -1,5 +1,5 @@ /* - * $Id: str_bas.c 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: str_bas.c 289 2009-09-16 06:35:29Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -543,6 +543,47 @@ qse_char_t* qse_strxnstr ( return QSE_NULL; } +qse_char_t* qse_strrstr (const qse_char_t* str, const qse_char_t* sub) +{ + return qse_strxnrstr (str, qse_strlen(str), sub, qse_strlen(sub)); +} + +qse_char_t* qse_strxrstr ( + const qse_char_t* str, qse_size_t size, const qse_char_t* sub) +{ + return qse_strxnrstr (str, size, sub, qse_strlen(sub)); +} + +qse_char_t* qse_strxnrstr ( + const qse_char_t* str, qse_size_t strsz, + const qse_char_t* sub, qse_size_t subsz) +{ + const qse_char_t* p = str + strsz; + const qse_char_t* subp = sub + subsz; + + if (subsz == 0) return (qse_char_t*)p; + if (strsz < subsz) return QSE_NULL; + + p = p - subsz; + + while (p >= str) + { + const qse_char_t* x = p; + const qse_char_t* y = sub; + + while (1) + { + if (y >= subp) return (qse_char_t*)p; + if (*x != *y) break; + x++; y++; + } + + p--; + } + + return QSE_NULL; +} + qse_char_t* qse_strchr (const qse_char_t* str, qse_cint_t c) { while (*str != QSE_T('\0')) diff --git a/qse/regress/awk/lang-038.awk b/qse/regress/awk/lang-038.awk new file mode 100644 index 00000000..53f80313 --- /dev/null +++ b/qse/regress/awk/lang-038.awk @@ -0,0 +1,25 @@ +# +# test the third parameter(starting position) of index and match +# + +BEGIN { + xstr = "abcdefabcdefabcdef"; + xsub = "abc"; + xlen = length(xsub); + + i = 1; + while ((i = index(xstr, xsub, i)) > 0) + { + print i, substr(xstr, i, xlen); + i += xlen; + } + + print "----------------"; + + i = 1; + while (match(xstr, xsub, i) > 0) + { + print RSTART, substr(xstr, RSTART, RLENGTH); + i = RSTART + RLENGTH; + } +} diff --git a/qse/regress/awk/regress.out b/qse/regress/awk/regress.out index 30f1b6f8..f882c8d1 100644 --- a/qse/regress/awk/regress.out +++ b/qse/regress/awk/regress.out @@ -1990,6 +1990,35 @@ pq...r AAA2 kbs ddd dif cccc +-------------------------------------------------------------------------------- + ../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-038.awk &1 
+-------------------------------------------------------------------------------- +BEGIN { + xstr = "abcdefabcdefabcdef"; + xsub = "abc"; + xlen = length (xsub); + i = 1; + while ((i = index (xstr,xsub,i) > 0)) + { + print i,substr (xstr,i,xlen); + i += xlen; + } + print "----------------"; + i = 1; + while ((match (xstr,xsub,i) > 0)) + { + print RSTART,substr (xstr,RSTART,RLENGTH); + i = (RSTART + RLENGTH); + } +} + +1 abc +7 abc +13 abc +---------------- +1 abc +7 abc +13 abc -------------------------------------------------------------------------------- ../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat &1 -------------------------------------------------------------------------------- diff --git a/qse/regress/awk/regress.sh b/qse/regress/awk/regress.sh index 2ac2e173..d2b5855e 100755 --- a/qse/regress/awk/regress.sh +++ b/qse/regress/awk/regress.sh @@ -148,6 +148,7 @@ PROGS=" lang-035.awk/lang-035.dat2//--newline=on -o- -vdatafile=lang-035.dat1 -vgroupname=lang-035 lang-036.awk/lang-036.dat//--newline=on -o- lang-037.awk/lang-037.dat//--newline=on -o- + lang-038.awk///--newline=on -o- quicksort.awk/quicksort.dat// quicksort2.awk/quicksort2.dat//