diff --git a/qse/doc/page/awk-lang.md b/qse/doc/page/awk-lang.md index 791da53c..7b55d7ca 100644 --- a/qse/doc/page/awk-lang.md +++ b/qse/doc/page/awk-lang.md @@ -30,6 +30,41 @@ The following code snippet is a valid QSEAWK program that print the string } ~~~~~ +In general, QSEAWK starts executing the *BEGIN* blocks. For each input record +from an input stream, it executes the pattern-action blocks if the pattern +evaluates to true. Finally, it executes the *END* blocks. By default, each +line in the input stream is an input record. None of these blocks are +mandatory. However, a useful program needs at least 1 block to be present. + +For the following input records, +~~~~~{.txt} + abcdefgahijklmn + 1234567890 + opqrstuvwxyz +~~~~~ + +this AWK program produces +~~~~~{.awk} + BEGIN { mr=0; } + /abc|vwx/ { print $0; mr++; } + END { + print "total records: " NR; + print "matching records: " mr; + } +~~~~~ + +this output text. +~~~~~{.txt} + abcdefgahijklmn + opqrstuvwxyz + total records: 3 + matching records: 2 +~~~~~ + +The QSEAWK library provides a capability to use a user-defined function +as an entry point instead of executing these blocks. See \ref awk-embed for +how to change the entry point. + Comments -------- @@ -221,15 +256,15 @@ to a non-zero value. This is possible if you allow assigning a map to another non-map variable with #QSE_AWK_MAPTOVAR. In this case, a map is not deep-copied but the reference to it is copied. -@code -BEGIN { - a[10]=20; - b=a; - b[20]=40; - for (i in a) print i, a[i]; - print a===b; -} -@endcode +~~~~~{.awk} + BEGIN { + a[10]=20; + b=a; + b[20]=40; + for (i in a) print i, a[i]; + print a===b; + } +~~~~~ The === operator may be also useful when you want to indicate an error @@ -237,21 +272,21 @@ with an uninitialized variable. The following code check if the function returned a map. 
Since the variable 'nil' has never been assigned, its internal type is 'NIL' and -@code -function a () -{ - x[10] = 2; - return x; -} +~~~~~{.awk} + function a () + { + x[10] = 2; + return x; + } -BEGIN { - t = a(); - if (t === nil) - print "nil"; - else - print "ok"; -} -@endcode. + BEGIN { + t = a(); + if (t === nil) + print "nil"; + else + print "ok"; + } +~~~~~ The !== operator is a negated form of the === operator. @@ -287,14 +322,18 @@ stream being processed. The directive can only be used at the outmost scope where global variable declarations, *BEGIN*, *END*, and/or pattern-action blocks appear. - @include "abc.awk" - BEGIN { func_in_abc (); } +~~~~~{.awk} + @include "abc.awk" + BEGIN { func_in_abc (); } +~~~~~ A semicolon is optional after the included file name. The following is the same as the sample above. - @include "abc.awk"; - BEGIN { func_in_abc(); } +~~~~~{.awk} + @include "abc.awk"; + BEGIN { func_in_abc(); } +~~~~~ If #QSE_AWK_NEWLINE is off, the semicolon is required. @@ -330,41 +369,42 @@ of expressions separated with a comma. Each expression in the group is evaluated in the appearing order. The evaluation result of the last expression in the group is returned as that of the group. -@code -BEGIN { - c = (1, 2, 9); - a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c))); - print c; # 9; - print a; # -63 - print k; # 17 -} -@endcode +~~~~~{.awk} + BEGIN { + c = (1, 2, 9); + a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c))); + print c; # 9; + print a; # -63 + print k; # 17 + } +~~~~~ ### RETURN ### The return statement is valid in pattern-action blocks as well as in functions. The execution of a calling block is aborted once the return statement is executed. -@code -$ qseawk 'BEGIN { return 20; }' ; echo $? -20 -#endcode +~~~~~ + $ qseawk 'BEGIN { return 20; }' ; echo $? + 20 +~~~~~ If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function. 
-@code -function getarray() { - @local a; - a["one"] = 1; - a["two"] = 2; - a["three"] = 3; - return a; -} -BEGIN { - @local x; - x = getarray(); - for (i in x) print i, x[i]; -} -@endcode +~~~~~{.awk} + function getarray() { + @local a; + a["one"] = 1; + a["two"] = 2; + a["three"] = 3; + return a; + } + + BEGIN { + @local x; + x = getarray(); + for (i in x) print i, x[i]; + } +~~~~~ ### RESET ### @@ -373,14 +413,14 @@ After that, the array variable can also be used as a scalar variable again. You must have #QSE_AWK_RESET on to be able to be able to use this statement. -@code -BEGIN { - a[1] = 20; - reset a; - a = 20; # this is legal - print a; -} -@endcode +~~~~~{.awk} + BEGIN { + a[1] = 20; + reset a; + a = 20; # this is legal + print a; + } +~~~~~ ### ABORT ### The abort statment is similar to the exit statement except that @@ -551,9 +591,11 @@ For this reason, you are advised to parenthesize *getline* and its related components to avoid confusion whenever necessary. The example reading into the variable *line* can be made clearer with parenthesization. - BEGIN { - while ((getline line) > 0) print line; - } +~~~~~{.awk} + BEGIN { + while ((getline line) > 0) print line; + } +~~~~~ ### print ### **TODO** @@ -565,30 +607,34 @@ they are function calls. In this mode, they return a negative number on failure and a zero on success and any I/O failure doesn't abort a running program. - BEGIN { - a = print "hello, world" > "/dev/null"; - print a; - a = print ("hello, world") > "/dev/null"; - print a; - } +~~~~~{.awk} + BEGIN { + a = print "hello, world" > "/dev/null"; + print a; + a = print ("hello, world") > "/dev/null"; + print a; + } +~~~~~ Since print and printf are like function calls, you can use them in any context where a normal expression is allowed. For example, printf is used as a conditional expression in an 'if' statement in the sample code below. 
- BEGIN { - if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1) - print "FAILURE"; - else - print "SUCCESS"; - } +~~~~~{.awk} + BEGIN { + if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1) + print "FAILURE"; + else + print "SUCCESS"; + } +~~~~~ ### close (io-name, what) ### -The *close* function closes a stream indicated by the name *io-name*. It takes -an optional parameter *what* indicating whether input or output should be -closed. +The *close* function closes a stream indicated by the name *io-name*. +It takes an optional parameter *what* indicating whether input or output +should be closed. If *io-name* is a file, it closes the file handle associated; If *io-name* is a command, it may kill the running process from the command, @@ -597,12 +643,64 @@ If *io-name* is a network stream, it tears down connections to the network peer and closes the socket handles. The optional paramenter *what* must be one of *r* or *w* when used is useful -when *io-name* is a command invoked for the two-way operator. The value of -*r* causes the function to close the read-end of the pipe and the value of +when *io-name* is a command invoked for the two-way pipe operator. The value +of *r* causes the function to close the read-end of the pipe and the value of *w* causes the function to close the write-end of the pipe. The function returns 0 on success and -1 on failure. +Though not so useful, it is possible to create more than 1 streams of different +kinds under the same name. It is undefined which stream *close* +should close in the following program. + +~~~~~{.awk} + BEGIN { + "/tmp/x" || getline y; # rwpipe stream + print 1 | "/tmp/x"; # pipe stream + print 1 > "/tmp/x"; # file stream + close ("/tmp/x"); + } +~~~~~ + +### fflush (io-name) ### + +The *fflush* function flushes the output stream indicated by *io-name*. +If *io-name* is not specified, it flushes the open console output stream. +If *io-name* is an empty stream, it flushes all open output streams. 
+It returns 0 on success and -1 on failure. + +QSEAWK doesn't open the console output stream before it executes any output +commands like *print* or *printf*, so fflush() returns -1 in the following +program. + +~~~~~{.awk} + BEGIN { + fflush(); + } +~~~~~ + +The *print* command is executed before fflush() in the following program. +When fflush() is executed, the output stream is open, so fflush() returns 0. + +~~~~~{.awk} + BEGIN { + print 1; + fflush(); + } +~~~~~ + +Though not so useful, it is possible to create more than one output stream +of different kinds under the same name. *fflush* in the following program +flushes both the file stream and the pipe stream. + +~~~~~{.awk} + BEGIN { + print 1 | "/tmp/x"; # pipe stream + print 1 > "/tmp/x"; # file stream + fflush ("/tmp/x"); + } +~~~~~ + ### setioattr (io-name, attr-name, attr-value) ### The *setioattr* function changes the I/O attribute of the name *attr-name* to @@ -614,17 +712,19 @@ success and -1 on failure. - *attr-name* is one of *codepage*, *ctimeout*, *atimeout*, *rtimeout*, *wtimeout*. - *attr-value* varies depending on *attr-name*. - + codepage: *cp949*, *cp950*, *utf8* + + codepage: *cp949*, *cp950*, *utf8*, *slmb*, *mb8* + ctimeout, atimeout, rtimeout, wtimeout: the number of seconds. effective on socket based streams only. you may use a floating-point number for lower resoluation than a second. a negative value turns off timeout. See this sample that prints the contents of a document encoded in cp949. - BEGIN { - setioattr ("README.TXT", "codepage", "cp949"); - while ((getline x < "README.TXT") > 0) print x; - } +~~~~~{.awk} + BEGIN { + setioattr ("README.TXT", "codepage", "cp949"); + while ((getline x < "README.TXT") > 0) print x; + } +~~~~~ ### getioattr (io-name, attr-name, attr-value) ### @@ -634,12 +734,14 @@ is set to the variable referenced by *attr-value*. See *setioattr* for description on *io-name* and *attr-name*. It returns 0 on success and -1 on failure. 
- BEGIN { - setioattr ("README.TXT", "codepage", "cp949"); - if (getioattr ("README.TXT", "codepage", codepage) <= -1) - print "codepage unknown"; - else print "codepage: " codepage; - } +~~~~~{.awk} + BEGIN { + setioattr ("README.TXT", "codepage", "cp949"); + if (getioattr ("README.TXT", "codepage", codepage) <= -1) + print "codepage unknown"; + else print "codepage: " codepage; + } +~~~~~ ### Two-way Pipe ### @@ -649,17 +751,19 @@ must be set with #QSE_AWK_RWPIPE to be able to use the two-way pipe. The example redirects the output of *print* to the external *sort* command and reads back the output. - BEGIN { - print "15" || "sort"; - print "14" || "sort"; - print "13" || "sort"; - print "12" || "sort"; - print "11" || "sort"; - # close the input side of the pipe as 'sort' starts emitting result - # once the input is closed. - close ("sort", "r"); - while (("sort" || getline x) > 0) print x; - } +~~~~~{.awk} + BEGIN { + print "15" || "sort"; + print "14" || "sort"; + print "13" || "sort"; + print "12" || "sort"; + print "11" || "sort"; + # close the input side of the pipe as 'sort' starts emitting result + # once the input is closed. + close ("sort", "r"); + while (("sort" || getline x) > 0) print x; + } +~~~~~ This two-way pipe can create a TCP or UDP connection if the pipe command string is prefixed with one of the followings: @@ -671,100 +775,106 @@ string is prefixed with one of the followings: See this example. - BEGIN { - # it binds a TCP socket to the IPv6 address :: and the port number - # 9999 and waits for the first coming connection. It repeats writing - # "hello world" to the first connected peer and reading a line from - # it until the session is torn down. - do { - print "hello world" || "tcpd://[::]:9999"; - if (("tcpd://[::]:9999" || getline x) <= 0) break; - print x; - } - while(1); - } +~~~~~{.awk} + BEGIN { + # it binds a TCP socket to the IPv6 address :: and the port number + # 9999 and waits for the first coming connection. 
It repeats writing + # "hello world" to the first connected peer and reading a line from + # it until the session is torn down. + do { + print "hello world" || "tcpd://[::]:9999"; + if (("tcpd://[::]:9999" || getline x) <= 0) break; + print x; + } + while(1); + } +~~~~~ You can manipulate TCP or UDP timeouts for connection, accepting, reading, and writing with the *setioattr* function and the *getioattr* function. See the example below. - BEGIN { - setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3); - setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5); - print "hello world" || "tcp://127.0.0.1:9999"; - "tcp://127.0.0.1:9999" || getline x; - print x; - } +~~~~~{.awk} + BEGIN { + setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3); + setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5); + print "hello world" || "tcp://127.0.0.1:9999"; + "tcp://127.0.0.1:9999" || getline x; + print x; + } +~~~~~ Here is an interesting example adopting Michael Sanders' AWK web server, modified for QSEAWK. - # - # Michael Sanders' AWK web server for QSEAWK. - # Orginal code in http://awk.info/?tools/server - # - # qseawk --tolerant=on --rwpipe=on webserver.awk - # - BEGIN { - x = 1 # script exits if x < 1 - port = 8080 # port number - host = "tcpd://0.0.0.0:" port # host string - url = "http://localhost:" port # server url - status = 200 # 200 == OK - reason = "OK" # server response - RS = ORS = "\r\n" # header line terminators - doc = Setup() # html document - len = length(doc) + length(ORS) # length of document - while (x) { - if ($1 == "GET") RunApp(substr($2, 2)) - if (! x) break - print "HTTP/1.0", status, reason || host - print "Connection: Close" || host - print "Pragma: no-cache" || host - print "Content-length:", len || host - print ORS doc || host - close(host) # close client connection - host || getline # wait for new client request - } - # server terminated... - doc = Bye() - len = length(doc) + length(ORS) +~~~~~{.awk} + # + # Michael Sanders' AWK web server for QSEAWK. 
+ # Original code in http://awk.info/?tools/server + # + # qseawk --tolerant=on --rwpipe=on webserver.awk + # + BEGIN { + x = 1 # script exits if x < 1 + port = 8080 # port number + host = "tcpd://0.0.0.0:" port # host string + url = "http://localhost:" port # server url + status = 200 # 200 == OK + reason = "OK" # server response + RS = ORS = "\r\n" # header line terminators + doc = Setup() # html document + len = length(doc) + length(ORS) # length of document + while (x) { + if ($1 == "GET") RunApp(substr($2, 2)) + if (! x) break print "HTTP/1.0", status, reason || host print "Connection: Close" || host print "Pragma: no-cache" || host print "Content-length:", len || host print ORS doc || host - close(host) - } - - function Setup() { - tmp = "\ - Simple gawk server\ - \ -

xterm\ -

xcalc\ -

xload\ -

terminate script\ - \ - " - return tmp - } - - function Bye() { - tmp = "\ - Simple gawk server\ -

Script Terminated...\ - " - return tmp - } - - function RunApp(app) { - if (app == "xterm") {system("xterm&"); return} - if (app == "xcalc" ) {system("xcalc&"); return} - if (app == "xload" ) {system("xload&"); return} - if (app == "exit") {x = 0} - } + close(host) # close client connection + host || getline # wait for new client request + } + # server terminated... + doc = Bye() + len = length(doc) + length(ORS) + print "HTTP/1.0", status, reason || host + print "Connection: Close" || host + print "Pragma: no-cache" || host + print "Content-length:", len || host + print ORS doc || host + close(host) + } + + function Setup() { + tmp = "\ + Simple gawk server\ + \ +

xterm\ +

xcalc\ +

xload\ +

terminate script\ + \ + " + return tmp + } + + function Bye() { + tmp = "\ + Simple gawk server\ +

Script Terminated...\ + " + return tmp + } + + function RunApp(app) { + if (app == "xterm") {system("xterm&"); return} + if (app == "xcalc" ) {system("xcalc&"); return} + if (app == "xload" ) {system("xload&"); return} + if (app == "exit") {x = 0} + } +~~~~~ ### I/O Character Encoding ### diff --git a/qse/doc/page/mainpage.md b/qse/doc/page/mainpage.md index fefaa2a2..cd387d1f 100644 --- a/qse/doc/page/mainpage.md +++ b/qse/doc/page/mainpage.md @@ -1,7 +1,7 @@ QSE {#mainpage} ================================================================================ -@image html qse-logo.png +\image html qse-logo.png The QSE library implements AWK, SED, and Unix commands in an embeddable form and defines data types, functions, and classes that you can use when you embed @@ -20,10 +20,10 @@ Chung, Hyung-Hwan See the subpages for more information. -- @ref installation -- @ref awk-lang -- @ref awk-embed -- @ref sed-cmd -- @ref sed-embed -- @subpage mem "Memory Management" +- \ref installation +- \ref awk-lang +- \ref awk-embed +- \ref sed-cmd +- \ref sed-embed +- \subpage mem "Memory Management" diff --git a/qse/include/qse/cmn/Makefile.am b/qse/include/qse/cmn/Makefile.am index 23c824a4..700c8075 100644 --- a/qse/include/qse/cmn/Makefile.am +++ b/qse/include/qse/cmn/Makefile.am @@ -20,6 +20,7 @@ pkginclude_HEADERS = \ lda.h \ main.h \ map.h \ + mb8.h \ mbwc.h \ mem.h \ mux.h \ diff --git a/qse/include/qse/cmn/Makefile.in b/qse/include/qse/cmn/Makefile.in index c5fa2c39..7b8c5fcb 100644 --- a/qse/include/qse/cmn/Makefile.in +++ b/qse/include/qse/cmn/Makefile.in @@ -54,10 +54,10 @@ SOURCES = DIST_SOURCES = am__pkginclude_HEADERS_DIST = alg.h chr.h cp949.h cp950.h dir.h dll.h \ env.h fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h \ - lda.h main.h map.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h \ - oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \ - stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \ - xma.h Mmgr.hpp StdMmgr.hpp Mmged.hpp + lda.h 
main.h map.h mb8.h mbwc.h mem.h mux.h nwad.h nwif.h \ + nwio.h oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h \ + slmb.h stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h \ + utf8.h xma.h Mmgr.hpp StdMmgr.hpp Mmged.hpp am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ @@ -266,8 +266,8 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ pkginclude_HEADERS = alg.h chr.h cp949.h cp950.h dir.h dll.h env.h \ fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h lda.h \ - main.h map.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h oht.h \ - opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \ + main.h map.h mb8.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h \ + oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \ stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \ xma.h $(am__append_1) all: all-am diff --git a/qse/include/qse/cmn/mb8.h b/qse/include/qse/cmn/mb8.h new file mode 100644 index 00000000..6938c5c1 --- /dev/null +++ b/qse/include/qse/cmn/mb8.h @@ -0,0 +1,70 @@ +/* + * $Id$ + * + Copyright 2006-2012 Chung, Hyung-Hwan. + This file is part of QSE. + + QSE is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + QSE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with QSE. If not, see . + */ + +#ifndef _QSE_CMN_MB8_H_ +#define _QSE_CMN_MB8_H_ + +#include +#include + +/** \file + * This file provides functions, types, macros for mb8 conversion. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * The qse_wctomb8() function converts a wide character to a mb8 sequence. + * \return + * - 0 is returned if \a wc is invalid. + * - An integer greater than \a size is returned if the \a mb8 sequence buffer + * is not #QSE_NULL and not large enough. This integer is actually the number + * of bytes needed. + * - If \a mb8 is #QSE_NULL, the number of bytes that would have been stored + * into \a mb8 if it had not been #QSE_NULL is returned. + * - An integer between 1 and size inclusive is returned in all other cases. + */ +qse_size_t qse_wctomb8 ( + qse_wchar_t wc, + qse_mchar_t* mb8, + qse_size_t size +); + +/** + * The qse_mb8towc() function converts a mb8 sequence to a wide character. + * \return + * - 0 is returned if the \a mb8 sequence is invalid. + * - An integer greater than \a size is returned if the \a mb8 sequence is + * not complete. + * - An integer between 1 and size inclusive is returned in all other cases. + */ +qse_size_t qse_mb8towc ( + const qse_mchar_t* mb8, + qse_size_t size, + qse_wchar_t* wc +); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/qse/include/qse/cmn/mbwc.h b/qse/include/qse/cmn/mbwc.h index 29316f16..b0db7df1 100644 --- a/qse/include/qse/cmn/mbwc.h +++ b/qse/include/qse/cmn/mbwc.h @@ -21,7 +21,7 @@ #ifndef _QSE_CMN_MBWC_H_ #define _QSE_CMN_MBWC_H_ -/** @file +/** \file * This file provides functions and definitions needed for * multibyte/wide-characer conversion. */ @@ -36,7 +36,9 @@ typedef qse_cmgr_t* (*qse_cmgr_finder_t) (const qse_char_t* name); enum qse_cmgr_id_t { QSE_CMGR_SLMB, - QSE_CMGR_UTF8 + QSE_CMGR_UTF8, + QSE_CMGR_MB8 + #if defined(QSE_ENABLE_XCMGRS) , QSE_CMGR_CP949, @@ -59,7 +61,7 @@ QSE_EXPORT qse_cmgr_t* qse_findcmgrbyid ( /** * The qse_getfindcmgr() function find a built-in cmgr matching a given * @a name and returns it. It returns #QSE_NULL if no match is found. 
- * The @a name can be one of "utf8", "slmb", "cp949", "cp950", and an + * The @a name can be one of "slmb", "utf8", "mb8", "cp949", "cp950", and an * empty string. Calling this function with an empty string is the same * as calling qse_getdflcmgr(). */ diff --git a/qse/lib/awk/fnc.c b/qse/lib/awk/fnc.c index 73c30377..11e6cf0c 100644 --- a/qse/lib/awk/fnc.c +++ b/qse/lib/awk/fnc.c @@ -225,7 +225,7 @@ static int fnc_close (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) if (name == QSE_NULL) return -1; } - if (a1 != QSE_NULL) + if (a1) { if (a1->type == QSE_AWK_VAL_STR) { @@ -246,7 +246,7 @@ static int fnc_close (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) if (len == 0) { - /* getline or print doesn't allow an emptry for the + /* getline or print doesn't allow an empty string for the * input or output file name. so close should not allow * it either. * another reason for this is if close is called explicitly @@ -346,8 +346,16 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) if (nargs == 0) { - /* flush the console output. - * fflush() should return -1 on errors */ + /* fflush() flushes the console output. + * fflush() should return -1 on errors. + * + * if no previous console output statement is seen, + * this function won't be able to find the entry. + * so it returns -1; + * + * BEGIN { fflush(); } # fflush() returns -1 + * BEGIN { print 1; fflush(); } # fflush() returns 0 + */ n = qse_awk_rtx_flushio (run, QSE_AWK_OUT_CONSOLE, QSE_T("")); } else @@ -380,17 +388,43 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) ptr++; } - /* flush the given rio */ + /* flush the given rio. + * + * fflush("") flushes all output streams regardless of names. + * pass QSE_NULL for the name in that case so that the + * callee matches any streams. + * + * fflush() doesn't specify the type of output streams + * so it attempts to flush all types of output streams. 
+ * + * though not useful, it's possible to have multiple + * streams with the same name but of different types. + * + * BEGIN { + * print 1 | "/tmp/x"; + * print 1 > "/tmp/x"; + * fflush("/tmp/x"); + * } + */ + n = flush_io ( - run, QSE_AWK_RIO_FILE, + run, QSE_AWK_OUT_FILE, ((len0 == 0)? QSE_NULL: str0), 1); /*if (n == -99) return -1;*/ n = flush_io ( - run, QSE_AWK_RIO_PIPE, + run, QSE_AWK_OUT_APFILE, + ((len0 == 0)? QSE_NULL: str0), n); + /*if (n == -99) return -1;*/ + n = flush_io ( + run, QSE_AWK_OUT_PIPE, + ((len0 == 0)? QSE_NULL: str0), n); + /*if (n == -99) return -1;*/ + n = flush_io ( + run, QSE_AWK_OUT_RWPIPE, ((len0 == 0)? QSE_NULL: str0), n); /*if (n == -99) return -1;*/ - /* if n remains 1, no ip handlers have been defined for + /* if n remains 1, no io handlers have been defined for * file, pipe, and rwpipe. so make fflush return -1. * if n is -2, no such named io has been found at all * if n is -1, the io handler has returned an error */ diff --git a/qse/lib/awk/rio.c b/qse/lib/awk/rio.c index 782c2daa..bf0049a9 100644 --- a/qse/lib/awk/rio.c +++ b/qse/lib/awk/rio.c @@ -114,7 +114,7 @@ static int find_rio_in ( } /* search the chain for exiting an existing io name */ - while (p != QSE_NULL) + while (p) { if (p->type == (io_type | io_mask) && qse_strcmp (p->name,name) == 0) break; @@ -688,7 +688,7 @@ int qse_awk_rtx_writeio_str ( } /* look for the corresponding rio for name */ - while (p != QSE_NULL) + while (p) { /* the file "1.tmp", in the following code snippets, * would be opened by the first print statement, but not by @@ -799,7 +799,7 @@ int qse_awk_rtx_flushio ( { qse_awk_rio_arg_t* p = run->rio.chain; qse_awk_rio_impl_t handler; - int io_type, /*io_mode,*/ io_mask; + int io_type, io_mode, io_mask; qse_ssize_t n; int ok = 0; @@ -809,7 +809,7 @@ int qse_awk_rtx_flushio ( /* translate the out_type into the relevant I/O type and mode */ io_type = out_type_map[out_type]; - /*io_mode = out_mode_map[out_type];*/ + io_mode = out_mode_map[out_type]; 
io_mask = out_mask_map[out_type]; handler = run->rio.handler[io_type]; @@ -821,9 +821,13 @@ int qse_awk_rtx_flushio ( } /* look for the corresponding rio for name */ - while (p != QSE_NULL) + while (p) { - if (p->type == (io_type | io_mask) && + /* without the check for io_mode and p->mode, + * QSE_AWK_OUT_FILE and QSE_AWK_OUT_APFILE matches the + * same entry since (io_type | io_mask) has the same value + * for both. */ + if (p->type == (io_type | io_mask) && p->mode == io_mode && (name == QSE_NULL || qse_strcmp(p->name,name) == 0)) { qse_awk_rtx_seterrnum (run, QSE_AWK_ENOERR, QSE_NULL); @@ -1123,7 +1127,7 @@ int qse_awk_rtx_closeio ( { qse_awk_rio_arg_t* p = rtx->rio.chain, * px = QSE_NULL; - while (p != QSE_NULL) + while (p) { /* it handles the first that matches the given name * regardless of the io type */ diff --git a/qse/lib/cmn/Makefile.am b/qse/lib/cmn/Makefile.am index c4b1f2b7..a607e0f0 100644 --- a/qse/lib/cmn/Makefile.am +++ b/qse/lib/cmn/Makefile.am @@ -45,6 +45,7 @@ libqsecmn_la_SOURCES = \ ipad.c \ lda.c \ main.c \ + mb8.c \ mbwc.c \ mbwc-str.c \ mem.c \ diff --git a/qse/lib/cmn/Makefile.in b/qse/lib/cmn/Makefile.in index ece3ff1e..7c833501 100644 --- a/qse/lib/cmn/Makefile.in +++ b/qse/lib/cmn/Makefile.in @@ -88,15 +88,15 @@ libqsecmn_la_DEPENDENCIES = $(am__DEPENDENCIES_1) am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \ alg-sort.c assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c \ fma.c fmt.c fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c \ - main.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \ - nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \ - pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \ - str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \ - str-dup.c str-dynm.c str-dynw.c str-end.c str-excl.c \ - str-fcpy.c str-fnmat.c str-incl.c str-len.c str-pac.c \ - str-pbrk.c str-put.c str-rev.c str-rot.c str-set.c str-spl.c \ - str-spn.c str-str.c str-subst.c str-tok.c 
str-trm.c str-word.c \ - task.c time.c tio.c tre.c tre-ast.c tre-compile.c \ + main.c mb8.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c \ + nwif.c nwif-cfg.c nwio.c oht.c opt.c path-basename.c \ + path-canon.c pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c \ + stdio.c str-beg.c str-cat.c str-chr.c str-cnv.c str-cmp.c \ + str-cpy.c str-del.c str-dup.c str-dynm.c str-dynw.c str-end.c \ + str-excl.c str-fcpy.c str-fnmat.c str-incl.c str-len.c \ + str-pac.c str-pbrk.c str-put.c str-rev.c str-rot.c str-set.c \ + str-spl.c str-spn.c str-str.c str-subst.c str-tok.c str-trm.c \ + str-word.c task.c time.c tio.c tre.c tre-ast.c tre-compile.c \ tre-match-backtrack.c tre-match-parallel.c tre-parse.c \ tre-stack.c uri.c utf8.c xma.c uni.c cp949.c cp950.c @ENABLE_BUNDLED_UNICODE_TRUE@am__objects_1 = uni.lo @@ -104,17 +104,17 @@ am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \ am_libqsecmn_la_OBJECTS = alg-base64.lo alg-rand.lo alg-search.lo \ alg-sort.lo assert.lo chr.lo dir.lo dll.lo env.lo gdl.lo \ htb.lo fio.lo fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo glob.lo \ - hton.lo ipad.lo lda.lo main.lo mbwc.lo mbwc-str.lo mem.lo \ - mux.lo nwad.lo nwad-skad.lo nwif.lo nwif-cfg.lo nwio.lo oht.lo \ - opt.lo path-basename.lo path-canon.lo pio.lo pma.lo rbt.lo \ - rex.lo sio.lo sll.lo slmb.lo stdio.lo str-beg.lo str-cat.lo \ - str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo str-del.lo \ - str-dup.lo str-dynm.lo str-dynw.lo str-end.lo str-excl.lo \ - str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo str-pac.lo \ - str-pbrk.lo str-put.lo str-rev.lo str-rot.lo str-set.lo \ - str-spl.lo str-spn.lo str-str.lo str-subst.lo str-tok.lo \ - str-trm.lo str-word.lo task.lo time.lo tio.lo tre.lo \ - tre-ast.lo tre-compile.lo tre-match-backtrack.lo \ + hton.lo ipad.lo lda.lo main.lo mb8.lo mbwc.lo mbwc-str.lo \ + mem.lo mux.lo nwad.lo nwad-skad.lo nwif.lo nwif-cfg.lo nwio.lo \ + oht.lo opt.lo path-basename.lo path-canon.lo pio.lo pma.lo \ + rbt.lo rex.lo sio.lo sll.lo slmb.lo stdio.lo 
str-beg.lo \ + str-cat.lo str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo \ + str-del.lo str-dup.lo str-dynm.lo str-dynw.lo str-end.lo \ + str-excl.lo str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo \ + str-pac.lo str-pbrk.lo str-put.lo str-rev.lo str-rot.lo \ + str-set.lo str-spl.lo str-spn.lo str-str.lo str-subst.lo \ + str-tok.lo str-trm.lo str-word.lo task.lo time.lo tio.lo \ + tre.lo tre-ast.lo tre-compile.lo tre-match-backtrack.lo \ tre-match-parallel.lo tre-parse.lo tre-stack.lo uri.lo utf8.lo \ xma.lo $(am__objects_1) $(am__objects_2) libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS) @@ -355,7 +355,7 @@ noinst_HEADERS = \ libqsecmn_la_SOURCES = alg-base64.c alg-rand.c alg-search.c alg-sort.c \ assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c fma.c fmt.c \ fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c main.c \ - mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \ + mb8.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \ nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \ pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \ str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \ @@ -475,6 +475,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ipad.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mb8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@ diff --git a/qse/lib/cmn/mb8.c b/qse/lib/cmn/mb8.c new file mode 100644 index 00000000..c1a7a06f --- /dev/null +++ b/qse/lib/cmn/mb8.c @@ -0,0 +1,38 @@ +/* + * $Id$ + * + Copyright 2006-2012 Chung, Hyung-Hwan. + This file is part of QSE. 
+ + QSE is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + QSE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with QSE. If not, see . + */ + +#include + +qse_size_t qse_wctomb8 (qse_wchar_t wc, qse_mchar_t* utf8, qse_size_t size) +{ + if (size <= 0) return size + 1; /* buffer too small */ + if (wc > QSE_TYPE_MAX(qse_uint8_t)) return 0; /* illegal character */ + if (utf8) *(qse_uint8_t*)utf8 = wc; + return 1; +} + +qse_size_t qse_mb8towc ( + const qse_mchar_t* utf8, qse_size_t size, qse_wchar_t* wc) +{ + QSE_ASSERT (utf8 != QSE_NULL); + QSE_ASSERT (size > 0); + *wc = *(const qse_uint8_t*)utf8; + return 1; +} diff --git a/qse/lib/cmn/mbwc.c b/qse/lib/cmn/mbwc.c index 1b39c73d..f52efbe9 100644 --- a/qse/lib/cmn/mbwc.c +++ b/qse/lib/cmn/mbwc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -30,31 +31,18 @@ * dependent. */ -/* TODO: binary cmgr -> simply expands a byte to wchar and vice versa. 
*/ - static qse_cmgr_t builtin_cmgr[] = { - { - qse_slmbtoslwc, - qse_slwctoslmb - }, - + /* keep the order aligned with qse_cmgr_id_t values in */ + { qse_slmbtoslwc, qse_slwctoslmb }, + { qse_utf8touc, qse_uctoutf8 }, + { qse_mb8towc, qse_wctomb8 } #if defined(QSE_ENABLE_XCMGRS) - { - qse_cp949touc, - qse_uctocp949 - }, - - { - qse_cp950touc, - qse_uctocp950 - }, + , + { qse_cp949touc, qse_uctocp949 }, + { qse_cp950touc, qse_uctocp950 } #endif - { - qse_utf8touc, - qse_uctoutf8 - } }; static qse_cmgr_t* dfl_cmgr = &builtin_cmgr[QSE_CMGR_SLMB]; @@ -84,6 +72,8 @@ qse_cmgr_t* qse_findcmgrbyid (qse_cmgr_id_t id) qse_cmgr_t* qse_findcmgr (const qse_char_t* name) { + /* TODO: binary search or something better for performance improvement + * when there are many entries in the table */ static struct { const qse_char_t* name; @@ -95,7 +85,8 @@ qse_cmgr_t* qse_findcmgr (const qse_char_t* name) { QSE_T("cp949"), QSE_CMGR_CP949 }, { QSE_T("cp950"), QSE_CMGR_CP950 }, #endif - { QSE_T("slmb"), QSE_CMGR_UTF8 } + { QSE_T("slmb"), QSE_CMGR_SLMB }, + { QSE_T("mb8"), QSE_CMGR_MB8 } }; if (name)