diff --git a/qse/doc/page/awk-lang.md b/qse/doc/page/awk-lang.md
index 791da53c..7b55d7ca 100644
--- a/qse/doc/page/awk-lang.md
+++ b/qse/doc/page/awk-lang.md
@@ -30,6 +30,41 @@ The following code snippet is a valid QSEAWK program that print the string
}
~~~~~
+In general, QSEAWK starts by executing the *BEGIN* blocks. For each input record
+from an input stream, it executes the pattern-action blocks if the pattern
+evaluates to true. Finally, it executes the *END* blocks. By default, each
+line in the input stream is an input record. None of these blocks are
+mandatory. However, a useful program needs at least 1 block to be present.
+
+For the following input records,
+~~~~~{.txt}
+ abcdefgahijklmn
+ 1234567890
+ opqrstuvwxyz
+~~~~~
+
+this AWK program produces
+~~~~~{.awk}
+ BEGIN { mr=0; }
+ /abc|vwx/ { print $0; mr++; }
+ END {
+ print "total records: " NR;
+ print "matching records: " mr;
+ }
+~~~~~
+
+this output text.
+~~~~~{.txt}
+ abcdefgahijklmn
+ opqrstuvwxyz
+ total records: 3
+ matching records: 2
+~~~~~
+
+The QSEAWK library provides a capability to use a user-defined function
+as an entry point instead of executing these blocks. See \ref awk-embed for
+how to change the entry point.
+
Comments
--------
@@ -221,15 +256,15 @@ to a non-zero value. This is possible if you allow assigning a map to
another non-map variable with #QSE_AWK_MAPTOVAR. In this case, a map
is not deep-copied but the reference to it is copied.
-@code
-BEGIN {
- a[10]=20;
- b=a;
- b[20]=40;
- for (i in a) print i, a[i];
- print a===b;
-}
-@endcode
+~~~~~{.awk}
+ BEGIN {
+ a[10]=20;
+ b=a;
+ b[20]=40;
+ for (i in a) print i, a[i];
+ print a===b;
+ }
+~~~~~
The === operator may be also useful when you want to indicate an error
@@ -237,21 +272,21 @@ with an uninitialized variable. The following code check if the function
returned a map. Since the variable 'nil' has never been assigned, its
internal type is 'NIL' and
-@code
-function a ()
-{
- x[10] = 2;
- return x;
-}
+~~~~~{.awk}
+ function a ()
+ {
+ x[10] = 2;
+ return x;
+ }
-BEGIN {
- t = a();
- if (t === nil)
- print "nil";
- else
- print "ok";
-}
-@endcode.
+ BEGIN {
+ t = a();
+ if (t === nil)
+ print "nil";
+ else
+ print "ok";
+ }
+~~~~~
The !== operator is a negated form of the === operator.
@@ -287,14 +322,18 @@ stream being processed. The directive can only be used at the outmost scope
where global variable declarations, *BEGIN*, *END*, and/or pattern-action
blocks appear.
- @include "abc.awk"
- BEGIN { func_in_abc (); }
+~~~~~{.awk}
+ @include "abc.awk"
+ BEGIN { func_in_abc (); }
+~~~~~
A semicolon is optional after the included file name. The following is the
same as the sample above.
- @include "abc.awk";
- BEGIN { func_in_abc(); }
+~~~~~{.awk}
+ @include "abc.awk";
+ BEGIN { func_in_abc(); }
+~~~~~
If #QSE_AWK_NEWLINE is off, the semicolon is required.
@@ -330,41 +369,42 @@ of expressions separated with a comma. Each expression in the group is
evaluated in the appearing order. The evaluation result of the last
expression in the group is returned as that of the group.
-@code
-BEGIN {
- c = (1, 2, 9);
- a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c)));
- print c; # 9;
- print a; # -63
- print k; # 17
-}
-@endcode
+~~~~~{.awk}
+ BEGIN {
+ c = (1, 2, 9);
+ a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c)));
+ print c; # 9;
+ print a; # -63
+ print k; # 17
+ }
+~~~~~
### RETURN ###
The return statement is valid in pattern-action blocks as well as in functions.
The execution of a calling block is aborted once the return statement is executed.
-@code
-$ qseawk 'BEGIN { return 20; }' ; echo $?
-20
-#endcode
+~~~~~
+ $ qseawk 'BEGIN { return 20; }' ; echo $?
+ 20
+~~~~~
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
-@code
-function getarray() {
- @local a;
- a["one"] = 1;
- a["two"] = 2;
- a["three"] = 3;
- return a;
-}
-BEGIN {
- @local x;
- x = getarray();
- for (i in x) print i, x[i];
-}
-@endcode
+~~~~~{.awk}
+ function getarray() {
+ @local a;
+ a["one"] = 1;
+ a["two"] = 2;
+ a["three"] = 3;
+ return a;
+ }
+
+ BEGIN {
+ @local x;
+ x = getarray();
+ for (i in x) print i, x[i];
+ }
+~~~~~
### RESET ###
@@ -373,14 +413,14 @@ After that, the array variable can also be used as a scalar variable again.
You must have #QSE_AWK_RESET on to be able to be able to use this
statement.
-@code
-BEGIN {
- a[1] = 20;
- reset a;
- a = 20; # this is legal
- print a;
-}
-@endcode
+~~~~~{.awk}
+ BEGIN {
+ a[1] = 20;
+ reset a;
+ a = 20; # this is legal
+ print a;
+ }
+~~~~~
### ABORT ###
The abort statment is similar to the exit statement except that
@@ -551,9 +591,11 @@ For this reason, you are advised to parenthesize *getline* and its related
components to avoid confusion whenever necessary. The example reading into
the variable *line* can be made clearer with parenthesization.
- BEGIN {
- while ((getline line) > 0) print line;
- }
+~~~~~{.awk}
+ BEGIN {
+ while ((getline line) > 0) print line;
+ }
+~~~~~
### print ###
**TODO**
@@ -565,30 +607,34 @@ they are function calls. In this mode, they return a negative number
on failure and a zero on success and any I/O failure doesn't abort
a running program.
- BEGIN {
- a = print "hello, world" > "/dev/null";
- print a;
- a = print ("hello, world") > "/dev/null";
- print a;
- }
+~~~~~{.awk}
+ BEGIN {
+ a = print "hello, world" > "/dev/null";
+ print a;
+ a = print ("hello, world") > "/dev/null";
+ print a;
+ }
+~~~~~
Since print and printf are like function calls, you can use them
in any context where a normal expression is allowed. For example,
printf is used as a conditional expression in an 'if' statement
in the sample code below.
- BEGIN {
- if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
- print "FAILURE";
- else
- print "SUCCESS";
- }
+~~~~~{.awk}
+ BEGIN {
+ if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
+ print "FAILURE";
+ else
+ print "SUCCESS";
+ }
+~~~~~
### close (io-name, what) ###
-The *close* function closes a stream indicated by the name *io-name*. It takes
-an optional parameter *what* indicating whether input or output should be
-closed.
+The *close* function closes a stream indicated by the name *io-name*.
+It takes an optional parameter *what* indicating whether input or output
+should be closed.
If *io-name* is a file, it closes the file handle associated;
If *io-name* is a command, it may kill the running process from the command,
@@ -597,12 +643,64 @@ If *io-name* is a network stream, it tears down connections to the network
peer and closes the socket handles.
The optional paramenter *what* must be one of *r* or *w* when used is useful
-when *io-name* is a command invoked for the two-way operator. The value of
-*r* causes the function to close the read-end of the pipe and the value of
+when *io-name* is a command invoked for the two-way pipe operator. The value
+of *r* causes the function to close the read-end of the pipe and the value of
*w* causes the function to close the write-end of the pipe.
The function returns 0 on success and -1 on failure.
+Though not so useful, it is possible to create multiple streams of different
+kinds under the same name. It is undefined which stream *close*
+should close in the following program.
+
+~~~~~{.awk}
+ BEGIN {
+ "/tmp/x" || getline y; # rwpipe stream
+ print 1 | "/tmp/x"; # pipe stream
+ print 1 > "/tmp/x"; # file stream
+ close ("/tmp/x");
+ }
+~~~~~
+
+### fflush (io-name) ###
+
+The *fflush* function flushes the output stream indicated by *io-name*.
+If *io-name* is not specified, it flushes the open console output stream.
+If *io-name* is an empty string, it flushes all open output streams.
+It returns 0 on success and -1 on failure.
+
+QSEAWK doesn't open the console output stream before it executes any output
+commands like *print* or *printf*, so fflush() returns -1 in the following
+program.
+
+~~~~~{.awk}
+ BEGIN {
+ fflush();
+ }
+~~~~~
+
+The *print* command is executed before fflush() in the following program.
+When fflush() is executed, the output stream is open, so fflush() returns 0.
+
+~~~~~{.awk}
+ BEGIN {
+ print 1;
+ fflush();
+ }
+~~~~~
+
+Though not so useful, it is possible to create multiple output streams
+of different kinds under the same name. *fflush* in the following program
+flushes both the file stream and the pipe stream.
+
+~~~~~{.awk}
+ BEGIN {
+ print 1 | "/tmp/x"; # pipe stream
+ print 1 > "/tmp/x"; # file stream
+ fflush ("/tmp/x");
+ }
+~~~~~
+
### setioattr (io-name, attr-name, attr-value) ###
The *setioattr* function changes the I/O attribute of the name *attr-name* to
@@ -614,17 +712,19 @@ success and -1 on failure.
- *attr-name* is one of *codepage*, *ctimeout*, *atimeout*, *rtimeout*,
*wtimeout*.
- *attr-value* varies depending on *attr-name*.
- + codepage: *cp949*, *cp950*, *utf8*
+ + codepage: *cp949*, *cp950*, *utf8*, *slmb*, *mb8*
+ ctimeout, atimeout, rtimeout, wtimeout: the number of seconds. effective
on socket based streams only. you may use a floating-point number for
lower resoluation than a second. a negative value turns off timeout.
See this sample that prints the contents of a document encoded in cp949.
- BEGIN {
- setioattr ("README.TXT", "codepage", "cp949");
- while ((getline x < "README.TXT") > 0) print x;
- }
+~~~~~{.awk}
+ BEGIN {
+ setioattr ("README.TXT", "codepage", "cp949");
+ while ((getline x < "README.TXT") > 0) print x;
+ }
+~~~~~
### getioattr (io-name, attr-name, attr-value) ###
@@ -634,12 +734,14 @@ is set to the variable referenced by *attr-value*. See *setioattr* for
description on *io-name* and *attr-name*. It returns 0 on success and -1 on
failure.
- BEGIN {
- setioattr ("README.TXT", "codepage", "cp949");
- if (getioattr ("README.TXT", "codepage", codepage) <= -1)
- print "codepage unknown";
- else print "codepage: " codepage;
- }
+~~~~~{.awk}
+ BEGIN {
+ setioattr ("README.TXT", "codepage", "cp949");
+ if (getioattr ("README.TXT", "codepage", codepage) <= -1)
+ print "codepage unknown";
+ else print "codepage: " codepage;
+ }
+~~~~~
### Two-way Pipe ###
@@ -649,17 +751,19 @@ must be set with #QSE_AWK_RWPIPE to be able to use the two-way pipe.
The example redirects the output of *print* to the external *sort* command
and reads back the output.
- BEGIN {
- print "15" || "sort";
- print "14" || "sort";
- print "13" || "sort";
- print "12" || "sort";
- print "11" || "sort";
- # close the input side of the pipe as 'sort' starts emitting result
- # once the input is closed.
- close ("sort", "r");
- while (("sort" || getline x) > 0) print x;
- }
+~~~~~{.awk}
+ BEGIN {
+ print "15" || "sort";
+ print "14" || "sort";
+ print "13" || "sort";
+ print "12" || "sort";
+ print "11" || "sort";
+ # close the input side of the pipe as 'sort' starts emitting result
+ # once the input is closed.
+ close ("sort", "r");
+ while (("sort" || getline x) > 0) print x;
+ }
+~~~~~
This two-way pipe can create a TCP or UDP connection if the pipe command
string is prefixed with one of the followings:
@@ -671,100 +775,106 @@ string is prefixed with one of the followings:
See this example.
- BEGIN {
- # it binds a TCP socket to the IPv6 address :: and the port number
- # 9999 and waits for the first coming connection. It repeats writing
- # "hello world" to the first connected peer and reading a line from
- # it until the session is torn down.
- do {
- print "hello world" || "tcpd://[::]:9999";
- if (("tcpd://[::]:9999" || getline x) <= 0) break;
- print x;
- }
- while(1);
- }
+~~~~~{.awk}
+ BEGIN {
+ # it binds a TCP socket to the IPv6 address :: and the port number
+ # 9999 and waits for the first coming connection. It repeats writing
+ # "hello world" to the first connected peer and reading a line from
+ # it until the session is torn down.
+ do {
+ print "hello world" || "tcpd://[::]:9999";
+ if (("tcpd://[::]:9999" || getline x) <= 0) break;
+ print x;
+ }
+ while(1);
+ }
+~~~~~
You can manipulate TCP or UDP timeouts for connection, accepting, reading, and
writing with the *setioattr* function and the *getioattr* function.
See the example below.
- BEGIN {
- setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3);
- setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5);
- print "hello world" || "tcp://127.0.0.1:9999";
- "tcp://127.0.0.1:9999" || getline x;
- print x;
- }
+~~~~~{.awk}
+ BEGIN {
+ setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3);
+ setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5);
+ print "hello world" || "tcp://127.0.0.1:9999";
+ "tcp://127.0.0.1:9999" || getline x;
+ print x;
+ }
+~~~~~
Here is an interesting example adopting Michael Sanders' AWK web server,
modified for QSEAWK.
- #
- # Michael Sanders' AWK web server for QSEAWK.
- # Orginal code in http://awk.info/?tools/server
- #
- # qseawk --tolerant=on --rwpipe=on webserver.awk
- #
- BEGIN {
- x = 1 # script exits if x < 1
- port = 8080 # port number
- host = "tcpd://0.0.0.0:" port # host string
- url = "http://localhost:" port # server url
- status = 200 # 200 == OK
- reason = "OK" # server response
- RS = ORS = "\r\n" # header line terminators
- doc = Setup() # html document
- len = length(doc) + length(ORS) # length of document
- while (x) {
- if ($1 == "GET") RunApp(substr($2, 2))
- if (! x) break
- print "HTTP/1.0", status, reason || host
- print "Connection: Close" || host
- print "Pragma: no-cache" || host
- print "Content-length:", len || host
- print ORS doc || host
- close(host) # close client connection
- host || getline # wait for new client request
- }
- # server terminated...
- doc = Bye()
- len = length(doc) + length(ORS)
+~~~~~{.awk}
+ #
+ # Michael Sanders' AWK web server for QSEAWK.
+ # Orginal code in http://awk.info/?tools/server
+ #
+ # qseawk --tolerant=on --rwpipe=on webserver.awk
+ #
+ BEGIN {
+ x = 1 # script exits if x < 1
+ port = 8080 # port number
+ host = "tcpd://0.0.0.0:" port # host string
+ url = "http://localhost:" port # server url
+ status = 200 # 200 == OK
+ reason = "OK" # server response
+ RS = ORS = "\r\n" # header line terminators
+ doc = Setup() # html document
+ len = length(doc) + length(ORS) # length of document
+ while (x) {
+ if ($1 == "GET") RunApp(substr($2, 2))
+ if (! x) break
print "HTTP/1.0", status, reason || host
print "Connection: Close" || host
print "Pragma: no-cache" || host
print "Content-length:", len || host
print ORS doc || host
- close(host)
- }
-
- function Setup() {
- tmp = "\
-
Simple gawk server\
- \
- xterm\
-
xcalc\
-
xload\
-
terminate script\
- \
- "
- return tmp
- }
-
- function Bye() {
- tmp = "\
-
Simple gawk server\
- Script Terminated...\
- "
- return tmp
- }
-
- function RunApp(app) {
- if (app == "xterm") {system("xterm&"); return}
- if (app == "xcalc" ) {system("xcalc&"); return}
- if (app == "xload" ) {system("xload&"); return}
- if (app == "exit") {x = 0}
- }
+ close(host) # close client connection
+ host || getline # wait for new client request
+ }
+ # server terminated...
+ doc = Bye()
+ len = length(doc) + length(ORS)
+ print "HTTP/1.0", status, reason || host
+ print "Connection: Close" || host
+ print "Pragma: no-cache" || host
+ print "Content-length:", len || host
+ print ORS doc || host
+ close(host)
+ }
+
+ function Setup() {
+ tmp = "\
+
Simple gawk server\
+ \
+ xterm\
+
xcalc\
+
xload\
+
terminate script\
+ \
+ "
+ return tmp
+ }
+
+ function Bye() {
+ tmp = "\
+
Simple gawk server\
+ Script Terminated...\
+ "
+ return tmp
+ }
+
+ function RunApp(app) {
+ if (app == "xterm") {system("xterm&"); return}
+ if (app == "xcalc" ) {system("xcalc&"); return}
+ if (app == "xload" ) {system("xload&"); return}
+ if (app == "exit") {x = 0}
+ }
+~~~~~
### I/O Character Encoding ###
diff --git a/qse/doc/page/mainpage.md b/qse/doc/page/mainpage.md
index fefaa2a2..cd387d1f 100644
--- a/qse/doc/page/mainpage.md
+++ b/qse/doc/page/mainpage.md
@@ -1,7 +1,7 @@
QSE {#mainpage}
================================================================================
-@image html qse-logo.png
+\image html qse-logo.png
The QSE library implements AWK, SED, and Unix commands in an embeddable form
and defines data types, functions, and classes that you can use when you embed
@@ -20,10 +20,10 @@ Chung, Hyung-Hwan
See the subpages for more information.
-- @ref installation
-- @ref awk-lang
-- @ref awk-embed
-- @ref sed-cmd
-- @ref sed-embed
-- @subpage mem "Memory Management"
+- \ref installation
+- \ref awk-lang
+- \ref awk-embed
+- \ref sed-cmd
+- \ref sed-embed
+- \subpage mem "Memory Management"
diff --git a/qse/include/qse/cmn/Makefile.am b/qse/include/qse/cmn/Makefile.am
index 23c824a4..700c8075 100644
--- a/qse/include/qse/cmn/Makefile.am
+++ b/qse/include/qse/cmn/Makefile.am
@@ -20,6 +20,7 @@ pkginclude_HEADERS = \
lda.h \
main.h \
map.h \
+ mb8.h \
mbwc.h \
mem.h \
mux.h \
diff --git a/qse/include/qse/cmn/Makefile.in b/qse/include/qse/cmn/Makefile.in
index c5fa2c39..7b8c5fcb 100644
--- a/qse/include/qse/cmn/Makefile.in
+++ b/qse/include/qse/cmn/Makefile.in
@@ -54,10 +54,10 @@ SOURCES =
DIST_SOURCES =
am__pkginclude_HEADERS_DIST = alg.h chr.h cp949.h cp950.h dir.h dll.h \
env.h fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h \
- lda.h main.h map.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h \
- oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \
- stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \
- xma.h Mmgr.hpp StdMmgr.hpp Mmged.hpp
+ lda.h main.h map.h mb8.h mbwc.h mem.h mux.h nwad.h nwif.h \
+ nwio.h oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h \
+ slmb.h stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h \
+ utf8.h xma.h Mmgr.hpp StdMmgr.hpp Mmged.hpp
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@@ -266,8 +266,8 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
pkginclude_HEADERS = alg.h chr.h cp949.h cp950.h dir.h dll.h env.h \
fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h lda.h \
- main.h map.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h oht.h \
- opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \
+ main.h map.h mb8.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h \
+ oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \
stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \
xma.h $(am__append_1)
all: all-am
diff --git a/qse/include/qse/cmn/mb8.h b/qse/include/qse/cmn/mb8.h
new file mode 100644
index 00000000..6938c5c1
--- /dev/null
+++ b/qse/include/qse/cmn/mb8.h
@@ -0,0 +1,70 @@
+/*
+ * $Id$
+ *
+ Copyright 2006-2012 Chung, Hyung-Hwan.
+ This file is part of QSE.
+
+ QSE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation, either version 3 of
+ the License, or (at your option) any later version.
+
+ QSE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with QSE. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _QSE_CMN_MB8_H_
+#define _QSE_CMN_MB8_H_
+
+#include <qse/types.h>
+#include <qse/macros.h>
+
+/** \file
+ * This file provides functions, types, macros for mb8 conversion.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * The qse_wctomb8() function converts a wide character to a mb8 sequence.
+ * \return
+ * - 0 is returned if \a wc is invalid, that is, too large to fit in a single byte.
+ * - An integer greater than \a size is returned if the \a mb8 sequence buffer
+ * is not #QSE_NULL and not large enough. This integer is actually the number
+ * of bytes needed.
+ * - If \a mb8 is #QSE_NULL, the number of bytes that would have been stored
+ * into \a mb8 if it had not been #QSE_NULL is returned.
+ * - An integer between 1 and size inclusive is returned in all other cases.
+ */
+qse_size_t qse_wctomb8 (
+ qse_wchar_t wc,
+ qse_mchar_t* mb8,
+ qse_size_t size
+);
+
+/**
+ * The qse_mb8towc() function converts a mb8 sequence to a wide character.
+ * \return
+ * - 0 is returned if the \a mb8 sequence is invalid.
+ * - An integer greater than \a size is returned if the \a mb8 sequence is
+ * not complete.
+ * - An integer between 1 and size inclusive is returned in all other cases.
+ */
+qse_size_t qse_mb8towc (
+ const qse_mchar_t* mb8,
+ qse_size_t size,
+ qse_wchar_t* wc
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/qse/include/qse/cmn/mbwc.h b/qse/include/qse/cmn/mbwc.h
index 29316f16..b0db7df1 100644
--- a/qse/include/qse/cmn/mbwc.h
+++ b/qse/include/qse/cmn/mbwc.h
@@ -21,7 +21,7 @@
#ifndef _QSE_CMN_MBWC_H_
#define _QSE_CMN_MBWC_H_
-/** @file
+/** \file
* This file provides functions and definitions needed for
* multibyte/wide-characer conversion.
*/
@@ -36,7 +36,9 @@ typedef qse_cmgr_t* (*qse_cmgr_finder_t) (const qse_char_t* name);
enum qse_cmgr_id_t
{
QSE_CMGR_SLMB,
- QSE_CMGR_UTF8
+ QSE_CMGR_UTF8,
+ QSE_CMGR_MB8
+
#if defined(QSE_ENABLE_XCMGRS)
,
QSE_CMGR_CP949,
@@ -59,7 +61,7 @@ QSE_EXPORT qse_cmgr_t* qse_findcmgrbyid (
/**
* The qse_getfindcmgr() function find a built-in cmgr matching a given
* @a name and returns it. It returns #QSE_NULL if no match is found.
- * The @a name can be one of "utf8", "slmb", "cp949", "cp950", and an
+ * The @a name can be one of "slmb", "utf8", "mb8", "cp949", "cp950", and an
* empty string. Calling this function with an empty string is the same
* as calling qse_getdflcmgr().
*/
diff --git a/qse/lib/awk/fnc.c b/qse/lib/awk/fnc.c
index 73c30377..11e6cf0c 100644
--- a/qse/lib/awk/fnc.c
+++ b/qse/lib/awk/fnc.c
@@ -225,7 +225,7 @@ static int fnc_close (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (name == QSE_NULL) return -1;
}
- if (a1 != QSE_NULL)
+ if (a1)
{
if (a1->type == QSE_AWK_VAL_STR)
{
@@ -246,7 +246,7 @@ static int fnc_close (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (len == 0)
{
- /* getline or print doesn't allow an emptry for the
+ /* getline or print doesn't allow an empty string for the
* input or output file name. so close should not allow
* it either.
* another reason for this is if close is called explicitly
@@ -346,8 +346,16 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (nargs == 0)
{
- /* flush the console output.
- * fflush() should return -1 on errors */
+ /* fflush() flushes the console output.
+ * fflush() should return -1 on errors.
+ *
+ * if no previous console output statement is seen,
+ * this function won't be able to find the entry.
+ * so it returns -1;
+ *
+ * BEGIN { fflush(); } # fflush() returns -1
+ * BEGIN { print 1; fflush(); } # fflush() returns 0
+ */
n = qse_awk_rtx_flushio (run, QSE_AWK_OUT_CONSOLE, QSE_T(""));
}
else
@@ -380,17 +388,43 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
ptr++;
}
- /* flush the given rio */
+ /* flush the given rio.
+ *
+ * fflush("") flushes all output streams regardless of names.
+ * pass QSE_NULL for the name in that case so that the
+ * callee matches any streams.
+ *
+ * fflush() doesn't specify the type of output streams
+ * so it attempts to flush all types of output streams.
+ *
+ * though not useful, it's possible to have multiple
+ * streams with the same name but of different types.
+ *
+ * BEGIN {
+ * print 1 | "/tmp/x";
+ * print 1 > "/tmp/x";
+ * fflush("/tmp/x");
+ * }
+ */
+
n = flush_io (
- run, QSE_AWK_RIO_FILE,
+ run, QSE_AWK_OUT_FILE,
((len0 == 0)? QSE_NULL: str0), 1);
/*if (n == -99) return -1;*/
n = flush_io (
- run, QSE_AWK_RIO_PIPE,
+ run, QSE_AWK_OUT_APFILE,
+ ((len0 == 0)? QSE_NULL: str0), n);
+ /*if (n == -99) return -1;*/
+ n = flush_io (
+ run, QSE_AWK_OUT_PIPE,
+ ((len0 == 0)? QSE_NULL: str0), n);
+ /*if (n == -99) return -1;*/
+ n = flush_io (
+ run, QSE_AWK_OUT_RWPIPE,
((len0 == 0)? QSE_NULL: str0), n);
/*if (n == -99) return -1;*/
- /* if n remains 1, no ip handlers have been defined for
+ /* if n remains 1, no io handlers have been defined for
* file, pipe, and rwpipe. so make fflush return -1.
* if n is -2, no such named io has been found at all
* if n is -1, the io handler has returned an error */
diff --git a/qse/lib/awk/rio.c b/qse/lib/awk/rio.c
index 782c2daa..bf0049a9 100644
--- a/qse/lib/awk/rio.c
+++ b/qse/lib/awk/rio.c
@@ -114,7 +114,7 @@ static int find_rio_in (
}
/* search the chain for exiting an existing io name */
- while (p != QSE_NULL)
+ while (p)
{
if (p->type == (io_type | io_mask) &&
qse_strcmp (p->name,name) == 0) break;
@@ -688,7 +688,7 @@ int qse_awk_rtx_writeio_str (
}
/* look for the corresponding rio for name */
- while (p != QSE_NULL)
+ while (p)
{
/* the file "1.tmp", in the following code snippets,
* would be opened by the first print statement, but not by
@@ -799,7 +799,7 @@ int qse_awk_rtx_flushio (
{
qse_awk_rio_arg_t* p = run->rio.chain;
qse_awk_rio_impl_t handler;
- int io_type, /*io_mode,*/ io_mask;
+ int io_type, io_mode, io_mask;
qse_ssize_t n;
int ok = 0;
@@ -809,7 +809,7 @@ int qse_awk_rtx_flushio (
/* translate the out_type into the relevant I/O type and mode */
io_type = out_type_map[out_type];
- /*io_mode = out_mode_map[out_type];*/
+ io_mode = out_mode_map[out_type];
io_mask = out_mask_map[out_type];
handler = run->rio.handler[io_type];
@@ -821,9 +821,13 @@ int qse_awk_rtx_flushio (
}
/* look for the corresponding rio for name */
- while (p != QSE_NULL)
+ while (p)
{
- if (p->type == (io_type | io_mask) &&
+ /* without the check for io_mode and p->mode,
+ * QSE_AWK_OUT_FILE and QSE_AWK_OUT_APFILE matches the
+ * same entry since (io_type | io_mask) has the same value
+ * for both. */
+ if (p->type == (io_type | io_mask) && p->mode == io_mode &&
(name == QSE_NULL || qse_strcmp(p->name,name) == 0))
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOERR, QSE_NULL);
@@ -1123,7 +1127,7 @@ int qse_awk_rtx_closeio (
{
qse_awk_rio_arg_t* p = rtx->rio.chain, * px = QSE_NULL;
- while (p != QSE_NULL)
+ while (p)
{
/* it handles the first that matches the given name
* regardless of the io type */
diff --git a/qse/lib/cmn/Makefile.am b/qse/lib/cmn/Makefile.am
index c4b1f2b7..a607e0f0 100644
--- a/qse/lib/cmn/Makefile.am
+++ b/qse/lib/cmn/Makefile.am
@@ -45,6 +45,7 @@ libqsecmn_la_SOURCES = \
ipad.c \
lda.c \
main.c \
+ mb8.c \
mbwc.c \
mbwc-str.c \
mem.c \
diff --git a/qse/lib/cmn/Makefile.in b/qse/lib/cmn/Makefile.in
index ece3ff1e..7c833501 100644
--- a/qse/lib/cmn/Makefile.in
+++ b/qse/lib/cmn/Makefile.in
@@ -88,15 +88,15 @@ libqsecmn_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \
alg-sort.c assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c \
fma.c fmt.c fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c \
- main.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \
- nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \
- pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \
- str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \
- str-dup.c str-dynm.c str-dynw.c str-end.c str-excl.c \
- str-fcpy.c str-fnmat.c str-incl.c str-len.c str-pac.c \
- str-pbrk.c str-put.c str-rev.c str-rot.c str-set.c str-spl.c \
- str-spn.c str-str.c str-subst.c str-tok.c str-trm.c str-word.c \
- task.c time.c tio.c tre.c tre-ast.c tre-compile.c \
+ main.c mb8.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c \
+ nwif.c nwif-cfg.c nwio.c oht.c opt.c path-basename.c \
+ path-canon.c pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c \
+ stdio.c str-beg.c str-cat.c str-chr.c str-cnv.c str-cmp.c \
+ str-cpy.c str-del.c str-dup.c str-dynm.c str-dynw.c str-end.c \
+ str-excl.c str-fcpy.c str-fnmat.c str-incl.c str-len.c \
+ str-pac.c str-pbrk.c str-put.c str-rev.c str-rot.c str-set.c \
+ str-spl.c str-spn.c str-str.c str-subst.c str-tok.c str-trm.c \
+ str-word.c task.c time.c tio.c tre.c tre-ast.c tre-compile.c \
tre-match-backtrack.c tre-match-parallel.c tre-parse.c \
tre-stack.c uri.c utf8.c xma.c uni.c cp949.c cp950.c
@ENABLE_BUNDLED_UNICODE_TRUE@am__objects_1 = uni.lo
@@ -104,17 +104,17 @@ am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \
am_libqsecmn_la_OBJECTS = alg-base64.lo alg-rand.lo alg-search.lo \
alg-sort.lo assert.lo chr.lo dir.lo dll.lo env.lo gdl.lo \
htb.lo fio.lo fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo glob.lo \
- hton.lo ipad.lo lda.lo main.lo mbwc.lo mbwc-str.lo mem.lo \
- mux.lo nwad.lo nwad-skad.lo nwif.lo nwif-cfg.lo nwio.lo oht.lo \
- opt.lo path-basename.lo path-canon.lo pio.lo pma.lo rbt.lo \
- rex.lo sio.lo sll.lo slmb.lo stdio.lo str-beg.lo str-cat.lo \
- str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo str-del.lo \
- str-dup.lo str-dynm.lo str-dynw.lo str-end.lo str-excl.lo \
- str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo str-pac.lo \
- str-pbrk.lo str-put.lo str-rev.lo str-rot.lo str-set.lo \
- str-spl.lo str-spn.lo str-str.lo str-subst.lo str-tok.lo \
- str-trm.lo str-word.lo task.lo time.lo tio.lo tre.lo \
- tre-ast.lo tre-compile.lo tre-match-backtrack.lo \
+ hton.lo ipad.lo lda.lo main.lo mb8.lo mbwc.lo mbwc-str.lo \
+ mem.lo mux.lo nwad.lo nwad-skad.lo nwif.lo nwif-cfg.lo nwio.lo \
+ oht.lo opt.lo path-basename.lo path-canon.lo pio.lo pma.lo \
+ rbt.lo rex.lo sio.lo sll.lo slmb.lo stdio.lo str-beg.lo \
+ str-cat.lo str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo \
+ str-del.lo str-dup.lo str-dynm.lo str-dynw.lo str-end.lo \
+ str-excl.lo str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo \
+ str-pac.lo str-pbrk.lo str-put.lo str-rev.lo str-rot.lo \
+ str-set.lo str-spl.lo str-spn.lo str-str.lo str-subst.lo \
+ str-tok.lo str-trm.lo str-word.lo task.lo time.lo tio.lo \
+ tre.lo tre-ast.lo tre-compile.lo tre-match-backtrack.lo \
tre-match-parallel.lo tre-parse.lo tre-stack.lo uri.lo utf8.lo \
xma.lo $(am__objects_1) $(am__objects_2)
libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS)
@@ -355,7 +355,7 @@ noinst_HEADERS = \
libqsecmn_la_SOURCES = alg-base64.c alg-rand.c alg-search.c alg-sort.c \
assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c fma.c fmt.c \
fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c main.c \
- mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \
+ mb8.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \
nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \
pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \
str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \
@@ -475,6 +475,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ipad.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mb8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@
diff --git a/qse/lib/cmn/mb8.c b/qse/lib/cmn/mb8.c
new file mode 100644
index 00000000..c1a7a06f
--- /dev/null
+++ b/qse/lib/cmn/mb8.c
@@ -0,0 +1,38 @@
+/*
+ * $Id$
+ *
+ Copyright 2006-2012 Chung, Hyung-Hwan.
+ This file is part of QSE.
+
+ QSE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation, either version 3 of
+ the License, or (at your option) any later version.
+
+ QSE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with QSE. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <qse/cmn/mb8.h>
+
+qse_size_t qse_wctomb8 (qse_wchar_t wc, qse_mchar_t* utf8, qse_size_t size)
+{
+	if (wc > QSE_TYPE_MAX(qse_uint8_t)) return 0; /* illegal character; checked first so that 0 is returned whatever the buffer size */
+	if (size <= 0) return size + 1; /* buffer too small; the value returned is the 1 byte needed */
+	if (utf8) *(qse_uint8_t*)utf8 = wc; /* store only when a buffer is given; a null buffer just queries the length */
+	return 1; /* an mb8 sequence is always a single byte */
+}
+
+qse_size_t qse_mb8towc (
+	const qse_mchar_t* utf8, qse_size_t size, qse_wchar_t* wc)
+{
+	QSE_ASSERT (utf8 != QSE_NULL); /* the caller must pass a sequence */
+	QSE_ASSERT (size > 0); /* ... holding at least one byte */
+	wc[0] = ((const qse_uint8_t*)utf8)[0]; /* read the first byte through qse_uint8_t and store it as the wide character */
+	return 1; /* exactly one byte is consumed per character */
+}
diff --git a/qse/lib/cmn/mbwc.c b/qse/lib/cmn/mbwc.c
index 1b39c73d..f52efbe9 100644
--- a/qse/lib/cmn/mbwc.c
+++ b/qse/lib/cmn/mbwc.c
@@ -21,6 +21,7 @@
#include
#include
#include
+#include <qse/cmn/mb8.h>
#include
#include
#include
@@ -30,31 +31,18 @@
* dependent.
*/
-/* TODO: binary cmgr -> simply expands a byte to wchar and vice versa. */
-
static qse_cmgr_t builtin_cmgr[] =
{
- {
- qse_slmbtoslwc,
- qse_slwctoslmb
- },
-
+ /* keep the order aligned with qse_cmgr_id_t values in <qse/cmn/mbwc.h> */
+ { qse_slmbtoslwc, qse_slwctoslmb },
+ { qse_utf8touc, qse_uctoutf8 },
+ { qse_mb8towc, qse_wctomb8 }
#if defined(QSE_ENABLE_XCMGRS)
- {
- qse_cp949touc,
- qse_uctocp949
- },
-
- {
- qse_cp950touc,
- qse_uctocp950
- },
+ ,
+ { qse_cp949touc, qse_uctocp949 },
+ { qse_cp950touc, qse_uctocp950 }
#endif
- {
- qse_utf8touc,
- qse_uctoutf8
- }
};
static qse_cmgr_t* dfl_cmgr = &builtin_cmgr[QSE_CMGR_SLMB];
@@ -84,6 +72,8 @@ qse_cmgr_t* qse_findcmgrbyid (qse_cmgr_id_t id)
qse_cmgr_t* qse_findcmgr (const qse_char_t* name)
{
+ /* TODO: binary search or something better for performance improvement
+ * when there are many entries in the table */
static struct
{
const qse_char_t* name;
@@ -95,7 +85,8 @@ qse_cmgr_t* qse_findcmgr (const qse_char_t* name)
{ QSE_T("cp949"), QSE_CMGR_CP949 },
{ QSE_T("cp950"), QSE_CMGR_CP950 },
#endif
- { QSE_T("slmb"), QSE_CMGR_UTF8 }
+ { QSE_T("slmb"), QSE_CMGR_SLMB },
+ { QSE_T("mb8"), QSE_CMGR_MB8 }
};
if (name)