fixed a bug in awk's built-in function fflush().

added the mb8 conversion routines for raw byte conversion
This commit is contained in:
hyung-hwan 2013-01-16 05:04:56 +00:00
parent 444abec9fe
commit 121ed07cf0
12 changed files with 508 additions and 256 deletions

View File

@ -30,6 +30,41 @@ The following code snippet is a valid QSEAWK program that print the string
} }
~~~~~ ~~~~~
In general, QSEAWK starts executing the *BEGIN* blocks. For each input record
from an input stream, it executes the pattern-action blocks if the pattern
evaluates to true. Finally, it executes the *END* blocks. By default, each
line in the input stream is an input record. None of these blocks are
mandatory. However, a useful program needs at least 1 block to be present.
For the following input records,
~~~~~{.txt}
abcdefgahijklmn
1234567890
opqrstuvwxyz
~~~~~
this AWK program produces
~~~~~{.awk}
BEGIN { mr=0; }
/abc|vwx/ { print $0; mr++; }
END {
print "total records: " NR;
print "matching records: " mr;
}
~~~~~
this output text.
~~~~~{.txt}
abcdefgahijklmn
opqrstuvwxyz
total records: 3
matching records: 2
~~~~~
The QSEAWK library provides a capability to use a user-defined function
as an entry point instead of executing these blocks. See \ref awk-embed for
how to change the entry point.
Comments Comments
-------- --------
@ -221,15 +256,15 @@ to a non-zero value. This is possible if you allow assigning a map to
another non-map variable with #QSE_AWK_MAPTOVAR. In this case, a map another non-map variable with #QSE_AWK_MAPTOVAR. In this case, a map
is not deep-copied but the reference to it is copied. is not deep-copied but the reference to it is copied.
@code ~~~~~{.awk}
BEGIN { BEGIN {
a[10]=20; a[10]=20;
b=a; b=a;
b[20]=40; b[20]=40;
for (i in a) print i, a[i]; for (i in a) print i, a[i];
print a===b; print a===b;
} }
@endcode ~~~~~
The === operator may be also useful when you want to indicate an error The === operator may be also useful when you want to indicate an error
@ -237,21 +272,21 @@ with an uninitialized variable. The following code check if the function
returned a map. Since the variable 'nil' has never been assigned, its returned a map. Since the variable 'nil' has never been assigned, its
internal type is 'NIL' and internal type is 'NIL' and
@code ~~~~~{.awk}
function a () function a ()
{ {
x[10] = 2; x[10] = 2;
return x; return x;
} }
BEGIN { BEGIN {
t = a(); t = a();
if (t === nil) if (t === nil)
print "nil"; print "nil";
else else
print "ok"; print "ok";
} }
@endcode. ~~~~~
The !== operator is a negated form of the === operator. The !== operator is a negated form of the === operator.
@ -287,14 +322,18 @@ stream being processed. The directive can only be used at the outmost scope
where global variable declarations, *BEGIN*, *END*, and/or pattern-action where global variable declarations, *BEGIN*, *END*, and/or pattern-action
blocks appear. blocks appear.
@include "abc.awk" ~~~~~{.awk}
BEGIN { func_in_abc (); } @include "abc.awk"
BEGIN { func_in_abc (); }
~~~~~
A semicolon is optional after the included file name. The following is the A semicolon is optional after the included file name. The following is the
same as the sample above. same as the sample above.
@include "abc.awk"; ~~~~~{.awk}
BEGIN { func_in_abc(); } @include "abc.awk";
BEGIN { func_in_abc(); }
~~~~~
If #QSE_AWK_NEWLINE is off, the semicolon is required. If #QSE_AWK_NEWLINE is off, the semicolon is required.
@ -330,41 +369,42 @@ of expressions separated with a comma. Each expression in the group is
evaluated in the appearing order. The evaluation result of the last evaluated in the appearing order. The evaluation result of the last
expression in the group is returned as that of the group. expression in the group is returned as that of the group.
@code ~~~~~{.awk}
BEGIN { BEGIN {
c = (1, 2, 9); c = (1, 2, 9);
a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c))); a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c)));
print c; # 9; print c; # 9;
print a; # -63 print a; # -63
print k; # 17 print k; # 17
} }
@endcode ~~~~~
### RETURN ### ### RETURN ###
The return statement is valid in pattern-action blocks as well as in functions. The return statement is valid in pattern-action blocks as well as in functions.
The execution of a calling block is aborted once the return statement is executed. The execution of a calling block is aborted once the return statement is executed.
@code ~~~~~
$ qseawk 'BEGIN { return 20; }' ; echo $? $ qseawk 'BEGIN { return 20; }' ; echo $?
20 20
#endcode ~~~~~
If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function. If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function.
@code
function getarray() {
@local a;
a["one"] = 1;
a["two"] = 2;
a["three"] = 3;
return a;
}
BEGIN { ~~~~~{.awk}
@local x; function getarray() {
x = getarray(); @local a;
for (i in x) print i, x[i]; a["one"] = 1;
} a["two"] = 2;
@endcode a["three"] = 3;
return a;
}
BEGIN {
@local x;
x = getarray();
for (i in x) print i, x[i];
}
~~~~~
### RESET ### ### RESET ###
@ -373,14 +413,14 @@ After that, the array variable can also be used as a scalar variable again.
You must have #QSE_AWK_RESET on to be able to be able to use this You must have #QSE_AWK_RESET on to be able to be able to use this
statement. statement.
@code ~~~~~{.awk}
BEGIN { BEGIN {
a[1] = 20; a[1] = 20;
reset a; reset a;
a = 20; # this is legal a = 20; # this is legal
print a; print a;
} }
@endcode ~~~~~
### ABORT ### ### ABORT ###
The abort statment is similar to the exit statement except that The abort statment is similar to the exit statement except that
@ -551,9 +591,11 @@ For this reason, you are advised to parenthesize *getline* and its related
components to avoid confusion whenever necessary. The example reading into components to avoid confusion whenever necessary. The example reading into
the variable *line* can be made clearer with parenthesization. the variable *line* can be made clearer with parenthesization.
BEGIN { ~~~~~{.awk}
while ((getline line) > 0) print line; BEGIN {
} while ((getline line) > 0) print line;
}
~~~~~
### print ### ### print ###
**TODO** **TODO**
@ -565,30 +607,34 @@ they are function calls. In this mode, they return a negative number
on failure and a zero on success and any I/O failure doesn't abort on failure and a zero on success and any I/O failure doesn't abort
a running program. a running program.
BEGIN { ~~~~~{.awk}
a = print "hello, world" > "/dev/null"; BEGIN {
print a; a = print "hello, world" > "/dev/null";
a = print ("hello, world") > "/dev/null"; print a;
print a; a = print ("hello, world") > "/dev/null";
} print a;
}
~~~~~
Since print and printf are like function calls, you can use them Since print and printf are like function calls, you can use them
in any context where a normal expression is allowed. For example, in any context where a normal expression is allowed. For example,
printf is used as a conditional expression in an 'if' statement printf is used as a conditional expression in an 'if' statement
in the sample code below. in the sample code below.
BEGIN { ~~~~~{.awk}
if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1) BEGIN {
print "FAILURE"; if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1)
else print "FAILURE";
print "SUCCESS"; else
} print "SUCCESS";
}
~~~~~
### close (io-name, what) ### ### close (io-name, what) ###
The *close* function closes a stream indicated by the name *io-name*. It takes The *close* function closes a stream indicated by the name *io-name*.
an optional parameter *what* indicating whether input or output should be It takes an optional parameter *what* indicating whether input or output
closed. should be closed.
If *io-name* is a file, it closes the file handle associated; If *io-name* is a file, it closes the file handle associated;
If *io-name* is a command, it may kill the running process from the command, If *io-name* is a command, it may kill the running process from the command,
@ -597,12 +643,64 @@ If *io-name* is a network stream, it tears down connections to the network
peer and closes the socket handles. peer and closes the socket handles.
The optional paramenter *what* must be one of *r* or *w* when used is useful The optional paramenter *what* must be one of *r* or *w* when used is useful
when *io-name* is a command invoked for the two-way operator. The value of when *io-name* is a command invoked for the two-way pipe operator. The value
*r* causes the function to close the read-end of the pipe and the value of of *r* causes the function to close the read-end of the pipe and the value of
*w* causes the function to close the write-end of the pipe. *w* causes the function to close the write-end of the pipe.
The function returns 0 on success and -1 on failure. The function returns 0 on success and -1 on failure.
Though not so useful, it is possible to create more than one stream of different
kinds under the same name. It is undefined which stream *close*
should close in the following program.
~~~~~{.awk}
BEGIN {
"/tmp/x" || getline y; # rwpipe stream
print 1 | "/tmp/x"; # pipe stream
print 1 > "/tmp/x"; # file stream
close ("/tmp/x");
}
~~~~~
### fflush (io-name) ###
The *fflush* function flushes the output stream indicated by *io-name*.
If *io-name* is not specified, it flushes the open console output stream.
If *io-name* is an empty string, it flushes all open output streams.
It returns 0 on success and -1 on failure.
QSEAWK doesn't open the console output stream until it executes an output
command like *print* or *printf*, so fflush() returns -1 in the following
program.
~~~~~{.awk}
BEGIN {
fflush();
}
~~~~~
The *print* command is executed before fflush() in the following program.
When fflush() is executed, the output stream is already open, so fflush() returns 0.
~~~~~{.awk}
BEGIN {
print 1;
fflush();
}
~~~~~
Though not so useful, it is possible to create more than one output stream
of different kinds under the same name. *fflush* in the following program
flushes both the file stream and the pipe stream.
~~~~~{.awk}
BEGIN {
print 1 | "/tmp/x"; # pipe stream
print 1 > "/tmp/x"; # file stream
fflush ("/tmp/x");
}
~~~~~
### setioattr (io-name, attr-name, attr-value) ### ### setioattr (io-name, attr-name, attr-value) ###
The *setioattr* function changes the I/O attribute of the name *attr-name* to The *setioattr* function changes the I/O attribute of the name *attr-name* to
@ -614,17 +712,19 @@ success and -1 on failure.
- *attr-name* is one of *codepage*, *ctimeout*, *atimeout*, *rtimeout*, - *attr-name* is one of *codepage*, *ctimeout*, *atimeout*, *rtimeout*,
*wtimeout*. *wtimeout*.
- *attr-value* varies depending on *attr-name*. - *attr-value* varies depending on *attr-name*.
+ codepage: *cp949*, *cp950*, *utf8* + codepage: *cp949*, *cp950*, *utf8*, *slmb*, *mb8*
+ ctimeout, atimeout, rtimeout, wtimeout: the number of seconds. effective + ctimeout, atimeout, rtimeout, wtimeout: the number of seconds. effective
on socket based streams only. you may use a floating-point number for on socket based streams only. you may use a floating-point number for
lower resoluation than a second. a negative value turns off timeout. lower resoluation than a second. a negative value turns off timeout.
See this sample that prints the contents of a document encoded in cp949. See this sample that prints the contents of a document encoded in cp949.
BEGIN { ~~~~~{.awk}
setioattr ("README.TXT", "codepage", "cp949"); BEGIN {
while ((getline x < "README.TXT") > 0) print x; setioattr ("README.TXT", "codepage", "cp949");
} while ((getline x < "README.TXT") > 0) print x;
}
~~~~~
### getioattr (io-name, attr-name, attr-value) ### ### getioattr (io-name, attr-name, attr-value) ###
@ -634,12 +734,14 @@ is set to the variable referenced by *attr-value*. See *setioattr* for
description on *io-name* and *attr-name*. It returns 0 on success and -1 on description on *io-name* and *attr-name*. It returns 0 on success and -1 on
failure. failure.
BEGIN { ~~~~~{.awk}
setioattr ("README.TXT", "codepage", "cp949"); BEGIN {
if (getioattr ("README.TXT", "codepage", codepage) <= -1) setioattr ("README.TXT", "codepage", "cp949");
print "codepage unknown"; if (getioattr ("README.TXT", "codepage", codepage) <= -1)
else print "codepage: " codepage; print "codepage unknown";
} else print "codepage: " codepage;
}
~~~~~
### Two-way Pipe ### ### Two-way Pipe ###
@ -649,17 +751,19 @@ must be set with #QSE_AWK_RWPIPE to be able to use the two-way pipe.
The example redirects the output of *print* to the external *sort* command The example redirects the output of *print* to the external *sort* command
and reads back the output. and reads back the output.
BEGIN { ~~~~~{.awk}
print "15" || "sort"; BEGIN {
print "14" || "sort"; print "15" || "sort";
print "13" || "sort"; print "14" || "sort";
print "12" || "sort"; print "13" || "sort";
print "11" || "sort"; print "12" || "sort";
# close the input side of the pipe as 'sort' starts emitting result print "11" || "sort";
# once the input is closed. # close the input side of the pipe as 'sort' starts emitting result
close ("sort", "r"); # once the input is closed.
while (("sort" || getline x) > 0) print x; close ("sort", "r");
} while (("sort" || getline x) > 0) print x;
}
~~~~~
This two-way pipe can create a TCP or UDP connection if the pipe command This two-way pipe can create a TCP or UDP connection if the pipe command
string is prefixed with one of the followings: string is prefixed with one of the followings:
@ -671,100 +775,106 @@ string is prefixed with one of the followings:
See this example. See this example.
BEGIN { ~~~~~{.awk}
# it binds a TCP socket to the IPv6 address :: and the port number BEGIN {
# 9999 and waits for the first coming connection. It repeats writing # it binds a TCP socket to the IPv6 address :: and the port number
# "hello world" to the first connected peer and reading a line from # 9999 and waits for the first coming connection. It repeats writing
# it until the session is torn down. # "hello world" to the first connected peer and reading a line from
do { # it until the session is torn down.
print "hello world" || "tcpd://[::]:9999"; do {
if (("tcpd://[::]:9999" || getline x) <= 0) break; print "hello world" || "tcpd://[::]:9999";
print x; if (("tcpd://[::]:9999" || getline x) <= 0) break;
} print x;
while(1); }
} while(1);
}
~~~~~
You can manipulate TCP or UDP timeouts for connection, accepting, reading, and You can manipulate TCP or UDP timeouts for connection, accepting, reading, and
writing with the *setioattr* function and the *getioattr* function. writing with the *setioattr* function and the *getioattr* function.
See the example below. See the example below.
BEGIN { ~~~~~{.awk}
setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3); BEGIN {
setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5); setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3);
print "hello world" || "tcp://127.0.0.1:9999"; setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5.5);
"tcp://127.0.0.1:9999" || getline x; print "hello world" || "tcp://127.0.0.1:9999";
print x; "tcp://127.0.0.1:9999" || getline x;
} print x;
}
~~~~~
Here is an interesting example adopting Michael Sanders' AWK web server, Here is an interesting example adopting Michael Sanders' AWK web server,
modified for QSEAWK. modified for QSEAWK.
# ~~~~~{.awk}
# Michael Sanders' AWK web server for QSEAWK. #
# Orginal code in http://awk.info/?tools/server # Michael Sanders' AWK web server for QSEAWK.
# # Orginal code in http://awk.info/?tools/server
# qseawk --tolerant=on --rwpipe=on webserver.awk #
# # qseawk --tolerant=on --rwpipe=on webserver.awk
BEGIN { #
x = 1 # script exits if x < 1 BEGIN {
port = 8080 # port number x = 1 # script exits if x < 1
host = "tcpd://0.0.0.0:" port # host string port = 8080 # port number
url = "http://localhost:" port # server url host = "tcpd://0.0.0.0:" port # host string
status = 200 # 200 == OK url = "http://localhost:" port # server url
reason = "OK" # server response status = 200 # 200 == OK
RS = ORS = "\r\n" # header line terminators reason = "OK" # server response
doc = Setup() # html document RS = ORS = "\r\n" # header line terminators
len = length(doc) + length(ORS) # length of document doc = Setup() # html document
while (x) { len = length(doc) + length(ORS) # length of document
if ($1 == "GET") RunApp(substr($2, 2)) while (x) {
if (! x) break if ($1 == "GET") RunApp(substr($2, 2))
print "HTTP/1.0", status, reason || host if (! x) break
print "Connection: Close" || host
print "Pragma: no-cache" || host
print "Content-length:", len || host
print ORS doc || host
close(host) # close client connection
host || getline # wait for new client request
}
# server terminated...
doc = Bye()
len = length(doc) + length(ORS)
print "HTTP/1.0", status, reason || host print "HTTP/1.0", status, reason || host
print "Connection: Close" || host print "Connection: Close" || host
print "Pragma: no-cache" || host print "Pragma: no-cache" || host
print "Content-length:", len || host print "Content-length:", len || host
print ORS doc || host print ORS doc || host
close(host) close(host) # close client connection
} host || getline # wait for new client request
}
function Setup() { # server terminated...
tmp = "<html>\ doc = Bye()
<head><title>Simple gawk server</title></head>\ len = length(doc) + length(ORS)
<body>\ print "HTTP/1.0", status, reason || host
<p><a href=" url "/xterm>xterm</a>\ print "Connection: Close" || host
<p><a href=" url "/xcalc>xcalc</a>\ print "Pragma: no-cache" || host
<p><a href=" url "/xload>xload</a>\ print "Content-length:", len || host
<p><a href=" url "/exit>terminate script</a>\ print ORS doc || host
</body>\ close(host)
</html>" }
return tmp
} function Setup() {
tmp = "<html>\
function Bye() { <head><title>Simple gawk server</title></head>\
tmp = "<html>\ <body>\
<head><title>Simple gawk server</title></head>\ <p><a href=" url "/xterm>xterm</a>\
<body><p>Script Terminated...</body>\ <p><a href=" url "/xcalc>xcalc</a>\
</html>" <p><a href=" url "/xload>xload</a>\
return tmp <p><a href=" url "/exit>terminate script</a>\
} </body>\
</html>"
function RunApp(app) { return tmp
if (app == "xterm") {system("xterm&"); return} }
if (app == "xcalc" ) {system("xcalc&"); return}
if (app == "xload" ) {system("xload&"); return} function Bye() {
if (app == "exit") {x = 0} tmp = "<html>\
} <head><title>Simple gawk server</title></head>\
<body><p>Script Terminated...</body>\
</html>"
return tmp
}
function RunApp(app) {
if (app == "xterm") {system("xterm&"); return}
if (app == "xcalc" ) {system("xcalc&"); return}
if (app == "xload" ) {system("xload&"); return}
if (app == "exit") {x = 0}
}
~~~~~
### I/O Character Encoding ### ### I/O Character Encoding ###

View File

@ -1,7 +1,7 @@
QSE {#mainpage} QSE {#mainpage}
================================================================================ ================================================================================
@image html qse-logo.png \image html qse-logo.png
The QSE library implements AWK, SED, and Unix commands in an embeddable form The QSE library implements AWK, SED, and Unix commands in an embeddable form
and defines data types, functions, and classes that you can use when you embed and defines data types, functions, and classes that you can use when you embed
@ -20,10 +20,10 @@ Chung, Hyung-Hwan <hyunghwan.chung@gmail.com>
See the subpages for more information. See the subpages for more information.
- @ref installation - \ref installation
- @ref awk-lang - \ref awk-lang
- @ref awk-embed - \ref awk-embed
- @ref sed-cmd - \ref sed-cmd
- @ref sed-embed - \ref sed-embed
- @subpage mem "Memory Management" - \subpage mem "Memory Management"

View File

@ -20,6 +20,7 @@ pkginclude_HEADERS = \
lda.h \ lda.h \
main.h \ main.h \
map.h \ map.h \
mb8.h \
mbwc.h \ mbwc.h \
mem.h \ mem.h \
mux.h \ mux.h \

View File

@ -54,10 +54,10 @@ SOURCES =
DIST_SOURCES = DIST_SOURCES =
am__pkginclude_HEADERS_DIST = alg.h chr.h cp949.h cp950.h dir.h dll.h \ am__pkginclude_HEADERS_DIST = alg.h chr.h cp949.h cp950.h dir.h dll.h \
env.h fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h \ env.h fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h \
lda.h main.h map.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h \ lda.h main.h map.h mb8.h mbwc.h mem.h mux.h nwad.h nwif.h \
oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \ nwio.h oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h \
stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \ slmb.h stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h \
xma.h Mmgr.hpp StdMmgr.hpp Mmged.hpp utf8.h xma.h Mmgr.hpp StdMmgr.hpp Mmged.hpp
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \ am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@ -266,8 +266,8 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@ top_srcdir = @top_srcdir@
pkginclude_HEADERS = alg.h chr.h cp949.h cp950.h dir.h dll.h env.h \ pkginclude_HEADERS = alg.h chr.h cp949.h cp950.h dir.h dll.h env.h \
fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h lda.h \ fio.h fma.h fmt.h fs.h gdl.h glob.h htb.h hton.h ipad.h lda.h \
main.h map.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h oht.h \ main.h map.h mb8.h mbwc.h mem.h mux.h nwad.h nwif.h nwio.h \
opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \ oht.h opt.h path.h pio.h pma.h rbt.h rex.h sio.h sll.h slmb.h \
stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \ stdio.h str.h task.h time.h tio.h tre.h uni.h uri.h utf8.h \
xma.h $(am__append_1) xma.h $(am__append_1)
all: all-am all: all-am

70
qse/include/qse/cmn/mb8.h Normal file
View File

@ -0,0 +1,70 @@
/*
* $Id$
*
Copyright 2006-2012 Chung, Hyung-Hwan.
This file is part of QSE.
QSE is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
QSE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _QSE_CMN_MB8_H_
#define _QSE_CMN_MB8_H_
#include <qse/types.h>
#include <qse/macros.h>
/** \file
* This file provides functions, types, macros for mb8 conversion.
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
* The qse_wctomb8() function converts a wide character to a mb8 sequence.
*
* \param wc   wide character to convert.
* \param mb8  buffer to receive the resulting mb8 sequence. It may be
*             #QSE_NULL, in which case nothing is stored and only the
*             required number of bytes is reported.
* \param size capacity of the \a mb8 buffer; the return value is compared
*             against it to signal an insufficient buffer.
*
* \return
* - 0 is returned if \a wc is invalid.
* - An integer greater than \a size is returned if the \a mb8 sequence buffer
* is not #QSE_NULL and not large enough. This integer is actually the number
* of bytes needed.
* - If \a mb8 is #QSE_NULL, the number of bytes that would have been stored
* into \a mb8 if it had not been #QSE_NULL is returned.
* - An integer between 1 and size inclusive is returned in all other cases.
*/
qse_size_t qse_wctomb8 (
qse_wchar_t wc,
qse_mchar_t* mb8,
qse_size_t size
);
/**
* The qse_mb8towc() function converts a mb8 sequence to a wide character.
*
* \param mb8  mb8 sequence to convert.
* \param size length of the \a mb8 sequence available to the function
*             (presumably in bytes - confirm against the implementation).
* \param wc   presumably receives the converted wide character; whether
*             #QSE_NULL is accepted here is not stated - confirm against
*             the implementation.
*
* \return
* - 0 is returned if the \a mb8 sequence is invalid.
* - An integer greater than \a size is returned if the \a mb8 sequence is
* not complete.
* - An integer between 1 and size inclusive is returned in all other cases.
*/
qse_size_t qse_mb8towc (
const qse_mchar_t* mb8,
qse_size_t size,
qse_wchar_t* wc
);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -21,7 +21,7 @@
#ifndef _QSE_CMN_MBWC_H_ #ifndef _QSE_CMN_MBWC_H_
#define _QSE_CMN_MBWC_H_ #define _QSE_CMN_MBWC_H_
/** @file /** \file
* This file provides functions and definitions needed for * This file provides functions and definitions needed for
* multibyte/wide-characer conversion. * multibyte/wide-characer conversion.
*/ */
@ -36,7 +36,9 @@ typedef qse_cmgr_t* (*qse_cmgr_finder_t) (const qse_char_t* name);
enum qse_cmgr_id_t enum qse_cmgr_id_t
{ {
QSE_CMGR_SLMB, QSE_CMGR_SLMB,
QSE_CMGR_UTF8 QSE_CMGR_UTF8,
QSE_CMGR_MB8
#if defined(QSE_ENABLE_XCMGRS) #if defined(QSE_ENABLE_XCMGRS)
, ,
QSE_CMGR_CP949, QSE_CMGR_CP949,
@ -59,7 +61,7 @@ QSE_EXPORT qse_cmgr_t* qse_findcmgrbyid (
/** /**
* The qse_getfindcmgr() function find a built-in cmgr matching a given * The qse_getfindcmgr() function find a built-in cmgr matching a given
* @a name and returns it. It returns #QSE_NULL if no match is found. * @a name and returns it. It returns #QSE_NULL if no match is found.
* The @a name can be one of "utf8", "slmb", "cp949", "cp950", and an * The @a name can be one of "slmb", "utf8", "mb8", "cp949", "cp950", and an
* empty string. Calling this function with an empty string is the same * empty string. Calling this function with an empty string is the same
* as calling qse_getdflcmgr(). * as calling qse_getdflcmgr().
*/ */

View File

@ -225,7 +225,7 @@ static int fnc_close (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (name == QSE_NULL) return -1; if (name == QSE_NULL) return -1;
} }
if (a1 != QSE_NULL) if (a1)
{ {
if (a1->type == QSE_AWK_VAL_STR) if (a1->type == QSE_AWK_VAL_STR)
{ {
@ -246,7 +246,7 @@ static int fnc_close (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (len == 0) if (len == 0)
{ {
/* getline or print doesn't allow an emptry for the /* getline or print doesn't allow an empty string for the
* input or output file name. so close should not allow * input or output file name. so close should not allow
* it either. * it either.
* another reason for this is if close is called explicitly * another reason for this is if close is called explicitly
@ -346,8 +346,16 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (nargs == 0) if (nargs == 0)
{ {
/* flush the console output. /* fflush() flushes the console output.
* fflush() should return -1 on errors */ * fflush() should return -1 on errors.
*
* if no previous console output statement is seen,
* this function won't be able to find the entry.
* so it returns -1;
*
* BEGIN { fflush(); } # fflush() returns -1
* BEGIN { print 1; fflush(); } # fflush() returns 0
*/
n = qse_awk_rtx_flushio (run, QSE_AWK_OUT_CONSOLE, QSE_T("")); n = qse_awk_rtx_flushio (run, QSE_AWK_OUT_CONSOLE, QSE_T(""));
} }
else else
@ -380,17 +388,43 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
ptr++; ptr++;
} }
/* flush the given rio */ /* flush the given rio.
*
* fflush("") flushes all output streams regardless of names.
* pass QSE_NULL for the name in that case so that the
* callee matches any streams.
*
* fflush() doesn't specify the type of output streams
* so it attempts to flush all types of output streams.
*
* though not useful, it's possible to have multiple
* streams with the same name but of different types.
*
* BEGIN {
* print 1 | "/tmp/x";
* print 1 > "/tmp/x";
* fflush("/tmp/x");
* }
*/
n = flush_io ( n = flush_io (
run, QSE_AWK_RIO_FILE, run, QSE_AWK_OUT_FILE,
((len0 == 0)? QSE_NULL: str0), 1); ((len0 == 0)? QSE_NULL: str0), 1);
/*if (n == -99) return -1;*/ /*if (n == -99) return -1;*/
n = flush_io ( n = flush_io (
run, QSE_AWK_RIO_PIPE, run, QSE_AWK_OUT_APFILE,
((len0 == 0)? QSE_NULL: str0), n);
/*if (n == -99) return -1;*/
n = flush_io (
run, QSE_AWK_OUT_PIPE,
((len0 == 0)? QSE_NULL: str0), n);
/*if (n == -99) return -1;*/
n = flush_io (
run, QSE_AWK_OUT_RWPIPE,
((len0 == 0)? QSE_NULL: str0), n); ((len0 == 0)? QSE_NULL: str0), n);
/*if (n == -99) return -1;*/ /*if (n == -99) return -1;*/
/* if n remains 1, no ip handlers have been defined for /* if n remains 1, no io handlers have been defined for
* file, pipe, and rwpipe. so make fflush return -1. * file, pipe, and rwpipe. so make fflush return -1.
* if n is -2, no such named io has been found at all * if n is -2, no such named io has been found at all
* if n is -1, the io handler has returned an error */ * if n is -1, the io handler has returned an error */

View File

@ -114,7 +114,7 @@ static int find_rio_in (
} }
/* search the chain for exiting an existing io name */ /* search the chain for exiting an existing io name */
while (p != QSE_NULL) while (p)
{ {
if (p->type == (io_type | io_mask) && if (p->type == (io_type | io_mask) &&
qse_strcmp (p->name,name) == 0) break; qse_strcmp (p->name,name) == 0) break;
@ -688,7 +688,7 @@ int qse_awk_rtx_writeio_str (
} }
/* look for the corresponding rio for name */ /* look for the corresponding rio for name */
while (p != QSE_NULL) while (p)
{ {
/* the file "1.tmp", in the following code snippets, /* the file "1.tmp", in the following code snippets,
* would be opened by the first print statement, but not by * would be opened by the first print statement, but not by
@ -799,7 +799,7 @@ int qse_awk_rtx_flushio (
{ {
qse_awk_rio_arg_t* p = run->rio.chain; qse_awk_rio_arg_t* p = run->rio.chain;
qse_awk_rio_impl_t handler; qse_awk_rio_impl_t handler;
int io_type, /*io_mode,*/ io_mask; int io_type, io_mode, io_mask;
qse_ssize_t n; qse_ssize_t n;
int ok = 0; int ok = 0;
@ -809,7 +809,7 @@ int qse_awk_rtx_flushio (
/* translate the out_type into the relevant I/O type and mode */ /* translate the out_type into the relevant I/O type and mode */
io_type = out_type_map[out_type]; io_type = out_type_map[out_type];
/*io_mode = out_mode_map[out_type];*/ io_mode = out_mode_map[out_type];
io_mask = out_mask_map[out_type]; io_mask = out_mask_map[out_type];
handler = run->rio.handler[io_type]; handler = run->rio.handler[io_type];
@ -821,9 +821,13 @@ int qse_awk_rtx_flushio (
} }
/* look for the corresponding rio for name */ /* look for the corresponding rio for name */
while (p != QSE_NULL) while (p)
{ {
if (p->type == (io_type | io_mask) && /* without the check for io_mode and p->mode,
* QSE_AWK_OUT_FILE and QSE_AWK_OUT_APFILE matches the
* same entry since (io_type | io_mask) has the same value
* for both. */
if (p->type == (io_type | io_mask) && p->mode == io_mode &&
(name == QSE_NULL || qse_strcmp(p->name,name) == 0)) (name == QSE_NULL || qse_strcmp(p->name,name) == 0))
{ {
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOERR, QSE_NULL); qse_awk_rtx_seterrnum (run, QSE_AWK_ENOERR, QSE_NULL);
@ -1123,7 +1127,7 @@ int qse_awk_rtx_closeio (
{ {
qse_awk_rio_arg_t* p = rtx->rio.chain, * px = QSE_NULL; qse_awk_rio_arg_t* p = rtx->rio.chain, * px = QSE_NULL;
while (p != QSE_NULL) while (p)
{ {
/* it handles the first that matches the given name /* it handles the first that matches the given name
* regardless of the io type */ * regardless of the io type */

View File

@ -45,6 +45,7 @@ libqsecmn_la_SOURCES = \
ipad.c \ ipad.c \
lda.c \ lda.c \
main.c \ main.c \
mb8.c \
mbwc.c \ mbwc.c \
mbwc-str.c \ mbwc-str.c \
mem.c \ mem.c \

View File

@ -88,15 +88,15 @@ libqsecmn_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \ am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \
alg-sort.c assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c \ alg-sort.c assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c \
fma.c fmt.c fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c \ fma.c fmt.c fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c \
main.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \ main.c mb8.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c \
nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \ nwif.c nwif-cfg.c nwio.c oht.c opt.c path-basename.c \
pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \ path-canon.c pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c \
str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \ stdio.c str-beg.c str-cat.c str-chr.c str-cnv.c str-cmp.c \
str-dup.c str-dynm.c str-dynw.c str-end.c str-excl.c \ str-cpy.c str-del.c str-dup.c str-dynm.c str-dynw.c str-end.c \
str-fcpy.c str-fnmat.c str-incl.c str-len.c str-pac.c \ str-excl.c str-fcpy.c str-fnmat.c str-incl.c str-len.c \
str-pbrk.c str-put.c str-rev.c str-rot.c str-set.c str-spl.c \ str-pac.c str-pbrk.c str-put.c str-rev.c str-rot.c str-set.c \
str-spn.c str-str.c str-subst.c str-tok.c str-trm.c str-word.c \ str-spl.c str-spn.c str-str.c str-subst.c str-tok.c str-trm.c \
task.c time.c tio.c tre.c tre-ast.c tre-compile.c \ str-word.c task.c time.c tio.c tre.c tre-ast.c tre-compile.c \
tre-match-backtrack.c tre-match-parallel.c tre-parse.c \ tre-match-backtrack.c tre-match-parallel.c tre-parse.c \
tre-stack.c uri.c utf8.c xma.c uni.c cp949.c cp950.c tre-stack.c uri.c utf8.c xma.c uni.c cp949.c cp950.c
@ENABLE_BUNDLED_UNICODE_TRUE@am__objects_1 = uni.lo @ENABLE_BUNDLED_UNICODE_TRUE@am__objects_1 = uni.lo
@ -104,17 +104,17 @@ am__libqsecmn_la_SOURCES_DIST = alg-base64.c alg-rand.c alg-search.c \
am_libqsecmn_la_OBJECTS = alg-base64.lo alg-rand.lo alg-search.lo \ am_libqsecmn_la_OBJECTS = alg-base64.lo alg-rand.lo alg-search.lo \
alg-sort.lo assert.lo chr.lo dir.lo dll.lo env.lo gdl.lo \ alg-sort.lo assert.lo chr.lo dir.lo dll.lo env.lo gdl.lo \
htb.lo fio.lo fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo glob.lo \ htb.lo fio.lo fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo glob.lo \
hton.lo ipad.lo lda.lo main.lo mbwc.lo mbwc-str.lo mem.lo \ hton.lo ipad.lo lda.lo main.lo mb8.lo mbwc.lo mbwc-str.lo \
mux.lo nwad.lo nwad-skad.lo nwif.lo nwif-cfg.lo nwio.lo oht.lo \ mem.lo mux.lo nwad.lo nwad-skad.lo nwif.lo nwif-cfg.lo nwio.lo \
opt.lo path-basename.lo path-canon.lo pio.lo pma.lo rbt.lo \ oht.lo opt.lo path-basename.lo path-canon.lo pio.lo pma.lo \
rex.lo sio.lo sll.lo slmb.lo stdio.lo str-beg.lo str-cat.lo \ rbt.lo rex.lo sio.lo sll.lo slmb.lo stdio.lo str-beg.lo \
str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo str-del.lo \ str-cat.lo str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo \
str-dup.lo str-dynm.lo str-dynw.lo str-end.lo str-excl.lo \ str-del.lo str-dup.lo str-dynm.lo str-dynw.lo str-end.lo \
str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo str-pac.lo \ str-excl.lo str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo \
str-pbrk.lo str-put.lo str-rev.lo str-rot.lo str-set.lo \ str-pac.lo str-pbrk.lo str-put.lo str-rev.lo str-rot.lo \
str-spl.lo str-spn.lo str-str.lo str-subst.lo str-tok.lo \ str-set.lo str-spl.lo str-spn.lo str-str.lo str-subst.lo \
str-trm.lo str-word.lo task.lo time.lo tio.lo tre.lo \ str-tok.lo str-trm.lo str-word.lo task.lo time.lo tio.lo \
tre-ast.lo tre-compile.lo tre-match-backtrack.lo \ tre.lo tre-ast.lo tre-compile.lo tre-match-backtrack.lo \
tre-match-parallel.lo tre-parse.lo tre-stack.lo uri.lo utf8.lo \ tre-match-parallel.lo tre-parse.lo tre-stack.lo uri.lo utf8.lo \
xma.lo $(am__objects_1) $(am__objects_2) xma.lo $(am__objects_1) $(am__objects_2)
libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS) libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS)
@ -355,7 +355,7 @@ noinst_HEADERS = \
libqsecmn_la_SOURCES = alg-base64.c alg-rand.c alg-search.c alg-sort.c \ libqsecmn_la_SOURCES = alg-base64.c alg-rand.c alg-search.c alg-sort.c \
assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c fma.c fmt.c \ assert.c chr.c dir.c dll.c env.c gdl.c htb.c fio.c fma.c fmt.c \
fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c main.c \ fs.c fs-err.c fs-move.c glob.c hton.c ipad.c lda.c main.c \
mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \ mb8.c mbwc.c mbwc-str.c mem.c mux.c nwad.c nwad-skad.c nwif.c \
nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \ nwif-cfg.c nwio.c oht.c opt.c path-basename.c path-canon.c \
pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \ pio.c pma.c rbt.c rex.c sio.c sll.c slmb.c stdio.c str-beg.c \
str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \ str-cat.c str-chr.c str-cnv.c str-cmp.c str-cpy.c str-del.c \
@ -475,6 +475,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ipad.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ipad.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mb8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@

38
qse/lib/cmn/mb8.c Normal file
View File

@ -0,0 +1,38 @@
/*
* $Id$
*
Copyright 2006-2012 Chung, Hyung-Hwan.
This file is part of QSE.
QSE is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
QSE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/mb8.h>
/*
 * Converts the wide character 'wc' to a single raw byte.
 *
 * Returns size + 1 when 'size' is not positive (no room in the buffer),
 * 0 when 'wc' exceeds QSE_TYPE_MAX(qse_uint8_t) and therefore cannot be
 * stored in one byte, and 1 otherwise. The byte is written through 'utf8'
 * only when 'utf8' is not null.
 */
qse_size_t qse_wctomb8 (qse_wchar_t wc, qse_mchar_t* utf8, qse_size_t size)
{
	qse_size_t len;

	if (size <= 0)
	{
		/* buffer too small */
		len = size + 1;
	}
	else if (wc > QSE_TYPE_MAX(qse_uint8_t))
	{
		/* illegal character */
		len = 0;
	}
	else
	{
		/* the output buffer is optional */
		if (utf8) *(qse_uint8_t*)utf8 = wc;
		len = 1;
	}

	return len;
}
/*
 * Converts the first raw byte at 'utf8' to a wide character.
 *
 * 'utf8' must not be null and 'size' must be greater than 0; both
 * conditions are asserted. The byte is read as an unsigned 8-bit value and
 * stored through 'wc' when 'wc' is not null, so the output pointer is
 * optional in the same way the output buffer of qse_wctomb8() is.
 * Always returns 1.
 */
qse_size_t qse_mb8towc (
	const qse_mchar_t* utf8, qse_size_t size, qse_wchar_t* wc)
{
	QSE_ASSERT (utf8 != QSE_NULL);
	QSE_ASSERT (size > 0);

	/* never dereference a null output pointer */
	if (wc) *wc = *(const qse_uint8_t*)utf8;
	return 1;
}

View File

@ -21,6 +21,7 @@
#include <qse/cmn/mbwc.h> #include <qse/cmn/mbwc.h>
#include <qse/cmn/slmb.h> #include <qse/cmn/slmb.h>
#include <qse/cmn/utf8.h> #include <qse/cmn/utf8.h>
#include <qse/cmn/mb8.h>
#include <qse/cmn/cp949.h> #include <qse/cmn/cp949.h>
#include <qse/cmn/cp950.h> #include <qse/cmn/cp950.h>
#include <qse/cmn/str.h> #include <qse/cmn/str.h>
@ -30,31 +31,18 @@
* dependent. * dependent.
*/ */
/* TODO: binary cmgr -> simply expands a byte to wchar and vice versa. */
static qse_cmgr_t builtin_cmgr[] = static qse_cmgr_t builtin_cmgr[] =
{ {
{ /* keep the order aligned with qse_cmgr_id_t values in <qse/cmn/mbwc.h> */
qse_slmbtoslwc, { qse_slmbtoslwc, qse_slwctoslmb },
qse_slwctoslmb { qse_utf8touc, qse_uctoutf8 },
}, { qse_mb8towc, qse_wctomb8 }
#if defined(QSE_ENABLE_XCMGRS) #if defined(QSE_ENABLE_XCMGRS)
{ ,
qse_cp949touc, { qse_cp949touc, qse_uctocp949 },
qse_uctocp949 { qse_cp950touc, qse_uctocp950 }
},
{
qse_cp950touc,
qse_uctocp950
},
#endif #endif
{
qse_utf8touc,
qse_uctoutf8
}
}; };
static qse_cmgr_t* dfl_cmgr = &builtin_cmgr[QSE_CMGR_SLMB]; static qse_cmgr_t* dfl_cmgr = &builtin_cmgr[QSE_CMGR_SLMB];
@ -84,6 +72,8 @@ qse_cmgr_t* qse_findcmgrbyid (qse_cmgr_id_t id)
qse_cmgr_t* qse_findcmgr (const qse_char_t* name) qse_cmgr_t* qse_findcmgr (const qse_char_t* name)
{ {
/* TODO: binary search or something better for performance improvement
* when there are many entries in the table */
static struct static struct
{ {
const qse_char_t* name; const qse_char_t* name;
@ -95,7 +85,8 @@ qse_cmgr_t* qse_findcmgr (const qse_char_t* name)
{ QSE_T("cp949"), QSE_CMGR_CP949 }, { QSE_T("cp949"), QSE_CMGR_CP949 },
{ QSE_T("cp950"), QSE_CMGR_CP950 }, { QSE_T("cp950"), QSE_CMGR_CP950 },
#endif #endif
{ QSE_T("slmb"), QSE_CMGR_UTF8 } { QSE_T("slmb"), QSE_CMGR_SLMB },
{ QSE_T("mb8"), QSE_CMGR_MB8 }
}; };
if (name) if (name)