updated doxyfile and added some bakefile description
This commit is contained in:
		
							
								
								
									
										29
									
								
								qse/bld/msw.bkgen
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								qse/bld/msw.bkgen
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,29 @@ | ||||
| <?xml version="1.0" ?> | ||||
|  | ||||
| <bakefile-gen xmlns="http://www.bakefile.org/schema/bakefile-gen"> | ||||
|  | ||||
| 	<input> | ||||
| 		qse.bkl | ||||
| 	</input> | ||||
|  | ||||
| 	<add-formats> | ||||
| 		borland,msvc,watcom | ||||
| 	</add-formats> | ||||
|  | ||||
| 	<add-flags> | ||||
| 		-DPLATFORM_WIN32=1 | ||||
| 	</add-flags> | ||||
|  | ||||
| 	<add-flags formats="borland"> | ||||
| 		-o$(INPUT_FILE_DIR)/msw-borland/makefile.bcc | ||||
| 	</add-flags> | ||||
|  | ||||
| 	<add-flags formats="msvc"> | ||||
| 		-o$(INPUT_FILE_DIR)/msw-msvc/makefile.bcc | ||||
| 	</add-flags> | ||||
|  | ||||
| 	<add-flags formats="watcom"> | ||||
| 		-o$(INPUT_FILE_DIR)/msw-watcom/makefile.wat | ||||
| 	</add-flags> | ||||
|  | ||||
| </bakefile-gen> | ||||
							
								
								
									
										21
									
								
								qse/bld/os2.bkgen
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								qse/bld/os2.bkgen
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | ||||
| <?xml version="1.0" ?> | ||||
|  | ||||
| <bakefile-gen xmlns="http://www.bakefile.org/schema/bakefile-gen"> | ||||
|  | ||||
| 	<input> | ||||
| 		qse.bkl | ||||
| 	</input> | ||||
|  | ||||
| 	<add-formats> | ||||
| 		watcom | ||||
| 	</add-formats> | ||||
|  | ||||
| 	<add-flags> | ||||
| 		-DPLATFORM_OS2=1 | ||||
| 	</add-flags> | ||||
|  | ||||
| 	<add-flags formats="watcom"> | ||||
| 		-o$(INPUT_FILE_DIR)/os2-watcom/makefile.wat | ||||
| 	</add-flags> | ||||
|  | ||||
| </bakefile-gen> | ||||
							
								
								
									
										191
									
								
								qse/bld/qse.bkl
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										191
									
								
								qse/bld/qse.bkl
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,191 @@ | ||||
| <?xml version="1.0" ?> | ||||
| <makefile> | ||||
| 	<set var="EOL_STYLE">unix</set> | ||||
|  | ||||
|  | ||||
| <!-- | ||||
| 	<if cond="FORMAT!='autoconf'"> | ||||
| 		<set var="OBJS" make_var="1"> | ||||
| 			$(COMPILER) | ||||
| 		</set> | ||||
|  | ||||
| 		<set var="BUILDDIR">$(OBJS)</set> | ||||
| 	</if> | ||||
| --> | ||||
|  | ||||
| 	<dll id="dllqsecmn"> | ||||
| 		<libname>qsecmn</libname> | ||||
| 		<dllname>qsecmn</dllname> | ||||
| 		<set-srcdir>../../lib/cmn</set-srcdir> | ||||
| 		<include>../../include</include> | ||||
| 		<sources> | ||||
| 			alg-base64.c  | ||||
| 			alg-rand.c  | ||||
| 			alg-search.c   | ||||
| 			alg-sort.c  | ||||
| 			assert.c  | ||||
| 			chr.c  | ||||
| 			dir.c  | ||||
| 			dll.c  | ||||
| 			env.c  | ||||
| 			gdl.c  | ||||
| 			htb.c  | ||||
| 			fio.c  | ||||
| 			fma.c  | ||||
| 			fmt.c  | ||||
| 			fs.c  | ||||
| 			fs-err.c  | ||||
| 			fs-move.c  | ||||
| 			glob.c  | ||||
| 			hton.c  | ||||
| 			ipad.c  | ||||
| 			lda.c  | ||||
| 			main.c  | ||||
| 			mbwc.c  | ||||
| 			mbwc-str.c  | ||||
| 			mem.c  | ||||
| 			mux.c  | ||||
| 			nwad.c  | ||||
| 			nwad-skad.c  | ||||
| 			nwif.c  | ||||
| 			nwif-cfg.c  | ||||
| 			nwio.c  | ||||
| 			oht.c  | ||||
| 			opt.c  | ||||
| 			path-basename.c  | ||||
| 			path-canon.c  | ||||
| 			pio.c  | ||||
| 			pma.c  | ||||
| 			rbt.c  | ||||
| 			rex.c  | ||||
| 			sio.c  | ||||
| 			sll.c  | ||||
| 			slmb.c  | ||||
| 			stdio.c  | ||||
| 			str-beg.c  | ||||
| 			str-cat.c  | ||||
| 			str-chr.c  | ||||
| 			str-cnv.c  | ||||
| 			str-cmp.c  | ||||
| 			str-cpy.c  | ||||
| 			str-del.c  | ||||
| 			str-dup.c  | ||||
| 			str-dynm.c  | ||||
| 			str-dynw.c  | ||||
| 			str-end.c  | ||||
| 			str-excl.c  | ||||
| 			str-fcpy.c  | ||||
| 			str-fnmat.c  | ||||
| 			str-incl.c  | ||||
| 			str-len.c  | ||||
| 			str-pac.c  | ||||
| 			str-pbrk.c  | ||||
| 			str-put.c  | ||||
| 			str-rev.c  | ||||
| 			str-rot.c  | ||||
| 			str-set.c  | ||||
| 			str-spl.c  | ||||
| 			str-spn.c  | ||||
| 			str-str.c  | ||||
| 			str-subst.c  | ||||
| 			str-tok.c  | ||||
| 			str-trm.c  | ||||
| 			str-word.c  | ||||
| 			task.c  | ||||
| 			time.c  | ||||
| 			tio.c  | ||||
| 			tre.c  | ||||
| 			tre-ast.c  | ||||
| 			tre-compile.c  | ||||
| 			tre-match-backtrack.c  | ||||
| 			tre-match-parallel.c  | ||||
| 			tre-parse.c  | ||||
| 			tre-stack.c  | ||||
| 			uri.c  | ||||
| 			utf8.c  | ||||
| 			xma.c | ||||
| 		</sources> | ||||
| 		<warnings>max</warnings> | ||||
| 		<debug-info>off</debug-info> | ||||
| 		<threading>multi</threading> | ||||
| 		<optimize>speed</optimize> | ||||
| 	</dll> | ||||
|  | ||||
| 	<dll id="dllqseawk"> | ||||
| 		<libname>qseawk</libname> | ||||
| 		<dllname>qseawk</dllname> | ||||
| 		<set-srcdir>../../lib/awk</set-srcdir> | ||||
| 		<include>../../include</include> | ||||
| 		<if cond="PLATFORM_OS2=='1'"> | ||||
| 			<cppflags> | ||||
| 				-DQSE_AWK_DEFAULT_MODPREFIX="awk-" | ||||
| 				-DQSE_AWK_DEFAULT_MODPOSTFIX=".dll" | ||||
| 			</cppflags> | ||||
| 		</if> | ||||
| 		<if cond="PLATFORM_MSDOS=='1'"> | ||||
| 			<cppflags> | ||||
| 				-DQSE_AWK_DEFAULT_MODPREFIX="awk-" | ||||
| 				-DQSE_AWK_DEFAULT_MODPOSTFIX=".dll" | ||||
| 			</cppflags> | ||||
| 		</if> | ||||
| 		<if cond="PLATFORM_WIN32=='1'"> | ||||
| 			<cppflags> | ||||
| 				-DQSE_AWK_DEFAULT_MODPREFIX="qseawk-" | ||||
| 				-DQSE_AWK_DEFAULT_MODPOSTFIX="" | ||||
| 			</cppflags> | ||||
| 		</if> | ||||
| 		<if cond="PLATFORM_UNIX=='1'"> | ||||
| 			<cppflags> | ||||
| 				-DQSE_AWK_DEFAULT_MODPREFIX="libqseawk-" | ||||
| 				-DQSE_AWK_DEFAULT_MODPOSTFIX="" | ||||
| 			</cppflags> | ||||
| 		</if> | ||||
| 		<sources> | ||||
| 			awk.c  | ||||
| 			err.c  | ||||
| 			tree.c  | ||||
| 			parse.c  | ||||
| 			run.c  | ||||
| 			rec.c  | ||||
| 			val.c  | ||||
| 			fnc.c  | ||||
| 			misc.c | ||||
| 			rio.c  | ||||
| 			std.c  | ||||
| 		</sources> | ||||
| 		<library>dllqsecmn</library> | ||||
| 		<warnings>max</warnings> | ||||
| 		<debug-info>off</debug-info> | ||||
| 		<threading>multi</threading> | ||||
| 		<optimize>speed</optimize> | ||||
| 	</dll> | ||||
|  | ||||
| 	<dll id="modqseawksys"> | ||||
| 		<dllname>awk-sys</dllname> | ||||
| 		<libname>awk-sys</libname> | ||||
| 		<set-srcdir>../../mod/awk</set-srcdir> | ||||
| 		<include>../../include</include> | ||||
| 		<sources>sys.c</sources> | ||||
| 		<library>dllqsecmn</library> | ||||
| 		<library>dllqseawk</library> | ||||
| 		<warnings>max</warnings> | ||||
| 		<debug-info>off</debug-info> | ||||
| 		<threading>multi</threading> | ||||
| 		<optimize>speed</optimize> | ||||
| 	</dll> | ||||
|  | ||||
| 	<exe id="exeqseawk"> | ||||
| 		<exename>qseawk</exename> | ||||
| 		<app-type>console</app-type> | ||||
| 		<set-srcdir>../../cmd/awk</set-srcdir> | ||||
| 		<include>../../include</include> | ||||
| 		<sources>awk.c</sources> | ||||
| 		<library>dllqsecmn</library> | ||||
| 		<library>dllqseawk</library> | ||||
| 		<warnings>max</warnings> | ||||
| 		<debug-info>off</debug-info> | ||||
| 		<threading>multi</threading> | ||||
| 		<optimize>speed</optimize> | ||||
| 	</exe> | ||||
|  | ||||
| </makefile> | ||||
| @ -622,7 +622,7 @@ static int comparg (int argc, qse_char_t* argv[], struct arg_t* arg) | ||||
| 			{ | ||||
| 				if (isfl >= isfc - 1) /* -1 for last QSE_NULL */ | ||||
| 				{ | ||||
| 					qse_awk_parsestd_t** tmp; | ||||
| 					qse_awk_parsestd_t* tmp; | ||||
| 					tmp = QSE_MMGR_REALLOC (arg->icf.mmgr, isf, QSE_SIZEOF(*isf)*(isfc+16)); | ||||
| 					if (tmp == QSE_NULL) | ||||
| 					{ | ||||
|  | ||||
							
								
								
									
										1780
									
								
								qse/doc/Doxyfile.in
									
									
									
									
									
								
							
							
						
						
									
										1780
									
								
								qse/doc/Doxyfile.in
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -2,9 +2,10 @@ | ||||
| AUTOMAKE_OPTIONS = no-dependencies | ||||
|  | ||||
| EXTRA_DIST = \ | ||||
| 	main.doc \ | ||||
| 	main.md \ | ||||
| 	mem.doc \ | ||||
| 	cenc.doc \ | ||||
| 	io.doc \ | ||||
| 	awk.doc \ | ||||
| 	awk-lang.md \ | ||||
| 	sed.doc | ||||
|  | ||||
| @ -226,11 +226,12 @@ top_builddir = @top_builddir@ | ||||
| top_srcdir = @top_srcdir@ | ||||
| AUTOMAKE_OPTIONS = no-dependencies | ||||
| EXTRA_DIST = \ | ||||
| 	main.doc \ | ||||
| 	main.md \ | ||||
| 	mem.doc \ | ||||
| 	cenc.doc \ | ||||
| 	io.doc \ | ||||
| 	awk.doc \ | ||||
| 	awk-lang.md \ | ||||
| 	sed.doc | ||||
|  | ||||
| all: all-am | ||||
|  | ||||
							
								
								
									
										671
									
								
								qse/doc/page/awk-lang.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										671
									
								
								qse/doc/page/awk-lang.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,671 @@ | ||||
| QSEAWK LANGUAGE {#awk-lang} | ||||
| =============== | ||||
|  | ||||
| QSEAWK implements the language described in the  | ||||
| [The AWK Programming Language][awkbook] with extensions. | ||||
|  | ||||
| QSEAWK reads an AWK program, recognizes various tokens contained while skipping  | ||||
| comments and whitespaces that don't constinute a token, analyses syntax, and | ||||
| tranforms them to an internal form for execution. | ||||
|  | ||||
| ### Comments ### | ||||
|  | ||||
| A single-line comment is introduced by a hash character #, and is terminated at  | ||||
| the end of the same line. Additionally, it supports a C-style multi-line comment | ||||
| enclosed in /* and */. The multi-line comment can't nest and can't appear within  | ||||
| string literals and regular expressions. | ||||
|  | ||||
|     x = y; # assign y to x. | ||||
|     /* | ||||
|     this line is ignored. | ||||
|     this line is ignored too. | ||||
|     */ | ||||
|  | ||||
| ## Tokens ## | ||||
|  | ||||
| A token is composed of one or more consecutive characters. | ||||
|  | ||||
| ### Numbers ### | ||||
|  | ||||
| An integer begins with a numeric digit between 0 and 9 inclusive and can be  | ||||
| followed by more numeric digits. If an integer is immediately followed by a  | ||||
| floating point, and optionally a series of numeric digits without whitespaces, | ||||
| it becomes a floting-point number. An integer or a simple floating-point number | ||||
| can be followed by e or E, and optionally a series of numeric digits with a | ||||
| optional single sign letter. A floating-point number may begin with a floting | ||||
| point with a preceeding number.  | ||||
|  | ||||
|     369   # integer | ||||
|     3.69  # floating-pointe number | ||||
|     13.   # 13.0 | ||||
|     .369  # 0.369 | ||||
|     34e-2 # 34 * (10 ** -2) | ||||
|     34e+2 # 34 * (10 ** 2) | ||||
|     34.56e # 34.56  | ||||
|     34.56E3  | ||||
|  | ||||
| An integer can be prefixed with 0x, 0, 0b for a hexa-decimal number, an octal number, | ||||
| and a binary number respectively. For a hexa-decimal number, letters from A to F | ||||
| can form a number case-insenstively in addition to numeric digits. | ||||
|  | ||||
|     0xA1   # 161 | ||||
|     0xB0b0 # 45232 | ||||
|     020    # 16 | ||||
|     0b101  # 5 | ||||
|  | ||||
| If the prefix is not followed by any numeric digits, it is still a valid token and | ||||
| represents the value of 0. | ||||
|  | ||||
|     0x # 0x0 but not desirable. | ||||
|     0b # 0b0 but not desirable. | ||||
|  | ||||
| ### Strings ### | ||||
|  | ||||
| A string is enclosed in a pair of double quotes or single quotes. | ||||
|  | ||||
| ### Regular Expressions ### | ||||
|  | ||||
| A regular expression is enclosed in a pair of forward slashes. | ||||
|  | ||||
|  | ||||
| ### Note ### | ||||
|  | ||||
| QSEAWK forms a token with the lognest valid sequences. | ||||
|  | ||||
| Tokenization cab confusing, especially for the implicit concatention. | ||||
| Let's take this as an example. | ||||
|  | ||||
|     0xT | ||||
|  | ||||
| Since 0x not followed by a digit is a valid token, and T is an identifier, | ||||
| it is the same expression as 0x concatenated with T (0x @@ T). | ||||
|  | ||||
| An AWK program can be composed of the following elements shown below.  | ||||
| Each language element requires the option in the second column to be on.  | ||||
|  | ||||
| <table> | ||||
| <tr><th>Element                    </th><th>Option             </th></tr> | ||||
| <tr><td>Comment                    </td><td>                   </td></tr> | ||||
| <tr><td>Global variable declaration</td><td>#QSE_AWK_EXPLICIT  </td></tr> | ||||
| <tr><td>Pattern-action block       </td><td>#QSE_AWK_PABLOCK   </td></tr> | ||||
| <tr><td>User-defined function      </td><td>                   </td></tr> | ||||
| <tr><td>\@include                  </td><td>#QSE_AWK_INCLUDE   </td></tr> | ||||
| </table> | ||||
|  | ||||
| Single line comments begin with the '#' letter and end at the end of the | ||||
| same line. The C style multi-line comments are supported as well. | ||||
| Comments are ignored. | ||||
|  | ||||
| - pattern-action-block := pattern action-block | ||||
| - pattern := BEGIN | END | expression | expression-range | ||||
| - expression-range := expression , expression | ||||
|  | ||||
| A pattern in a pattern action block can be omitted. | ||||
| The action part can be omitted if the pattern is not BEGIN nor END. | ||||
|  | ||||
| A pattern-action block, and a user-defined function can have the following elements. | ||||
|  | ||||
| <table> | ||||
| <tr><th>Element                    </th><th>Option            </th></tr> | ||||
| <tr><td>Local variable declaration</td><td>#QSE_AWK_EXPLICIT  </td></tr> | ||||
| <tr><td>Statement                 </td><td>                   </td></tr> | ||||
| <tr><td>getline                   </td><td>#QSE_AWK_RIO       </td></tr> | ||||
| <tr><td>print                     </td><td>#QSE_AWK_RIO       </td></tr> | ||||
| <tr><td>nextofile                 </td><td>#QSE_AWK_NEXTOFILE </td></tr> | ||||
| <tr><td>reset                     </td><td>#QSE_AWK_RESET     </td></tr> | ||||
| <tr><td>abort                     </td><td>#QSE_AWK_ABORT     </td></tr> | ||||
| </table> | ||||
|  | ||||
|  | ||||
| AWK has the following statement constructs. | ||||
| - if | ||||
| - while | ||||
| - for | ||||
| - do .. while | ||||
| - break | ||||
| - continue | ||||
| - return | ||||
| - exit | ||||
| - abort | ||||
| - next | ||||
| - nextfile | ||||
| - nextofile | ||||
| - delete | ||||
| - reset | ||||
| - print | ||||
| - printf | ||||
| - expression | ||||
|  | ||||
| @subsection awk_litvar LITERAL AND VARIABLE | ||||
|  | ||||
| Value type | ||||
| - Scalar | ||||
| -- String | ||||
| -- Integer | ||||
| -- Floating-Pointer number | ||||
| - Hashed Map | ||||
| - Regular expression | ||||
|  | ||||
| Scalar values are immutable while a hashed map value is mutable. | ||||
| A regular expression value is specially treated. | ||||
|  | ||||
| A variable is tied to a value when it is assigned with a value. | ||||
| If the variable is tied to a map value, it can't be assigned again. | ||||
| You can use 'reset' to untie the variable from the value, and thus | ||||
| restore the variable to the 'nil' state. | ||||
|  | ||||
| .... | ||||
|  | ||||
| @subsection awk_ext_teq TEQ OPERATOR | ||||
|  | ||||
| The === operator compares two values and evaluates to a non-zero value  | ||||
| if both have the same internal type and the actual values are the same. | ||||
| so 1 is not equal to 1.0 for the === operator. | ||||
|  | ||||
| A map comparison for the === operator is a bit special. The contents of | ||||
| the map is never inspected. Comparing two maps always result in inequality. | ||||
|  | ||||
| However, if two variables points to the same map value, it can evaluate | ||||
| to a non-zero value. This is possible if you allow assigning a map to  | ||||
| another non-map variable with #QSE_AWK_MAPTOVAR. In this case, a map | ||||
| is not deep-copied but the reference to it is copied. | ||||
|  | ||||
| @code | ||||
| BEGIN {  | ||||
| 	a[10]=20;  | ||||
| 	b=a;  | ||||
| 	b[20]=40; | ||||
| 	for (i in a) print i, a[i];   | ||||
| 	print a===b; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
|  | ||||
| The === operator may be also useful when you want to indicate an error | ||||
| with an uninitialized variable. The following code check if the function | ||||
| returned a map. Since the variable 'nil' has never been assigned, its  | ||||
| internal type is 'NIL' and  | ||||
|  | ||||
| @code | ||||
| function a () | ||||
| { | ||||
| 	x[10] = 2; | ||||
| 	return x; | ||||
| } | ||||
|  | ||||
| BEGIN { | ||||
| 	t = a(); | ||||
| 	if (t === nil) | ||||
| 		print "nil"; | ||||
| 	else | ||||
| 		print "ok"; | ||||
| } | ||||
| @endcode. | ||||
|  | ||||
| The !== operator is a negated form of the === operator. | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_vardecl VARIABLE DECLARATION | ||||
|  | ||||
| #QSE_AWK_EXPLICIT enables variable declaration. Variables declared are accessed | ||||
| directly bypassing the global named map that stores undeclared variables. | ||||
| The keyword @b global introduces a global variable and the keyword @b local  | ||||
| introduces local variable. Local variable declaraion in a block must be  | ||||
| located before an expression or a statement appears. | ||||
|  | ||||
| @code | ||||
| global g1, g2; #declares two global variables g1 and g2 | ||||
|  | ||||
| BEGIN { | ||||
| 	local a1, a2, a3; # declares three local variables  | ||||
|  | ||||
| 	g1 = 300; a1 = 200; | ||||
|  | ||||
| 	{ | ||||
| 		local a1; # a1 here hides the a1 at the outer scope | ||||
| 		local g1; # g1 here hides the global g1 | ||||
| 		a1 = 10; g1 = 5; | ||||
| 		print a1, g1; # it prints 10 and 5 | ||||
| 	} | ||||
|  | ||||
| 	print a1, g1; # it prints 200 and 300 | ||||
| } | ||||
|  | ||||
| @endcode | ||||
|  | ||||
| However, turning on #QSE_AWK_EXPLICIT does not disable named variables. | ||||
| To disable named variables, you must turn off #QSE_AWK_IMPLICIT. | ||||
|  | ||||
| @subsection awk_ext_include INCLUDE | ||||
|  | ||||
| The \@include directive inserts the contents of the object specified in the | ||||
| following string, typically a file name, as if they appeared in the source | ||||
| stream being processed. The directive can only be used at the outmost scope  | ||||
| where global variable declarations, @b BEGIN, @b END, and/or pattern-action  | ||||
| blocks appear. To use \@include, you must turn on #QSE_AWK_INCLUDE. | ||||
|  | ||||
| @code | ||||
| @include "abc.awk" | ||||
| BEGIN { func_in_abc (); } | ||||
| @endcode | ||||
|  | ||||
| A semicolon is optional after the included file name. The following is the  | ||||
| same as the sample above. | ||||
| @code | ||||
| @include "abc.awk"; | ||||
| BEGIN { func_in_abc(); } | ||||
| @endcode | ||||
|  | ||||
| If #QSE_AWK_NEWLINE is off, the semicolon is required. | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_funcall FUNCTIONC CALL | ||||
|  | ||||
|  | ||||
| name(1); | ||||
| if there is no space between 'name' and the left parenthesis, the  | ||||
| name is treated as a function name. | ||||
|  | ||||
| name (1); | ||||
| If there is a space, the name is treated as a function name if the  | ||||
| name has been declared as the function or if #QSE_AWK_IMPLICIT is on, | ||||
| it may be 'name' concatenated with the expression in the parentheses. | ||||
|  | ||||
| The following is a valid program. | ||||
| @code | ||||
| @pragma implicit off | ||||
| BEGIN { name (1); } | ||||
| function name(a) { print a; }' | ||||
| @endcode | ||||
|  | ||||
| However, in this program, the first 'name' becomes a named global variable. | ||||
| so the function declaration with 'name' triggers the variable redefinition  | ||||
| error. | ||||
| @pragma implicit on | ||||
| BEGIN { name (1); } | ||||
| function name(a) { print a; }' | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_print EXTENDED PRINT/PRINTF | ||||
| When #QSE_AWK_TOLERANT is on, print and printf are treated as if | ||||
| they are function calls.  In this mode, they return a negative number | ||||
| on failure and a zero on success and any I/O failure doesn't abort | ||||
| a running program.  | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	a = print "hello, world" > "/dev/null"; | ||||
| 	print a;	 | ||||
| 	a = print ("hello, world") > "/dev/null"; | ||||
| 	print a;	 | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| Since print and printf are like function calls, you can use them | ||||
| in any context where a normal expression is allowed. For example, | ||||
| printf is used as a conditional expression in an 'if' statement  | ||||
| in the sample code below. | ||||
| @code | ||||
| BEGIN { | ||||
| 	if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1) | ||||
| 		print "FAILURE"; | ||||
| 	else | ||||
| 		print "SUCCESS"; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_exprgroup GROUPED EXPRESSION | ||||
| When #QSE_AWK_TOLERANT is on, you can use a grouped expression without | ||||
| the 'in' operator. A grouped expression is a parentheses-enclosed list | ||||
| of expressions separated with a comma. Each expression in the group is | ||||
| evaluated in the appearing order. The evaluation result of the last  | ||||
| expression in the group is returned as that of the group. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	c = (1, 2, 9); | ||||
| 	a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c))); | ||||
| 	print c; # 9; | ||||
| 	print a; # -63	 | ||||
| 	print k; # 17 | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_rwpipe TWO-WAY PIPE | ||||
|  | ||||
| The two-way pipe indicated by @b || is supproted, in addition to the one-way  | ||||
| pipe indicated by @b |. Turn on #QSE_AWK_RWPIPE to enable the two-way pipe. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	print "15" || "sort"; | ||||
| 	print "14" || "sort"; | ||||
| 	print "13" || "sort"; | ||||
| 	print "12" || "sort"; | ||||
| 	print "11" || "sort"; | ||||
| 	# close the input side of the pipe as 'sort' starts emitting result  | ||||
| 	# once the input is closed. | ||||
| 	close ("sort", "r"); | ||||
| 	while (("sort" || getline x) > 0) print "xx:", x;  | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| This two-way pipe can create a TCP or UDP connection if the pipe command | ||||
| string is prefixed with one of the followings: | ||||
|  | ||||
| - tcp:// - establishes a TCP connection to a specified IP address/port. | ||||
| - udp:// - establishes a TCP connection to a specified IP address/port. | ||||
| - tcpd:// - binds a TCP socket to a specified IP address/port and waits for the first connection. | ||||
| - udpd:// - binds a TCP socket to a specified IP address/port and waits for the first sender. | ||||
|  | ||||
| @code | ||||
| BEGIN {  | ||||
| 	# it binds a TCP socket to the IPv6 address :: and the port number  | ||||
| 	# 9999 and waits for the first coming connection. It repeats writing | ||||
| 	# "hello world" to the first connected peer and reading a line from | ||||
| 	# it until the session is torn down. | ||||
| 	do {  | ||||
| 		print "hello world" || "tcpd://[::]:9999";  | ||||
| 		if (("tcpd://[::]:9999" || getline x) <= 0) break;  | ||||
| 		print x;  | ||||
| 	}  | ||||
| 	while(1);   | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| You can specify TCP or UDP timeouts for connection, accepting, reading, and  | ||||
| writing with setioattr (pipe-name, timeout-name, timeout-value). timeout-name  | ||||
| should be one of "ctimeout", "atimeout", "rtimeout", and "wtimeout".  | ||||
| timeout-value is a number specifying the actual timeout in milliseconds.  | ||||
| A negative value indicates no timeout.  | ||||
|  | ||||
| You can call getioattr (pipe-name, timeout-name) to get the current  | ||||
| timeout-value set. | ||||
|  | ||||
| See the example below. | ||||
|  | ||||
| @code | ||||
| BEGIN {  | ||||
| 	setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3000); | ||||
| 	setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5000); | ||||
| 	print "hello world" || "tcp://127.0.0.1:9999";  | ||||
| 	"tcp://127.0.0.1:9999" || getline x;  | ||||
| 	print x; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| Here is a more interesting example adopting Michael Sanders' | ||||
| AWK web server, modified for QSEAWK. | ||||
|  | ||||
| @code | ||||
| #  | ||||
| # Michael Sanders' AWK web server for QSEAWK. | ||||
| # Orginal code in http://awk.info/?tools/server | ||||
| # | ||||
| # qseawk --tolerant=on --rwpipe=on webserver.awk | ||||
| # | ||||
| BEGIN { | ||||
|   x        = 1                         # script exits if x < 1  | ||||
|   port     = 8080                      # port number  | ||||
|   host     = "tcpd://0.0.0.0:" port    # host string  | ||||
|   url      = "http://localhost:" port  # server url  | ||||
|   status   = 200                       # 200 == OK  | ||||
|   reason   = "OK"                      # server response  | ||||
|   RS = ORS = "\r\n"                    # header line terminators  | ||||
|   doc      = Setup()                   # html document  | ||||
|   len      = length(doc) + length(ORS) # length of document  | ||||
|   while (x) { | ||||
|      if ($1 == "GET") RunApp(substr($2, 2)) | ||||
|      if (! x) break | ||||
|      print "HTTP/1.0", status, reason || host | ||||
|      print "Connection: Close"        || host | ||||
|      print "Pragma: no-cache"         || host | ||||
|      print "Content-length:", len     || host | ||||
|      print ORS doc                    || host | ||||
|      close(host)     # close client connection  | ||||
|      host || getline # wait for new client request  | ||||
|   } | ||||
|   # server terminated...  | ||||
|   doc = Bye() | ||||
|   len = length(doc) + length(ORS) | ||||
|   print "HTTP/1.0", status, reason || host | ||||
|   print "Connection: Close"        || host | ||||
|   print "Pragma: no-cache"         || host | ||||
|   print "Content-length:", len     || host | ||||
|   print ORS doc                    || host | ||||
|   close(host) | ||||
| } | ||||
|  | ||||
| function Setup() { | ||||
|   tmp = "<html>\ | ||||
|   <head><title>Simple gawk server</title></head>\ | ||||
|   <body>\ | ||||
|   <p><a href=" url "/xterm>xterm</a>\ | ||||
|   <p><a href=" url "/xcalc>xcalc</a>\ | ||||
|   <p><a href=" url "/xload>xload</a>\ | ||||
|   <p><a href=" url "/exit>terminate script</a>\ | ||||
|   </body>\ | ||||
|   </html>" | ||||
|   return tmp | ||||
| } | ||||
|  | ||||
| function Bye() { | ||||
|   tmp = "<html>\ | ||||
|   <head><title>Simple gawk server</title></head>\ | ||||
|   <body><p>Script Terminated...</body>\ | ||||
|   </html>" | ||||
|   return tmp | ||||
| } | ||||
|  | ||||
| function RunApp(app) { | ||||
|   if (app == "xterm")  {system("xterm&"); return} | ||||
|   if (app == "xcalc" ) {system("xcalc&"); return} | ||||
|   if (app == "xload" ) {system("xload&"); return} | ||||
|   if (app == "exit")   {x = 0} | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_return RETURN | ||||
| The return statement is valid in pattern-action blocks as well as in functions. | ||||
| The execution of a calling block is aborted once the return statement is executed. | ||||
|  | ||||
| @code | ||||
| $ qseawk 'BEGIN { return 20; }' ; echo $? | ||||
| 20 | ||||
| @endcode | ||||
|  | ||||
| If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function. | ||||
| @code | ||||
| function getarray() { | ||||
| 	local a; | ||||
| 	a["one"] = 1; | ||||
| 	a["two"] = 2; | ||||
| 	a["three"] = 3; | ||||
| 	return a; | ||||
| } | ||||
|  | ||||
| BEGIN { | ||||
| 	local x; | ||||
|  | ||||
| 	x = getarray(); | ||||
| 	for (i in x) print i, x[i]; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_reset RESET | ||||
| The reset statement resets an array variable back to the initial state. | ||||
| After that, the array variable can also be used as a scalar variable again. | ||||
| You must have #QSE_AWK_RESET on to be able to be able to use this  | ||||
| statement. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	a[1] = 20; | ||||
| 	reset a; | ||||
| 	a = 20; # this is legal | ||||
| 	print a; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_abort ABORT | ||||
| The abort statment is similar to the exit statement except that | ||||
| it skips executing the END block. You must have #QSE_AWK_ABORT on to be | ||||
| able to use this statement. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	print "--- BEGIN ---"; | ||||
| 	abort 10; | ||||
| } | ||||
| END { | ||||
| 	print "--- END ---"; # this must not be printed | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_comment COMMENT | ||||
| You can use the C-style comment as well as the pound comment. | ||||
|  | ||||
| @subsection awk_ext_fnc EXTENDED FUNCTIONS | ||||
| index() and match() can accept the third parameter indicating the position  | ||||
| where the search begins. A negative value indicates a position from the back. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	xstr = "abcdefabcdefabcdef"; | ||||
| 	xsub = "abc"; | ||||
| 	xlen = length(xsub); | ||||
|  | ||||
| 	i = 1; | ||||
| 	while ((i = index(xstr, xsub, i)) > 0) | ||||
| 	{ | ||||
| 		print i, substr(xstr, i, xlen); | ||||
| 		i += xlen; | ||||
| 	} | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_fs EXTENDED FS | ||||
|  | ||||
| If the value for FS begins with a question mark followed by 4  | ||||
| additional letters, QSEAWK can split a record with quoted fields  | ||||
| delimited by a single-letter separator. | ||||
|  | ||||
| The 4 additional letters are composed of a field separator, | ||||
| an escaper, a opening quote, and a closing quote. | ||||
|  | ||||
| @code | ||||
| $ cat x.awk | ||||
| BEGIN { FS="?:\\[]"; } | ||||
| { | ||||
|      for (i = 1; i <= NF; i++) | ||||
|           print "$" i ": " $i; | ||||
|      print "---------------"; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| The value of FS above means the following. | ||||
| - : is a field separator. | ||||
| - a backslash is an escaper. | ||||
| - a left bracket is an opening quote. | ||||
| - a right bracket is a closing quote. | ||||
|  | ||||
| See the following output. | ||||
| @code | ||||
| $ cat x.dat | ||||
| [fx1]:[fx2]:[f\[x\]3] | ||||
| abc:def:[a b c] | ||||
| $ qseawk -f x.awk x.dat | ||||
| $1: fx1 | ||||
| $2: fx2 | ||||
| $3: f[x]3 | ||||
| --------------- | ||||
| $1: abc | ||||
| $2: def | ||||
| $3: a b c | ||||
| --------------- | ||||
| @endcode | ||||
| 	 | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_binnum BINARY NUMBER | ||||
| Use 0b to begin a binary number sequence. | ||||
|  | ||||
| @code  | ||||
| $ qseawk 'BEGIN { printf ("%b %o %d %x\n", 0b1101, 0b1101, 0b1101, 0b1101); }' | ||||
| 1101 15 13 d | ||||
| @endcode | ||||
|  | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_unicode UNICODE ESCAPE SEQUENCE  | ||||
|  | ||||
| If QSE is compiled for #QSE_CHAR_IS_WCHAR, you can use \\u and \\U in a  | ||||
| string to specify a character by unicode. | ||||
|  | ||||
| @code | ||||
| $ qseawk 'BEGIN { print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC"; }' | ||||
| 유니코드 統一碼 | ||||
| @endcode | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_ioenc I/O ENCODING | ||||
| You can call setioattr() to set the character encoding of a stream resource  | ||||
| like a pipe or a file. See qse_findcmgr() for a list of supported encoding names. | ||||
|  | ||||
| Let's say you run this simple echoing script on a WIN32 platform that has | ||||
| the active code page of 949 and is reachable at the IP address 192.168.2.8. | ||||
|  | ||||
| @code | ||||
| C:\> chcp | ||||
| Active code page: 949 | ||||
| C:\> type s.awk | ||||
| BEGIN { | ||||
|      sock = "tcpd://0.0.0.0:9999"; | ||||
|      setioattr (sock, "codepage", "cp949"); # this is not needed since the active  | ||||
|                                             # code page is already 949. | ||||
|      do { | ||||
|  | ||||
|           if ((sock || getline x) <= 0) break; | ||||
|           print "PEER: " x; | ||||
|           print x || sock; | ||||
|      } | ||||
|      while(1); | ||||
| } | ||||
| C:\> qseawk --rwpipe=on -f r.awk | ||||
| PEER: 안녕 | ||||
| PEER: ?好! | ||||
| @endcode | ||||
|  | ||||
| Now you run the following script on a UTF-8 console of a Linux box. | ||||
|  | ||||
| @code | ||||
| $ echo $LANG | ||||
| en_US.UTF-8 | ||||
| $ cat  c.awk | ||||
| BEGIN { | ||||
|      peer = "tcp://192.168.2.8:9999"; | ||||
|      setioattr (peer, "codepage", "cp949"); | ||||
|      do | ||||
|      { | ||||
|           printf "> "; | ||||
|           if ((getline x) <= 0) break; | ||||
|           print x || peer; | ||||
|           if ((peer || getline line) <= -1) break; | ||||
|           print "PEER: " line; | ||||
|      } | ||||
|      while (1); | ||||
| } | ||||
| $ qseawk --rwpipe=on -f c.awk | ||||
| > 안녕 | ||||
| PEER: 안녕 | ||||
| > 你好! | ||||
| PEER: ?好! | ||||
| @endcode | ||||
|  | ||||
| Note that 你 has been converted to a question mark since the letter is | ||||
| not supported by cp949. | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| [awkbook]: http://cm.bell-labs.com/cm/cs/awkbook/  | ||||
| @ -1,29 +1,9 @@ | ||||
| /** @page awk AWK | ||||
|  | ||||
| @section awk_content CONTENTS | ||||
| - @ref awk_intro "INTRODUCTION" | ||||
| - @ref awk_lang "AWK LANGUAGE" | ||||
|  - @ref awk_litvar        "LITERAL AND VARIABLE" | ||||
|  - @ref awk_ext_teq       "TEQ OPERATOR" | ||||
|  - @ref awk_ext_vardecl   "VARIABLE DECLARATION" | ||||
|  - @ref awk_ext_include   "INCLUDE" | ||||
|  - @ref awk_ext_funcall   "FUNCTION CALL" | ||||
|  - @ref awk_ext_print     "EXTENDED PRINT/PRINTF" | ||||
|  - @ref awk_ext_exprgroup "GROUPED EXPRESSION" | ||||
|  - @ref awk_ext_rwpipe    "TWO-WAY PIPE" | ||||
|  - @ref awk_ext_return    "RETURN" | ||||
|  - @ref awk_ext_reset     "RESET" | ||||
|  - @ref awk_ext_abort     "ABORT" | ||||
|  - @ref awk_ext_comment   "COMMENT" | ||||
|  - @ref awk_ext_fnc       "EXTENDED FUNCTIONS" | ||||
|  - @ref awk_ext_fs        "EXTENDED FS" | ||||
|  - @ref awk_ext_binnum    "BINARY NUMBER" | ||||
|  - @ref awk_ext_unicode   "UNICODE ESCAPE SEQUENCE" | ||||
|  - @ref awk_ext_ioenc     "I/O ENCODING" | ||||
| @section awk-content CONTENTS | ||||
| - @ref awk-intro "INTRODUCTION" | ||||
|  | ||||
|  | ||||
|  | ||||
| @section awk_intro INTRODUCTION | ||||
| @section awk-intro INTRODUCTION | ||||
|  | ||||
| QSEAWK is an AWK interpreter and is a part of the @ref qse_intro "QSE" library.  | ||||
| Its design focuses on building a flexible and robust embedding API with minimal  | ||||
| @ -197,594 +177,4 @@ Where options are: | ||||
|  --abort              on/off       enable 'abort' | ||||
| @endcode | ||||
|  | ||||
| @section awk_lang AWK LANGUAGE | ||||
| QSEAWK implements the language described in the book | ||||
| <a class="el" href="http://cm.bell-labs.com/cm/cs/awkbook/">  | ||||
| The AWK Proramming Language</a> with various @ref awk_ext "extensions". | ||||
|  | ||||
| An AWK program can be composed of the following elements shown below.  | ||||
| Each language element requires the option in the second column to be on.  | ||||
|  | ||||
| <table> | ||||
| <tr><th>Element                    </th><th>Option             </th></tr> | ||||
| <tr><td>Comment                    </td><td>                   </td></tr> | ||||
| <tr><td>Global variable declaration</td><td>#QSE_AWK_EXPLICIT  </td></tr> | ||||
| <tr><td>Pattern-action block       </td><td>#QSE_AWK_PABLOCK   </td></tr> | ||||
| <tr><td>User-defined function      </td><td>                   </td></tr> | ||||
| <tr><td>\@include                  </td><td>#QSE_AWK_INCLUDE   </td></tr> | ||||
| </table> | ||||
|  | ||||
| Single line comments begin with the '#' letter and end at the end of the | ||||
| same line. The C style multi-line comments are supported as well. | ||||
| Comments are ignored. | ||||
|  | ||||
| - pattern-action-block := pattern action-block | ||||
| - pattern := BEGIN | END | expression | expression-range | ||||
| - expression-range := expression , expression | ||||
|  | ||||
| A pattern in a pattern action block can be omitted. | ||||
| The action part can be omitted if the pattern is not BEGIN nor END. | ||||
|  | ||||
| A pattern-action block, and a user-defined function can have the following elements. | ||||
|  | ||||
| <table> | ||||
| <tr><th>Element                    </th><th>Option            </th></tr> | ||||
| <tr><td>Local variable declaration</td><td>#QSE_AWK_EXPLICIT  </td></tr> | ||||
| <tr><td>Statement                 </td><td>                   </td></tr> | ||||
| <tr><td>getline                   </td><td>#QSE_AWK_RIO       </td></tr> | ||||
| <tr><td>print                     </td><td>#QSE_AWK_RIO       </td></tr> | ||||
| <tr><td>nextofile                 </td><td>#QSE_AWK_NEXTOFILE </td></tr> | ||||
| <tr><td>reset                     </td><td>#QSE_AWK_RESET     </td></tr> | ||||
| <tr><td>abort                     </td><td>#QSE_AWK_ABORT     </td></tr> | ||||
| </table> | ||||
|  | ||||
|  | ||||
| AWK has the following statement constructs. | ||||
| - if | ||||
| - while | ||||
| - for | ||||
| - do .. while | ||||
| - break | ||||
| - continue | ||||
| - return | ||||
| - exit | ||||
| - abort | ||||
| - next | ||||
| - nextfile | ||||
| - nextofile | ||||
| - delete | ||||
| - reset | ||||
| - print | ||||
| - printf | ||||
| - expression | ||||
|  | ||||
| @subsection awk_litvar LITERAL AND VARIABLE | ||||
|  | ||||
| Value type | ||||
| - Scalar | ||||
| -- String | ||||
| -- Integer | ||||
| -- Floating-Pointer number | ||||
| - Hashed Map | ||||
| - Regular expression | ||||
|  | ||||
| Scalar values are immutable while a hashed map value is mutable. | ||||
| A regular expression value is specially treated. | ||||
|  | ||||
| A variable is tied to a value when it is assigned with a value. | ||||
| If the variable is tied to a map value, it can't be assigned again. | ||||
| You can use 'reset' to untie the variable from the value, and thus | ||||
| restore the variable to the 'nil' state. | ||||
|  | ||||
| .... | ||||
|  | ||||
| @subsection awk_ext_teq TEQ OPERATOR | ||||
|  | ||||
| The === operator compares two values and evaluates to a non-zero value  | ||||
| if both have the same internal type and the actual values are the same. | ||||
| so 1 is not equal to 1.0 for the === operator. | ||||
|  | ||||
| A map comparison for the === operator is a bit special. The contents of | ||||
| the map is never inspected. Comparing two maps always result in inequality. | ||||
|  | ||||
| However, if two variables points to the same map value, it can evaluate | ||||
| to a non-zero value. This is possible if you allow assigning a map to  | ||||
| another non-map variable with #QSE_AWK_MAPTOVAR. In this case, a map | ||||
| is not deep-copied but the reference to it is copied. | ||||
|  | ||||
| @code | ||||
| BEGIN {  | ||||
| 	a[10]=20;  | ||||
| 	b=a;  | ||||
| 	b[20]=40; | ||||
| 	for (i in a) print i, a[i];   | ||||
| 	print a===b; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
|  | ||||
| The === operator may be also useful when you want to indicate an error | ||||
| with an uninitialized variable. The following code check if the function | ||||
| returned a map. Since the variable 'nil' has never been assigned, its  | ||||
| internal type is 'NIL' and  | ||||
|  | ||||
| @code | ||||
| function a () | ||||
| { | ||||
| 	x[10] = 2; | ||||
| 	return x; | ||||
| } | ||||
|  | ||||
| BEGIN { | ||||
| 	t = a(); | ||||
| 	if (t === nil) | ||||
| 		print "nil"; | ||||
| 	else | ||||
| 		print "ok"; | ||||
| } | ||||
| @endcode. | ||||
|  | ||||
| The !== operator is a negated form of the === operator. | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_vardecl VARIABLE DECLARATION | ||||
|  | ||||
| #QSE_AWK_EXPLICIT enables variable declaration. Variables declared are accessed | ||||
| directly bypassing the global named map that stores undeclared variables. | ||||
| The keyword @b global introduces a global variable and the keyword @b local  | ||||
| introduces local variable. Local variable declaraion in a block must be  | ||||
| located before an expression or a statement appears. | ||||
|  | ||||
| @code | ||||
| global g1, g2; #declares two global variables g1 and g2 | ||||
|  | ||||
| BEGIN { | ||||
| 	local a1, a2, a3; # declares three local variables  | ||||
|  | ||||
| 	g1 = 300; a1 = 200; | ||||
|  | ||||
| 	{ | ||||
| 		local a1; # a1 here hides the a1 at the outer scope | ||||
| 		local g1; # g1 here hides the global g1 | ||||
| 		a1 = 10; g1 = 5; | ||||
| 		print a1, g1; # it prints 10 and 5 | ||||
| 	} | ||||
|  | ||||
| 	print a1, g1; # it prints 200 and 300 | ||||
| } | ||||
|  | ||||
| @endcode | ||||
|  | ||||
| However, turning on #QSE_AWK_EXPLICIT does not disable named variables. | ||||
| To disable named variables, you must turn off #QSE_AWK_IMPLICIT. | ||||
|  | ||||
| @subsection awk_ext_include INCLUDE | ||||
|  | ||||
| The \@include directive inserts the contents of the object specified in the | ||||
| following string, typically a file name, as if they appeared in the source | ||||
| stream being processed. The directive can only be used at the outmost scope  | ||||
| where global variable declarations, @b BEGIN, @b END, and/or pattern-action  | ||||
| blocks appear. To use \@include, you must turn on #QSE_AWK_INCLUDE. | ||||
|  | ||||
| @code | ||||
| @include "abc.awk" | ||||
| BEGIN { func_in_abc (); } | ||||
| @endcode | ||||
|  | ||||
| A semicolon is optional after the included file name. The following is the  | ||||
| same as the sample above. | ||||
| @code | ||||
| @include "abc.awk"; | ||||
| BEGIN { func_in_abc(); } | ||||
| @endcode | ||||
|  | ||||
| If #QSE_AWK_NEWLINE is off, the semicolon is required. | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_funcall FUNCTIONC CALL | ||||
|  | ||||
|  | ||||
| name(1); | ||||
| if there is no space between 'name' and the left parenthesis, the  | ||||
| name is treated as a function name. | ||||
|  | ||||
| name (1); | ||||
| If there is a space, the name is treated as a function name if the  | ||||
| name has been declared as the function or if #QSE_AWK_IMPLICIT is on, | ||||
| it may be 'name' concatenated with the expression in the parentheses. | ||||
|  | ||||
| The following is a valid program. | ||||
| @code | ||||
| @pragma implicit off | ||||
| BEGIN { name (1); } | ||||
| function name(a) { print a; }' | ||||
| @endcode | ||||
|  | ||||
| However, in this program, the first 'name' becomes a named global variable. | ||||
| so the function declaration with 'name' triggers the variable redefinition  | ||||
| error. | ||||
| @pragma implicit on | ||||
| BEGIN { name (1); } | ||||
| function name(a) { print a; }' | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_print EXTENDED PRINT/PRINTF | ||||
| When #QSE_AWK_TOLERANT is on, print and printf are treated as if | ||||
| they are function calls.  In this mode, they return a negative number | ||||
| on failure and a zero on success and any I/O failure doesn't abort | ||||
| a running program.  | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	a = print "hello, world" > "/dev/null"; | ||||
| 	print a;	 | ||||
| 	a = print ("hello, world") > "/dev/null"; | ||||
| 	print a;	 | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| Since print and printf are like function calls, you can use them | ||||
| in any context where a normal expression is allowed. For example, | ||||
| printf is used as a conditional expression in an 'if' statement  | ||||
| in the sample code below. | ||||
| @code | ||||
| BEGIN { | ||||
| 	if ((printf "hello, world\n" || "tcp://127.0.0.1:9999") <= -1) | ||||
| 		print "FAILURE"; | ||||
| 	else | ||||
| 		print "SUCCESS"; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_exprgroup GROUPED EXPRESSION | ||||
| When #QSE_AWK_TOLERANT is on, you can use a grouped expression without | ||||
| the 'in' operator. A grouped expression is a parentheses-enclosed list | ||||
| of expressions separated with a comma. Each expression in the group is | ||||
| evaluated in the appearing order. The evaluation result of the last  | ||||
| expression in the group is returned as that of the group. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	c = (1, 2, 9); | ||||
| 	a=((1*c, 3*c), (3 - c), ((k = 6+(c+1, c+2)), (-7 * c))); | ||||
| 	print c; # 9; | ||||
| 	print a; # -63	 | ||||
| 	print k; # 17 | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_rwpipe TWO-WAY PIPE | ||||
|  | ||||
| The two-way pipe indicated by @b || is supproted, in addition to the one-way  | ||||
| pipe indicated by @b |. Turn on #QSE_AWK_RWPIPE to enable the two-way pipe. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	print "15" || "sort"; | ||||
| 	print "14" || "sort"; | ||||
| 	print "13" || "sort"; | ||||
| 	print "12" || "sort"; | ||||
| 	print "11" || "sort"; | ||||
| 	# close the input side of the pipe as 'sort' starts emitting result  | ||||
| 	# once the input is closed. | ||||
| 	close ("sort", "r"); | ||||
| 	while (("sort" || getline x) > 0) print "xx:", x;  | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| This two-way pipe can create a TCP or UDP connection if the pipe command | ||||
| string is prefixed with one of the followings: | ||||
|  | ||||
| - tcp:// - establishes a TCP connection to a specified IP address/port. | ||||
| - udp:// - establishes a TCP connection to a specified IP address/port. | ||||
| - tcpd:// - binds a TCP socket to a specified IP address/port and waits for the first connection. | ||||
| - udpd:// - binds a TCP socket to a specified IP address/port and waits for the first sender. | ||||
|  | ||||
| @code | ||||
| BEGIN {  | ||||
| 	# it binds a TCP socket to the IPv6 address :: and the port number  | ||||
| 	# 9999 and waits for the first coming connection. It repeats writing | ||||
| 	# "hello world" to the first connected peer and reading a line from | ||||
| 	# it until the session is torn down. | ||||
| 	do {  | ||||
| 		print "hello world" || "tcpd://[::]:9999";  | ||||
| 		if (("tcpd://[::]:9999" || getline x) <= 0) break;  | ||||
| 		print x;  | ||||
| 	}  | ||||
| 	while(1);   | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| You can specify TCP or UDP timeouts for connection, accepting, reading, and  | ||||
| writing with setioattr (pipe-name, timeout-name, timeout-value). timeout-name  | ||||
| should be one of "ctimeout", "atimeout", "rtimeout", and "wtimeout".  | ||||
| timeout-value is a number specifying the actual timeout in milliseconds.  | ||||
| A negative value indicates no timeout.  | ||||
|  | ||||
| You can call getioattr (pipe-name, timeout-name) to get the current  | ||||
| timeout-value set. | ||||
|  | ||||
| See the example below. | ||||
|  | ||||
| @code | ||||
| BEGIN {  | ||||
| 	setioattr ("tcp://127.0.0.1:9999", "ctimeout", 3000); | ||||
| 	setioattr ("tcp://127.0.0.1:9999", "rtimeout", 5000); | ||||
| 	print "hello world" || "tcp://127.0.0.1:9999";  | ||||
| 	"tcp://127.0.0.1:9999" || getline x;  | ||||
| 	print x; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| Here is a more interesting example adopting Michael Sanders' | ||||
| AWK web server, modified for QSEAWK. | ||||
|  | ||||
| @code | ||||
| #  | ||||
| # Michael Sanders' AWK web server for QSEAWK. | ||||
| # Orginal code in http://awk.info/?tools/server | ||||
| # | ||||
| # qseawk --tolerant=on --rwpipe=on webserver.awk | ||||
| # | ||||
| BEGIN { | ||||
|   x        = 1                         # script exits if x < 1  | ||||
|   port     = 8080                      # port number  | ||||
|   host     = "tcpd://0.0.0.0:" port    # host string  | ||||
|   url      = "http://localhost:" port  # server url  | ||||
|   status   = 200                       # 200 == OK  | ||||
|   reason   = "OK"                      # server response  | ||||
|   RS = ORS = "\r\n"                    # header line terminators  | ||||
|   doc      = Setup()                   # html document  | ||||
|   len      = length(doc) + length(ORS) # length of document  | ||||
|   while (x) { | ||||
|      if ($1 == "GET") RunApp(substr($2, 2)) | ||||
|      if (! x) break | ||||
|      print "HTTP/1.0", status, reason || host | ||||
|      print "Connection: Close"        || host | ||||
|      print "Pragma: no-cache"         || host | ||||
|      print "Content-length:", len     || host | ||||
|      print ORS doc                    || host | ||||
|      close(host)     # close client connection  | ||||
|      host || getline # wait for new client request  | ||||
|   } | ||||
|   # server terminated...  | ||||
|   doc = Bye() | ||||
|   len = length(doc) + length(ORS) | ||||
|   print "HTTP/1.0", status, reason || host | ||||
|   print "Connection: Close"        || host | ||||
|   print "Pragma: no-cache"         || host | ||||
|   print "Content-length:", len     || host | ||||
|   print ORS doc                    || host | ||||
|   close(host) | ||||
| } | ||||
|  | ||||
| function Setup() { | ||||
|   tmp = "<html>\ | ||||
|   <head><title>Simple gawk server</title></head>\ | ||||
|   <body>\ | ||||
|   <p><a href=" url "/xterm>xterm</a>\ | ||||
|   <p><a href=" url "/xcalc>xcalc</a>\ | ||||
|   <p><a href=" url "/xload>xload</a>\ | ||||
|   <p><a href=" url "/exit>terminate script</a>\ | ||||
|   </body>\ | ||||
|   </html>" | ||||
|   return tmp | ||||
| } | ||||
|  | ||||
| function Bye() { | ||||
|   tmp = "<html>\ | ||||
|   <head><title>Simple gawk server</title></head>\ | ||||
|   <body><p>Script Terminated...</body>\ | ||||
|   </html>" | ||||
|   return tmp | ||||
| } | ||||
|  | ||||
| function RunApp(app) { | ||||
|   if (app == "xterm")  {system("xterm&"); return} | ||||
|   if (app == "xcalc" ) {system("xcalc&"); return} | ||||
|   if (app == "xload" ) {system("xload&"); return} | ||||
|   if (app == "exit")   {x = 0} | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_return RETURN | ||||
| The return statement is valid in pattern-action blocks as well as in functions. | ||||
| The execution of a calling block is aborted once the return statement is executed. | ||||
|  | ||||
| @code | ||||
| $ qseawk 'BEGIN { return 20; }' ; echo $? | ||||
| 20 | ||||
| @endcode | ||||
|  | ||||
| If #QSE_AWK_MAPTOVAR is on, you can return an arrayed value from a function. | ||||
| @code | ||||
| function getarray() { | ||||
| 	local a; | ||||
| 	a["one"] = 1; | ||||
| 	a["two"] = 2; | ||||
| 	a["three"] = 3; | ||||
| 	return a; | ||||
| } | ||||
|  | ||||
| BEGIN { | ||||
| 	local x; | ||||
|  | ||||
| 	x = getarray(); | ||||
| 	for (i in x) print i, x[i]; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_reset RESET | ||||
| The reset statement resets an array variable back to the initial state. | ||||
| After that, the array variable can also be used as a scalar variable again. | ||||
| You must have #QSE_AWK_RESET on to be able to be able to use this  | ||||
| statement. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	a[1] = 20; | ||||
| 	reset a; | ||||
| 	a = 20; # this is legal | ||||
| 	print a; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_abort ABORT | ||||
| The abort statment is similar to the exit statement except that | ||||
| it skips executing the END block. You must have #QSE_AWK_ABORT on to be | ||||
| able to use this statement. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	print "--- BEGIN ---"; | ||||
| 	abort 10; | ||||
| } | ||||
| END { | ||||
| 	print "--- END ---"; # this must not be printed | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_comment COMMENT | ||||
| You can use the C-style comment as well as the pound comment. | ||||
|  | ||||
| @subsection awk_ext_fnc EXTENDED FUNCTIONS | ||||
| index() and match() can accept the third parameter indicating the position  | ||||
| where the search begins. A negative value indicates a position from the back. | ||||
|  | ||||
| @code | ||||
| BEGIN { | ||||
| 	xstr = "abcdefabcdefabcdef"; | ||||
| 	xsub = "abc"; | ||||
| 	xlen = length(xsub); | ||||
|  | ||||
| 	i = 1; | ||||
| 	while ((i = index(xstr, xsub, i)) > 0) | ||||
| 	{ | ||||
| 		print i, substr(xstr, i, xlen); | ||||
| 		i += xlen; | ||||
| 	} | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| @subsection awk_ext_fs EXTENDED FS | ||||
|  | ||||
| If the value for FS begins with a question mark followed by 4  | ||||
| additional letters, QSEAWK can split a record with quoted fields  | ||||
| delimited by a single-letter separator. | ||||
|  | ||||
| The 4 additional letters are composed of a field separator, | ||||
| an escaper, a opening quote, and a closing quote. | ||||
|  | ||||
| @code | ||||
| $ cat x.awk | ||||
| BEGIN { FS="?:\\[]"; } | ||||
| { | ||||
|      for (i = 1; i <= NF; i++) | ||||
|           print "$" i ": " $i; | ||||
|      print "---------------"; | ||||
| } | ||||
| @endcode | ||||
|  | ||||
| The value of FS above means the following. | ||||
| - : is a field separator. | ||||
| - a backslash is an escaper. | ||||
| - a left bracket is an opening quote. | ||||
| - a right bracket is a closing quote. | ||||
|  | ||||
| See the following output. | ||||
| @code | ||||
| $ cat x.dat | ||||
| [fx1]:[fx2]:[f\[x\]3] | ||||
| abc:def:[a b c] | ||||
| $ qseawk -f x.awk x.dat | ||||
| $1: fx1 | ||||
| $2: fx2 | ||||
| $3: f[x]3 | ||||
| --------------- | ||||
| $1: abc | ||||
| $2: def | ||||
| $3: a b c | ||||
| --------------- | ||||
| @endcode | ||||
| 	 | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_binnum BINARY NUMBER | ||||
| Use 0b to begin a binary number sequence. | ||||
|  | ||||
| @code  | ||||
| $ qseawk 'BEGIN { printf ("%b %o %d %x\n", 0b1101, 0b1101, 0b1101, 0b1101); }' | ||||
| 1101 15 13 d | ||||
| @endcode | ||||
|  | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_unicode UNICODE ESCAPE SEQUENCE  | ||||
|  | ||||
| If QSE is compiled for #QSE_CHAR_IS_WCHAR, you can use \\u and \\U in a  | ||||
| string to specify a character by unicode. | ||||
|  | ||||
| @code | ||||
| $ qseawk 'BEGIN { print "\uC720\uB2C8\uCF54\uB4DC \U00007D71\U00004E00\U000078BC"; }' | ||||
| 유니코드 統一碼 | ||||
| @endcode | ||||
|  | ||||
|  | ||||
| @subsection awk_ext_ioenc I/O ENCODING | ||||
| You can call setioattr() to set the character encoding of a stream resource  | ||||
| like a pipe or a file. See qse_findcmgr() for a list of supported encoding names. | ||||
|  | ||||
| Let's say you run this simple echoing script on a WIN32 platform that has | ||||
| the active code page of 949 and is reachable at the IP address 192.168.2.8. | ||||
|  | ||||
| @code | ||||
| C:\> chcp | ||||
| Active code page: 949 | ||||
| C:\> type s.awk | ||||
| BEGIN { | ||||
|      sock = "tcpd://0.0.0.0:9999"; | ||||
|      setioattr (sock, "codepage", "cp949"); # this is not needed since the active  | ||||
|                                             # code page is already 949. | ||||
|      do { | ||||
|  | ||||
|           if ((sock || getline x) <= 0) break; | ||||
|           print "PEER: " x; | ||||
|           print x || sock; | ||||
|      } | ||||
|      while(1); | ||||
| } | ||||
| C:\> qseawk --rwpipe=on -f r.awk | ||||
| PEER: 안녕 | ||||
| PEER: ?好! | ||||
| @endcode | ||||
|  | ||||
| Now you run the following script on a UTF-8 console of a Linux box. | ||||
|  | ||||
| @code | ||||
| $ echo $LANG | ||||
| en_US.UTF-8 | ||||
| $ cat  c.awk | ||||
| BEGIN { | ||||
|      peer = "tcp://192.168.2.8:9999"; | ||||
|      setioattr (peer, "codepage", "cp949"); | ||||
|      do | ||||
|      { | ||||
|           printf "> "; | ||||
|           if ((getline x) <= 0) break; | ||||
|           print x || peer; | ||||
|           if ((peer || getline line) <= -1) break; | ||||
|           print "PEER: " line; | ||||
|      } | ||||
|      while (1); | ||||
| } | ||||
| $ qseawk --rwpipe=on -f c.awk | ||||
| > 안녕 | ||||
| PEER: 안녕 | ||||
| > 你好! | ||||
| PEER: ?好! | ||||
| @endcode | ||||
|  | ||||
| Note that 你 has been converted to a question mark since the letter is | ||||
| not supported by cp949. | ||||
|  | ||||
| */ | ||||
|  | ||||
| @ -1,5 +1,5 @@ | ||||
| /** @mainpage QSE | ||||
| 
 | ||||
| QSE                                       {#mainpage} | ||||
| =================== | ||||
| @image html qse-logo.png  | ||||
| 
 | ||||
| @section qse_intro INTRODUCTION | ||||
| @ -28,6 +28,7 @@ See the subpages for various modules available in this library. | ||||
| - @subpage io  "I/O Handling" | ||||
| - @subpage awk "AWK Interpreter"  | ||||
| - @subpage sed "SED Stream Editor"  | ||||
| - @subpage awk-lang "QSEAWK Language"  | ||||
| 
 | ||||
| @section installation INSTALLATION | ||||
| 
 | ||||
| @ -100,4 +101,3 @@ Under the wide character mode: | ||||
| - #qse_char_t maps to #qse_wchar_t. | ||||
| 
 | ||||
| #qse_mchar_t maps to @b char and #qse_wchar_t maps to @b wchar_t or equivalent. | ||||
| */ | ||||
| @ -24,15 +24,8 @@ | ||||
| #include <qse/types.h> | ||||
| #include <qse/macros.h> | ||||
|  | ||||
| /** @file | ||||
|  *  The file provides interface to a stx interpreter. | ||||
|  */ | ||||
|  | ||||
| typedef struct qse_stx_t qse_stx_t; | ||||
|  | ||||
| /**  | ||||
|  * The qse_stx_loc_t defines a structure to store location information. | ||||
|  */ | ||||
| struct qse_stx_loc_t | ||||
| { | ||||
| 	const qse_char_t* file; /**< file */ | ||||
| @ -41,9 +34,6 @@ struct qse_stx_loc_t | ||||
| }; | ||||
| typedef struct qse_stx_loc_t qse_stx_loc_t; | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_io_cmd_t type defines I/O commands. | ||||
|  */ | ||||
| enum qse_stx_io_cmd_t | ||||
| { | ||||
| 	QSE_STX_IO_OPEN   = 0, | ||||
| @ -53,9 +43,6 @@ enum qse_stx_io_cmd_t | ||||
| }; | ||||
| typedef enum qse_stx_io_cmd_t qse_stx_io_cmd_t; | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_io_arg_t type defines a data structure for an I/O handler. | ||||
|  */ | ||||
| struct qse_stx_io_arg_t | ||||
| { | ||||
| 	void*             handle; | ||||
| @ -63,9 +50,6 @@ struct qse_stx_io_arg_t | ||||
| }; | ||||
| typedef struct qse_stx_io_arg_t qse_stx_io_arg_t; | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_io_impl_t type defines an I/O handler function. | ||||
|  */ | ||||
| typedef qse_ssize_t (*qse_stx_io_impl_t) ( | ||||
| 	qse_stx_t*        stx, | ||||
| 	qse_stx_io_cmd_t  cmd, | ||||
| @ -74,9 +58,6 @@ typedef qse_ssize_t (*qse_stx_io_impl_t) ( | ||||
| 	qse_size_t        count | ||||
| ); | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_io_t type defines a I/O handler set. | ||||
|  */ | ||||
| struct qse_stx_io_t | ||||
| { | ||||
| 	qse_stx_io_impl_t in; | ||||
| @ -84,9 +65,6 @@ struct qse_stx_io_t | ||||
| }; | ||||
| typedef struct qse_stx_io_t qse_stx_io_t; | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_errnum_t type defines error numbers. | ||||
|  */ | ||||
| enum qse_stx_errnum_t | ||||
| { | ||||
| 	QSE_STX_ENOERR, | ||||
| @ -189,21 +167,11 @@ void qse_stx_seterror ( | ||||
| 	const qse_stx_loc_t* errloc  /**< error location */ | ||||
| ); | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_attachio() function attaches I/O handlers. | ||||
|  * Upon attachment, it opens input and output streams by calling | ||||
|  * the I/O handlers with the #QSE_STX_IO_OPEN command.  | ||||
|  */ | ||||
| int qse_stx_attachio ( | ||||
| 	qse_stx_t*    stx,  /**< stx */ | ||||
| 	qse_stx_io_t* io    /**< I/O handler set */ | ||||
| ); | ||||
|  | ||||
| /** | ||||
|  * The qse_stx_detachio() function detaches I/O handlers. | ||||
|  * It closes the streams for both input and output by calling the I/O handlers | ||||
|  * with the #QSE_STX_IO_CLOSE command. | ||||
|  */ | ||||
| void qse_stx_detachio ( | ||||
| 	qse_stx_t* stx   /**< stx */ | ||||
| ); | ||||
|  | ||||
| @ -381,6 +381,7 @@ static void* custom_awk_modopen (qse_awk_t* awk, const qse_awk_mod_spec_t* spec) | ||||
| 	const qse_char_t* tmp[4]; | ||||
| 	int count; | ||||
| 	UCHAR errbuf[CCHMAXPATH]; | ||||
| 	APIRET rc; | ||||
|  | ||||
| 	count = 0; | ||||
| 	if (spec->prefix) tmp[count++] = spec->prefix; | ||||
| @ -399,7 +400,10 @@ static void* custom_awk_modopen (qse_awk_t* awk, const qse_awk_mod_spec_t* spec) | ||||
| 		return QSE_NULL; | ||||
| 	} | ||||
|  | ||||
| 	if (DosLoadModule (errbuf, QSE_COUNTOF(errbuf) - 1, modpath, &h) != NO_ERROR) h = QSE_NULL; | ||||
| 	/* DosLoadModule() seems to have severe limitation on  | ||||
| 	 * the file name it can load (max-8-letters.xxx) */ | ||||
| 	rc = DosLoadModule (errbuf, QSE_COUNTOF(errbuf) - 1, modpath, &h); | ||||
| 	if (rc != NO_ERROR) h = QSE_NULL; | ||||
|  | ||||
| 	QSE_MMGR_FREE (awk->mmgr, modpath); | ||||
|  | ||||
|  | ||||
		Reference in New Issue
	
	Block a user