fixed a parser bug when handling the print/printf statement without redirection.

fixed a bug where a single dot was converted to 0 when passed via -v (e.g. -vTDIR=.). added more test cases.
2020-12-01 10:22:17 +00:00 · 2020-12-01 10:22:17 +00:00 · 2dd6029064
commit 2dd6029064
parent 35829a524b
20 changed files with 64957 additions and 50 deletions
--- a/hawk/Makefile.in
+++ b/hawk/Makefile.in
@ -170,8 +170,8 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/ac/ar-lib \
 	$(top_srcdir)/ac/config.sub $(top_srcdir)/ac/install-sh \
 	$(top_srcdir)/ac/ltmain.sh $(top_srcdir)/ac/missing \
 	$(top_srcdir)/pkgs/hawk.spec.in ac/ar-lib ac/compile \
-	ac/config.guess ac/config.sub ac/install-sh ac/ltmain.sh \
+	ac/config.guess ac/config.sub ac/depcomp ac/install-sh \
-	ac/missing
+	ac/ltmain.sh ac/missing
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
 distdir = $(PACKAGE)-$(VERSION)
 top_distdir = $(distdir)
@ -354,7 +354,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
--- a/hawk/bin/Makefile.in
+++ b/hawk/bin/Makefile.in
@ -323,7 +323,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
--- a/hawk/configure
+++ b/hawk/configure
@ -783,7 +783,6 @@ infodir
 docdir
 oldincludedir
 includedir
 runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@ -884,7 +883,6 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
 runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@ -1137,15 +1135,6 @@ do
  | -silent | --silent | --silen | --sile | --sil)
    silent=yes ;;
  -runstatedir | --runstatedir | --runstatedi | --runstated \
  | --runstate | --runstat | --runsta | --runst | --runs \
  | --run | --ru | --r)
    ac_prev=runstatedir ;;
  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
  | --run=* | --ru=* | --r=*)
    runstatedir=$ac_optarg ;;
  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
    ac_prev=sbindir ;;
  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@ -1283,7 +1272,7 @@ fi
 for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
 		datadir sysconfdir sharedstatedir localstatedir includedir \
 		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-		libdir localedir mandir runstatedir
+		libdir localedir mandir
 do
  eval ac_val=\$$ac_var
  # Remove trailing slashes.
@ -1436,7 +1425,6 @@ Fine tuning of the installation directories:
  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
  --libdir=DIR            object code libraries [EPREFIX/lib]
  --includedir=DIR        C header files [PREFIX/include]
  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
@ -18080,7 +18068,7 @@ else
    We can't simply define LARGE_OFF_T to be 9223372036854775807,
    since some C++ compilers masquerading as C compilers
    incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
  int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@ -18126,7 +18114,7 @@ else
    We can't simply define LARGE_OFF_T to be 9223372036854775807,
    since some C++ compilers masquerading as C compilers
    incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
  int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@ -18150,7 +18138,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    We can't simply define LARGE_OFF_T to be 9223372036854775807,
    since some C++ compilers masquerading as C compilers
    incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
  int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@ -18195,7 +18183,7 @@ else
    We can't simply define LARGE_OFF_T to be 9223372036854775807,
    since some C++ compilers masquerading as C compilers
    incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
  int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@ -18219,7 +18207,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    We can't simply define LARGE_OFF_T to be 9223372036854775807,
    since some C++ compilers masquerading as C compilers
    incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
  int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
--- a/hawk/lib/Makefile.in
+++ b/hawk/lib/Makefile.in
@ -561,7 +561,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
--- a/hawk/lib/hawk-prv.h
+++ b/hawk/lib/hawk-prv.h
@ -208,6 +208,7 @@ typedef struct hawk_tok_t hawk_tok_t;
 struct hawk_tok_t
 {
 	int           type;
 	int           flags;
 	hawk_ooecs_t* name;
 	hawk_loc_t    loc;
 };
--- a/hawk/lib/parse.c
+++ b/hawk/lib/parse.c
@ -69,6 +69,7 @@
 #define FMT_EUNDEF        HAWK_T("undefined identifier '%.*js'")
 #define FMT_EXKWNR        HAWK_T("'%.*js' not recognized")
 #define TOK_FLAGS_LPAREN_CLOSER (1 << 0)
 enum tok_t
 {
@ -3117,7 +3118,7 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 		}
 		eloc = hawk->tok.loc;
-		args = parse_expr_withdc (hawk, &eloc);
+		args = parse_expr_withdc(hawk, &eloc);
 		if (args == HAWK_NULL) goto oops;
 		args_tail = args;
@ -3150,7 +3151,7 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 				}
 				eloc = hawk->tok.loc;
-				args_tail->next = parse_expr_withdc (hawk, &eloc);
+				args_tail->next = parse_expr_withdc(hawk, &eloc);
 				if (args_tail->next == HAWK_NULL) goto oops;
 				tail_prev = args_tail;
@ -3184,7 +3185,7 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 		 * print 1, 2, (3 > 4) > 5;
 		 * print 1, 2, (3 > 4) > 5 + 6;
 		 */
-		if (in_parens == 1 && hawk->ptok.type == TOK_RPAREN && 
+		if (in_parens == 1 && hawk->ptok.type == TOK_RPAREN && (hawk->ptok.flags & TOK_FLAGS_LPAREN_CLOSER) &&
 		    hawk->parse.lparen_last_closed == opening_lparen_seq) 
 		{
 			in_parens = 2; /* confirmed */
@ -3192,6 +3193,10 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 		if (in_parens != 2 && gm_in_parens != 2 && args_tail->type == HAWK_NDE_EXP_BIN)
 		{
 			/* check if the expression ends with an output redirector
 			 * and take out the part after the redirector and make it
 			 * the output target */
 			int i;
 			hawk_nde_exp_t* ep = (hawk_nde_exp_t*)args_tail;
 			static struct 
@ -3233,8 +3238,7 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 					tmp = args_tail;
-					if (tail_prev != HAWK_NULL) 
+					if (tail_prev) tail_prev->next = ep->left;
 						tail_prev->next = ep->left;
 					else args = ep->left;
 					out = ep->right;
@ -3261,7 +3265,7 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 			if (get_token(hawk) <= -1) goto oops;
 			eloc = hawk->tok.loc;
-			out = parse_expr_withdc (hawk, &eloc);
+			out = parse_expr_withdc(hawk, &eloc);
 			if (out == HAWK_NULL) goto oops;
 		}
 	}
@ -3272,8 +3276,8 @@ static hawk_nde_t* parse_print (hawk_t* hawk, const hawk_loc_t* xloc)
 		goto oops;
 	}
-	nde = (hawk_nde_print_t*) hawk_callocmem (hawk, HAWK_SIZEOF(*nde));
+	nde = (hawk_nde_print_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde));
-	if (nde == HAWK_NULL) 
+	if (HAWK_UNLIKELY(!nde)) 
 	{
 		ADJERR_LOC (hawk, xloc);
 		goto oops;
@ -3621,7 +3625,7 @@ static hawk_nde_t* parse_expr (hawk_t* hawk, const hawk_loc_t* xloc)
 	{
 		hawk_loc_t eloc;
 		eloc = hawk->tok.loc;
-		y = parse_expr_withdc (hawk, &eloc);
+		y = parse_expr_withdc(hawk, &eloc);
 	}
 	if (y == HAWK_NULL) 
 	{
@ -4944,8 +4948,9 @@ static hawk_nde_t* parse_primary_lparen (hawk_t* hawk, const hawk_loc_t* xloc)
 	}
 	/* remember the sequence number of the left parenthesis
-	 * that' been just closed by the matching right parenthesis */
+	 * that's been just closed by the matching right parenthesis */
 	hawk->parse.lparen_last_closed = opening_lparen_seq;
 	hawk->tok.flags |= TOK_FLAGS_LPAREN_CLOSER; /* indicate that this RPAREN is closing LPAREN */
 	if (get_token(hawk) <= -1) goto oops;
@ -5151,7 +5156,7 @@ static hawk_nde_t* parse_primary_nopipe (hawk_t* hawk, const hawk_loc_t* xloc)
 			    (MATCH(hawk,TOK_PRINT) || MATCH(hawk,TOK_PRINTF)))
 			{
 				if (get_token(hawk) <= -1) return HAWK_NULL;
-				return parse_print (hawk, xloc);
+				return parse_print(hawk, xloc);
 			}
 			/* valid expression introducer is expected */
@ -6635,6 +6640,7 @@ retry:
 	while (n >= 1);
 	hawk_ooecs_clear (tok->name);
 	tok->flags = 0;
 	tok->loc.file = hawk->sio.last.file;
 	tok->loc.line = hawk->sio.last.line;
 	tok->loc.colm = hawk->sio.last.colm;
@ -6895,6 +6901,7 @@ retry:
 static int get_token (hawk_t* hawk)
 {
 	hawk->ptok.type = hawk->tok.type;
 	hawk->ptok.flags = hawk->tok.flags;
 	hawk->ptok.loc.file = hawk->tok.loc.file;
 	hawk->ptok.loc.line = hawk->tok.loc.line;
 	hawk->ptok.loc.colm = hawk->tok.loc.colm;
@ -6903,6 +6910,7 @@ static int get_token (hawk_t* hawk)
 	if (HAWK_OOECS_LEN(hawk->ntok.name) > 0)
 	{
 		hawk->tok.type = hawk->ntok.type;
 		hawk->tok.flags = hawk->ntok.flags;
 		hawk->tok.loc.file = hawk->ntok.loc.file;
 		hawk->tok.loc.line = hawk->ntok.loc.line;
 		hawk->tok.loc.colm = hawk->ntok.loc.colm;	
@ -6913,7 +6921,7 @@ static int get_token (hawk_t* hawk)
 		return 0;
 	}
-	return get_token_into (hawk, &hawk->tok);
+	return get_token_into(hawk, &hawk->tok);
 }
 static int preget_token (hawk_t* hawk)
@ -6956,7 +6964,7 @@ static int preget_token (hawk_t* hawk)
 	{
 		/* if there is no token pre-read, we get a new
 		 * token and place it to hawk->ntok. */ 
-		return get_token_into (hawk, &hawk->ntok);
+		return get_token_into(hawk, &hawk->ntok);
 	}
 }
--- a/hawk/lib/val.c
+++ b/hawk/lib/val.c
@ -724,10 +724,13 @@ hawk_val_t* hawk_rtx_makenumorstrvalwithuchars (hawk_rtx_t* rtx, const hawk_uch_
 	hawk_int_t l;
 	hawk_flt_t r;
 	if (ptr[0] == '.' && len == 1) goto make_str;
 	x = hawk_uchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r);
 	if (x == 0) return hawk_rtx_makeintval(rtx, l);
 	else if (x >= 1) return hawk_rtx_makefltval(rtx, r);
 make_str:
 	return hawk_rtx_makestrvalwithuchars(rtx, ptr, len);
 }
@ -737,10 +740,13 @@ hawk_val_t* hawk_rtx_makenumorstrvalwithbchars (hawk_rtx_t* rtx, const hawk_bch_
 	hawk_int_t l;
 	hawk_flt_t r;
 	if (ptr[0] == '.' && len == 1) goto make_str;
 	x = hawk_bchars_to_num(HAWK_OOCHARS_TO_NUM_MAKE_OPTION(1, 1, HAWK_RTX_IS_STRIPSTRSPC_ON(rtx), 0), ptr, len, &l, &r);
 	if (x == 0) return hawk_rtx_makeintval(rtx, l);
 	else if (x >= 1) return hawk_rtx_makefltval(rtx, r);
 make_str:
 	return hawk_rtx_makestrvalwithbchars(rtx, ptr, len);
 }
--- a/hawk/mod/Makefile.in
+++ b/hawk/mod/Makefile.in
@ -379,7 +379,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
--- a/hawk/samples/Makefile.in
+++ b/hawk/samples/Makefile.in
@ -353,7 +353,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
--- a/hawk/t/Makefile.am
+++ b/hawk/t/Makefile.am
@ -13,9 +13,11 @@ LDFLAGS_COMMON=-L$(abs_builddir)/../lib  -L$(libdir)
 ## for more information.
 LIBADD_COMMON = ../lib/libhawk.la $(LIBM)
-check_SCRIPTS = h-001.hawk h-002.hawk
+check_SCRIPTS = h-001.hawk h-002.hawk h-003.hawk
 ##noinst_SCRIPTS = $(check_SCRIPTS)
-EXTRA_DIST = $(check_SCRIPTS) ensure.inc
+EXTRA_DIST = $(check_SCRIPTS) ensure.inc \
 	journal-toc.hawk journal-toc.in journal-toc.out journal-toc-html.out \
 	bibtex-to-html.hawk bibtex-to-html.out
 check_PROGRAMS = t-001 t-002 t-003 t-004 t-005 t-006
@ -57,4 +59,4 @@ TESTS = $(check_PROGRAMS) $(check_SCRIPTS)
 TEST_EXTENSIONS = .hawk
 HAWK_LOG_COMPILER = ../bin/hawk
-AM_HAWK_LOG_FLAGS = --modlibdirs=../lib/.libs:../mod/.libs -f
+AM_HAWK_LOG_FLAGS = -vTDIR=${abs_srcdir} --modlibdirs=../lib/.libs:../mod/.libs -f
--- a/hawk/t/Makefile.in
+++ b/hawk/t/Makefile.in
@ -558,7 +558,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
@ -578,8 +577,11 @@ CPPFLAGS_COMMON = \
 CFLAGS_COMMON = 
 LDFLAGS_COMMON = -L$(abs_builddir)/../lib  -L$(libdir)
 LIBADD_COMMON = ../lib/libhawk.la $(LIBM)
-check_SCRIPTS = h-001.hawk h-002.hawk
+check_SCRIPTS = h-001.hawk h-002.hawk h-003.hawk
-EXTRA_DIST = $(check_SCRIPTS) ensure.inc
+EXTRA_DIST = $(check_SCRIPTS) ensure.inc \
 	journal-toc.hawk journal-toc.in journal-toc.out journal-toc-html.out \
 	bibtex-to-html.hawk bibtex-to-html.out
 t_001_SOURCES = t-001.c t.h
 t_001_CPPFLAGS = $(CPPFLAGS_COMMON)
 t_002_SOURCES = t-002.c t.h
@ -610,7 +612,7 @@ t_006_LDADD = $(LIBADD_COMMON)
 TESTS = $(check_PROGRAMS) $(check_SCRIPTS)
 TEST_EXTENSIONS = .hawk
 HAWK_LOG_COMPILER = ../bin/hawk
-AM_HAWK_LOG_FLAGS = --modlibdirs=../lib/.libs:../mod/.libs -f
+AM_HAWK_LOG_FLAGS = -vTDIR=${abs_srcdir} --modlibdirs=../lib/.libs:../mod/.libs -f
 all: all-am
 .SUFFIXES:
--- a/hawk/t/bibtex-to-html.hawk
+++ b/hawk/t/bibtex-to-html.hawk
@ -0,0 +1,545 @@
 # http://www.netlib.org/bibnet/tools/software/bibtex-to-html.awk
 #
 ### ====================================================================
 ###  @Awk-file{
 ###     author          = "Nelson H. F. Beebe",
 ###     version         = "1.02",
 ###     date            = "05 July 1997",
 ###     time            = "12:04:52 MDT",
 ###     filename        = "bibtex-to-html.awk",
 ###     address         = "Center for Scientific Computing
 ###                        Department of Mathematics
 ###                        University of Utah
 ###                        Salt Lake City, UT 84112
 ###                        USA",
 ###     telephone       = "+1 801 581 5254",
 ###     FAX             = "+1 801 581 4148",
 ###     URL             = "http://www.math.utah.edu/~beebe",
 ###     checksum        = "08699 482 2173 18348",
 ###     email           = "beebe@math.utah.edu (Internet)",
 ###     codetable       = "ISO/ASCII",
 ###     keywords        = "bibliography, BibTeX, HTML, World-Wide Web,
 ###                        WWW",
 ###     supported       = "yes",
 ###     docstring       = "This program converts BibTeX bibliographies
 ###                        to HTML, suitable for viewing on the
 ###                        World-Wide Web.
 ###
 ###                        The level of HTML produced is version 3.2,
 ###                        adopted 14-Jan-1997, and defined in the SGML
 ###                        document type definition (DTD) available at
 ###
 ###                            http://www.w3.org/MarkUp/Wilbur/HTML32.dtd
 ###
 ###                        and documented at
 ###
 ###                            http://www.w3.org/MarkUp/Wilbur/
 ###                            http://www.w3.org/TR/REC-html32.html
 ###
 ###                        HTML markup is added to provide hypertext
 ###                        links for:
 ###
 ###                            * all URLs in the BibTeX file, both in
 ###                              comments, and inside string values;
 ###                            * all bibliography entry crossref
 ###                              values;
 ###                            * all \cite{} references;
 ###                            * all @String{name = "value"} names.
 ###
 ###                        In addition, every BibTeX citation label in
 ###                        @Entry lines, and every @String name, will
 ###                        be marked as an HTML label, allowing
 ###                        hypertext links to each from elsewhere in
 ###                        the same HTML file, or from other HTML
 ###                        files.  In particular, every bibliography
 ###                        entry can be directly referenced by
 ###                        hypertext links from anywhere on the
 ###                        Internet.
 ###
 ###                        Each such linkable-name will be displayed
 ###                        in bold text to draw attention to the fact
 ###                        that it can be directly referenced by a
 ###                        suitable URL.  In principle, this should be
 ###                        an option that WWW browsers provide, but
 ###                        none that I have used currently do.
 ###
 ###                        Although no browsers to my knowledge yet
 ###                        provide the capability of partial
 ###                        downloading of HTML files, the possibility
 ###                        has been discussed for future versions of
 ###                        the HTTP protocol.  Such support would make
 ###                        it possible to construct bibliographies in
 ###                        electronic documents as links to large
 ###                        bibliography database files, without the
 ###                        browser having to load the entire database,
 ###                        but just individual entries.  Since these
 ###                        in turn can have URLs that point to other
 ###                        electronic sources of the publication, a
 ###                        reader could easily follow links from a
 ###                        publication to a bibliography and then to
 ###                        abstracts and to the complete original
 ###                        text.  Some journals, such as the Digital
 ###                        Technical Journal (electronically accessible
 ###                        at http://www.digital.com:80/info/DTJ/home.html),
 ###                        already could offer this possibility.
 ###
 ###                        The Web browser user will see material that
 ###                        looks just like normal BibTeX entries,
 ###                        except that some portions may be
 ###                        highlighted to indicate hypertext links.
 ###                        However, window cut-and-paste actions will
 ###                        recover a BibTeX entry in a form suitable
 ###                        for pasting into another BibTeX file,
 ###                        without any need for further editing.
 ###
 ###                        This program assumes that the BibTeX
 ###                        bibliography is formatted in the style
 ###                        produced by bibclean, and that embedded
 ###                        URLs and "key = stringname" pairs are coded
 ###                        on a single line, so that simple pattern
 ###                        matching suffices to recognize text in need
 ###                        of additional HTML markup.
 ###
 ###                        Usage:
 ###                            nawk -f bibtex-to-html.awk \
 ###                                [-v PREFIX=prefix] [-v SUFFIX=suffix] \
 ###                                BibTeX-file(s)
 ###
 ###                        An input file with a filename of the form
 ###                        abc.xyz is output to a file named
 ###                        PREFIXabcSUFFIX.  The default PREFIX is
 ###                        empty, and the default SUFFIX is ".html".
 ###
 ###			   If no   file  names are specified    on the
 ###			   command line, then   the PREFIX and  SUFFIX
 ###			   settings  are ignored,   and input is  read
 ###			   from   stdin,  and  output  is   written to
 ###			   stdout, so that the program  can be used in
 ###			   a UNIX pipeline.
 ###
 ###                        In the current version, no provision is
 ###                        made for splitting the output files into
 ###                        smaller pieces to speed network file
 ###                        transfer.  While this would improve browser
 ###                        responsiveness over slow network
 ###                        connections, it would also significantly
 ###                        complicate hypertext link generation for
 ###                        this program, and seriously damage browser
 ###                        search capability within the bibliography
 ###                        file.  Perhaps the solution will come in
 ###                        (a) browsers' adopting the netscape browser
 ###                        practice of displaying data as soon as
 ###                        enough to fill a screen is available, and
 ###                        (b) faster network connections.
 ###
 ###                        In the TUG bibliography collection at
 ###                        ftp://ftp.math.utah.edu/, bibliography
 ###                        file sizes range from 3K to 4700K, with an
 ###                        average of 370K.  These are rather large,
 ###                        since typical WWW file sizes need to be
 ###                        about 16K or less for good responsiveness.
 ###
 ###                        The checksum field above contains a CRC-16
 ###                        checksum as the first value, followed by the
 ###                        equivalent of the standard UNIX wc (word
 ###                        count) utility output of lines, words, and
 ###                        characters.  This is produced by Robert
 ###                        Solovay's checksum utility.",
 ###  }
 ### ====================================================================
 # One-time initialization: program identity, per-run environment facts
 # (user, host, date), and the pattern/offset/prefix/save-label settings
 # used by anchor(), do_cite() and do_line().
 BEGIN \
 	{
 	    ######################################################################
 	    VERSION = "1.02 [05-Jul-1997]" # <-- NB: Change this with each update!
 	    ######################################################################
 	    PROGRAM = "bibtex-to-html"
 	    # Sentinel meaning "no input file yet"; LASTFILENAME is compared
 	    # against it before end_file() is called.
 	    UNSET_FILENAME = "/dev/unset"
 	    LASTFILENAME = UNSET_FILENAME
 	    # NOTE(review): _last_input_filename is not referenced in the
 	    # visible part of this file -- confirm it is still needed.
 	    _last_input_filename = UNSET_FILENAME
 	    # SUFFIX may be preset on the command line (-v SUFFIX=...);
 	    # fall back to ".html" when it is empty.
 	    if (SUFFIX == "")
 		SUFFIX = ".html"
 	    # User name: $USER, then $LOGNAME, then a "????" placeholder.
 	    USER = ENVIRON["USER"]
 	    if (USER == "")
 		USER = ENVIRON["LOGNAME"]
 	    if (USER == "")
 		USER = "????"
 	    # Read one line of output from each command.
 	    "hostname" | getline HOSTNAME
 	    "date" | getline DATE
 #	    [01-Aug-2019] ypcat no longer available: replace by getent
 #	    ("ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'") | getline PERSONAL_NAME
 	    # PERSONAL_NAME is field 5 of the user's passwd entry.  ARGV[0]
 	    # supplies the command that extracts it -- NOTE(review): presumably
 	    # the name of the running interpreter; confirm.
 	    ("getent passwd " USER " | " ARGV[0] " -F: '{print $5}'") | getline PERSONAL_NAME
 	    if (PERSONAL_NAME == "")
 		##("grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME
 		("grep  '^" USER ":' /etc/passwd | " ARGV[0] " -F: '{print $5}'") | getline PERSONAL_NAME
 	    # NB: " has become &#34; before this pattern is used
 	    CROSSREF_EQUALS_LABEL_PATTERN = "^[ \t]*crossref[ \t]*=[ \t]*&#34;"
 	    # Pattern to match a line like this:
 	    # %%%     email           = "beebe at math.utah.edu (Internet)",
 	    BIBTEX_EMAIL_PATTERN = "= &#34;[A-Za-z0-9-]+ at [A-Za-z0-9.-]+"
 	    # Each *_OFFSET is the number of leading characters of a match
 	    # that anchor() discards so they stay outside the <A> element.
 	    BIBTEX_EMAIL_OFFSET = 7 # was 8 before &quot; became &#34;
 	    BIBTEX_EMAIL_PREFIX = "mailto:"
 	    BIBTEX_EMAIL_SAVE_LABEL = 0
 	    # The active pattern escapes the braces; the unescaped original
 	    # is kept in the ## line for reference.
 	    ##CITE_PATTERN = "\\\\cite{[^}]+}"
 	    CITE_PATTERN = "\\\\cite\\{[^\\}]+}"
 	    CITE_OFFSET = 6
 	    CITE_PREFIX = ""
 	    CITE_SAVE_LABEL = 1
 	    EMAIL_PATTERN = "[A-Za-z0-9-]+@[A-Za-z0-9.-]+"
 	    EMAIL_OFFSET = 0
 	    EMAIL_PREFIX = "mailto:"
 	    EMAIL_SAVE_LABEL = 0
 	    # See Nelson H. F. Beebe, ``Bibliography prettyprinting
 	    # and syntax checking'', TUGboat 14(3), 222-222, October
 	    # (1993), and 14(4), 395--419, December (1993) for the
 	    # syntax of BibTeX names used here in ENTRY_PATTERN,
 	    # KEY_EQUALS_NAME_PATTERN and STRING_PATTERN.
 	    ##ENTRY_PATTERN = "^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*{[A-Za-z][A-Za-z0-9:.+/'-]*,[ \t]*$"
 	    ENTRY_PATTERN = "^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*\\{[A-Za-z][A-Za-z0-9:.+/'-]*,[ \t]*$"
 	    KEY_EQUALS_NAME_PATTERN = "^[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*=[ \t]*[A-Za-z]"
 	    ##STRING_PATTERN = "^@[Ss][Tt][Rr][Ii][Nn][gG]{[A-Za-z][A-Za-z0-9:.+/'-]*"
 	    STRING_PATTERN = "^@[Ss][Tt][Rr][Ii][Nn][gG]\\{[A-Za-z][A-Za-z0-9:.+/'-]*"
 	    STRING_OFFSET = 8
 	    STRING_PREFIX = ""
 	    STRING_SAVE_LABEL = 1
 	    # According to Internet RFC 1614 (May 1994), a URL is
 	    # defined in the document T. Berners-Lee, ``Uniform
 	    # Resource Locators'', March 1993, available at URL
 	    # ftp://info.cern.ch/pub/ietf/url4.ps.  Unfortunately,
 	    # that address is no longer valid.  However, I was able to
 	    # track down pointers from http://www.w3.org/ to locate a
 	    # suitable description in Internet RFC 1630 (June 1994).
 	    # NB: We additionally disallow & in a URL because it is
 	    # needed in SGML entities "&name;".  We also disallow =
 	    # and | because these are commonly used in \path=...= and
 	    # \path|...| strings in BibTeX files.  These restrictions
 	    # could be removed if we went to the trouble of first
 	    # encoding these special characters in %xy hexadecimal
 	    # format, but they are rare enough that I am not going to
 	    # do so for now.  The worst that will happen from this
 	    # decision is that an occasional URL in a BibTeX file will
 	    # be missing a surrounding anchor.
 	    URL_PATTERN = "[A-Za-z]+://[^ \",&=|]+"
 	    URL_OFFSET = 0
 	    URL_PREFIX = ""
 	    URL_SAVE_LABEL = 0
 	    # [24-May-2016] support for background coloring of block comments
 	    IN_BLOCK_COMMENT = 0
 	}
 # Main rule: every input record is handed to do_line().
 {
     do_line()
 }
 # After the last record, finish the output for the most recent input
 # file -- skipped while LASTFILENAME still holds the UNSET_FILENAME
 # sentinel.
 END {
     if (UNSET_FILENAME != LASTFILENAME) {
         end_file(LASTFILENAME)
     }
 }
 # add_entry(array, value): record the current input line number (FNR)
 # under key `value`.  The first occurrence stores FNR by itself; later
 # occurrences append " " FNR, giving a space-separated list of lines.
 function add_entry(array,value)
 {
     if (!(value in array)) {
         # first sighting of this key
         array[value] = FNR
         return
     }
     array[value] = array[value] " " FNR
 }
 # anchor(s, type, pattern, offset, prefix, save_label)
 #
 # Return s with every match of `pattern` wrapped in an HTML anchor
 # <A type="prefix+name">...</A>.  Matches are processed left to right
 # by recursing on the text that follows each match.
 #
 #   s          text to decorate
 #   type       anchor attribute name; "NAME" additionally wraps the
 #              anchored text in <STRONG>
 #   pattern    regular expression locating the text (may include
 #              leading context)
 #   offset     number of leading characters of each match to leave
 #              outside the anchor
 #   prefix     string prepended to the matched name inside the quotes
 #   save_label when true, record the name with add_entry(): in
 #              label_hrefs for type "HREF", in label_names for "NAME"
 #
 # name, rstart and rlength are locals.
 function anchor(s,type,pattern,offset,prefix,save_label, name,rstart,rlength)
 {
    # Add anchors <A type="....">...</A> around text in s matching
    # pattern.  A non-zero offset discards that many characters from
    # the start of the match, allowing the pattern to contain leading
    # context which goes outside the anchored region.  The prefix is
    # attached to the start of the matched string, inside the value
    # quotes in the anchor.
    if (match(s,pattern))
    {
 	rstart = RSTART		# need private copies of these globals because
 	rlength = RLENGTH	# recursion will change them
 	rstart += offset	# adjust by offset to discard leading
 	rlength -= offset	# context in pattern
 	name = substr(s,rstart,rlength)
 	sub(/ +at +/,"@",name)	# reduce "user at host" to "user@host"
 	# The recursive call must forward save_label itself; the previous
 	# code passed an undefined global, which disabled label saving
 	# for every match after the first.
 	s = substr(s,1,rstart-1) \
 	    "<A " type "=\"" prefix name "\">" \
 	    ((type == "NAME") ? "<STRONG>" : "") \
 	    substr(s,rstart,rlength) \
 	    ((type == "NAME") ? "</STRONG>" : "") \
 	    "</A>" \
 	    anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label)
 	if (save_label)
 	{
 	    if (type == "HREF")
 		add_entry(label_hrefs, name)
 	    else if (type == "NAME")
 		add_entry(label_names, name)
 	}
    }
    return (s)
 }
 # begin_file(): write the HTML preamble for the current input file.
 #
 # Prints the marker comments, DOCTYPE, <HEAD> section, validator badges
 # and the opening <PRE> to the file named by output_filename(FILENAME),
 # then empties the label_names and label_hrefs tables.
 #
 # f is a local.  slash_pos and BASE_FILENAME are not in the parameter
 # list, so they are assigned as globals.
 function begin_file( f)
 {
    f = output_filename(FILENAME)
    ## NB: If Transitional is eliminated in DOCTYPE, background coloring is lost! Why?
    # Use only the part of FILENAME after the last "/" in the generated text.
    # NOTE(review): assumes str::rindex returns 0 when there is no "/" -- confirm.
    slash_pos = str::rindex(FILENAME, "/");
    BASE_FILENAME = (slash_pos > 0)? str::substr(FILENAME, slash_pos + 1): FILENAME;
    print "<!-- -*-html-*- -->"									> f
    print ""											> f
 ##    print "<!-- " FILENAME " -->"								> f
    print "<!-- " BASE_FILENAME " -->"								> f
    print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"			> f
    print "<!-- BibTeX format to HTML by " PROGRAM " version " VERSION " -->"			> f
    # NOTE(review): the DATE / PERSONAL_NAME lines below are disabled,
    # presumably to keep the output reproducible -- confirm.
 ##    print "<!-- on " DATE " -->"								> f
 ##    print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"				> f
    print ""											> f
    print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/1998/REC-html40-19980424/loose.dtd\">" > f
    print ""											> f
    print ""											> f
    print "<HTML>"										> f
    print "    <HEAD>"										> f
    print "        <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=iso-8859-1\">"> f
    print "        <TITLE>"									> f
 ##    print "            BibTeX bibliography " FILENAME						> f
    print "            BibTeX bibliography " BASE_FILENAME						> f
    print "        </TITLE>"									> f
    print "        <LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"			> f
    print "        <LINK HREF=\"http://www.math.utah.edu/pub/tex/bib/tugbib.css\" TYPE=\"text/css\" REL=\"stylesheet\">"	> f
    print "    </HEAD>"										> f
    print ""											> f
    print "    <BODY>"										> f
    print "        <DIV  ALIGN=\"right\">"							> f
    print "            <A HREF=\"http://validator.w3.org/check/referer\">"			> f
    print "                <IMG ALIGN=\"MIDDLE\" BORDER=\"0\" SRC=\"/images/valid-html40.png\" ALT=\"Valid HTML 4.0!\" HEIGHT=\"31\" WIDTH=\"88\">" > f
    print "            </A>"									> f
    print "            <A HREF=\"http://jigsaw.w3.org/css-validator/check/referer\">"		> f
    print "                <IMG ALIGN=\"MIDDLE\" BORDER=\"0\" SRC=\"/images/valid-css.gif\" ALT=\"Valid CSS!\" HEIGHT=\"31\" WIDTH=\"88\">" > f
    print "            </A>"									> f
    print "        </DIV>"									> f
    print "<PRE>"										> f
    # Start this file with empty label tables.
    clear_array(label_names)
    clear_array(label_hrefs)
 }
 # check_for_file_change(): detect that FILENAME differs from the file
 # processed last.  When it does, finish the previous file's output
 # (closing it unless it is /dev/stdout), remember the new name in
 # LASTFILENAME, and start the new file's output with begin_file().
 function check_for_file_change()
 {
     # still inside the same input file: nothing to do
     if (LASTFILENAME == FILENAME)
         return
     # a real previous file exists only once the sentinel has been replaced
     if (LASTFILENAME != UNSET_FILENAME) {
         end_file(LASTFILENAME)
         if (LASTFILENAME != "/dev/stdout")
             close(output_filename(LASTFILENAME))
     }
     LASTFILENAME = FILENAME
     begin_file()
 }
 # check_refs(): issue a warning for every key of label_hrefs that is
 # not also a key of label_names, quoting the line list stored for it.
 # label is a local loop variable.
 function check_refs( label)
 {
     for (label in label_hrefs) {
         # referenced and defined: fine
         if (label in label_names)
             continue
         warning("undefined label " label " at line(s) " label_hrefs[label])
     }
 }
 # clear_array(array): remove every element of array, one key at a
 # time.  k is a local loop variable.
 function clear_array(array, k)
 {
     for (k in array) {
         delete array[k]
     }
 }
 # end_file(filename): write the closing </PRE>, </BODY> and </HTML>
 # lines to the output file derived from filename, then run
 # check_refs().  dest is a local.
 function end_file(filename, dest)
 {
     dest = output_filename(filename)
     # closing tags, innermost first
     print "</PRE>"      > dest
     print "    </BODY>" > dest
     print "</HTML>"     > dest
     # warn about referenced labels that have no definition
     check_refs()
 }
 # do_cite(s): rewrite the \cite{a,b,...} located by the caller's most
 # recent match() (RSTART/RLENGTH) so that each comma-separated label
 # becomes <A HREF="#label">label</A>, and record each label in
 # label_hrefs.  Returns the rewritten line.
 # i, count, keys and out are locals.
 function do_cite(s, i,count,keys,out)
 {
     # everything up to and including the CITE_OFFSET leading characters
     # of the match
     out = substr(s, 1, RSTART + CITE_OFFSET - 1)
     # the label list between the braces
     count = split(substr(s, RSTART + CITE_OFFSET, RLENGTH - 1 - CITE_OFFSET), keys, ",")
     for (i = 1; i <= count; i++) {
         if (i > 1)
             out = out ","
         out = out "<A HREF=\"#" keys[i] "\">" keys[i] "</A>"
         add_entry(label_hrefs, keys[i])
     }
     # the last character of the match and the rest of the line
     return (out substr(s, RSTART + RLENGTH - 1))
 }
 # Convert the current input record ($0) to HTML and write it out.
 # The line is first SGML-escaped, then decorated with anchors for e-mail
 # addresses, URLs, @String names and \cite labels, and finally with one
 # of three label treatments (entry label, key = name, crossref).  The
 # result is printed to output_filename(FILENAME).
 # n, name and s are local scratch variables.
 function do_line( n,name,s)
 {
    s = protect_SGML_characters($0)
    if (match(s,STRING_PATTERN)) # remember name from @String{name = "value"}
    {
 	name = substr(s,RSTART + STRING_OFFSET,RLENGTH - STRING_OFFSET)
 	string_name[name] = 1
 	# print "DEBUG 1: name =", name >"/dev/stderr"
    }
    if (match(s,/^%+[ \t]*email[ \t]*=/)) # special handling because BibTeX does not allow @ in comments
 	s = anchor(s,"HREF",BIBTEX_EMAIL_PATTERN,BIBTEX_EMAIL_OFFSET,BIBTEX_EMAIL_PREFIX,\
 		   BIBTEX_EMAIL_SAVE_LABEL)
    else
 	s = anchor(s,"HREF",EMAIL_PATTERN,EMAIL_OFFSET,EMAIL_PREFIX,EMAIL_SAVE_LABEL)
    s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
    s = anchor(s,"NAME",STRING_PATTERN,STRING_OFFSET,STRING_PREFIX,STRING_SAVE_LABEL)
    # do_cite() relies on the RSTART/RLENGTH set by this match()
    if (match(s,CITE_PATTERN))
 	s = do_cite(s)
    # The three branches below are mutually exclusive; each one relies on
    # the RSTART/RLENGTH left by the match() in its own condition.
    if (match(s,ENTRY_PATTERN))	# then have ``@Entry{label,''
    {
 	n = index(s,"{")
 	name = substr(s,n+1)
 	gsub(/^[ \t]*/,"",name)	# trim optional leading space
 	gsub(/,[ \t]*$/,"",name) # trim trailing comma and optional space
 	# print "DEBUG 2: name =", name >"/dev/stderr"
 	s = substr(s,1,n) \
 	    "<A NAME=\"" name "\"><STRONG>" name "</STRONG></A>" \
 	    substr(s,n+1+length(name))
 	add_entry(label_names, name)
    }
    else if (match(s,KEY_EQUALS_NAME_PATTERN)) # then have ``key = name''
    {
 	name = substr(s,RSTART+RLENGTH-1)
 	sub(/,?[ \t]*$/,"",name) # trim optional trailing comma and space
 	# print "DEBUG 3: name =", name >"/dev/stderr"
 	if (name in string_name) # then we have a definition of this name
 	{
 	    s = substr(s,1,RSTART+RLENGTH-2) \
 		"<A HREF=\"#" name "\">" name "</A>" substr(s,RSTART+RLENGTH-1+length(name))
 	    add_entry(label_hrefs, name)
 	}
    }
    else if (match(s,CROSSREF_EQUALS_LABEL_PATTERN)) # then have `` crossref = "label"''
    {
 	name = substr(s,RSTART+RLENGTH)
 	# NOTE(review): this trims a trailing &#34;, but
 	# protect_SGML_characters() in this file now emits &quot; for a
 	# double quote; CROSSREF_EQUALS_LABEL_PATTERN is defined outside
 	# this view, so confirm both still agree on the quote entity.
 	sub(/&#34;,?[ \t]*$/,"",name) # trim trailing quote and optional comma and space
 	# print "DEBUG 4: name =", name >"/dev/stderr"
 	s = substr(s,1,RSTART+RLENGTH-1) \
 	    "<A HREF=\"#" name "\">" name "</A>" substr(s,RSTART+RLENGTH+length(name))
 	add_entry(label_hrefs, name)
    }
    # Must run before anything is printed for this line, so that the
    # previous file is ended and begin_file() runs for the new one first.
    check_for_file_change()
    # Runs of lines starting with % get their own <PRE CLASS="blockcomment">
    # section; IN_BLOCK_COMMENT records whether one is currently open.
    if ( (s ~ "^%") && !IN_BLOCK_COMMENT)
    {
 	printf("</PRE><PRE CLASS=\"blockcomment\">")	> output_filename(FILENAME)
 	IN_BLOCK_COMMENT = 1
    }
    else if ( (s !~ "^%") && IN_BLOCK_COMMENT)
    {
 	printf("</PRE><PRE>")				> output_filename(FILENAME)
 	IN_BLOCK_COMMENT = 0
    }
    print s					>output_filename(FILENAME)
 }
 function output_filename(input_filename)
 {
 ## HAWK - for use in t/h-003.hawk
 ## All output is forced to standard output; the name-mapping logic that
 ## follows the return statement is kept but is never reached.
 return "/dev/stdout";
    # The result for the most recent input name is cached for speed.
    if (input_filename == _last_input_filename)
 	return (_last_output_filename)
    _last_input_filename = input_filename
    sub(/.[^.]*$/,"",input_filename)
    _last_output_filename = ((input_filename == "") || (input_filename == "/dev/stdin")) ? \
 	"/dev/stdout" : (PREFIX input_filename SUFFIX)
    return (_last_output_filename)
 }
 function protect_SGML_characters(s)
 {
    # Replace the four SGML-reserved characters in s by entities and
    # return the result.  The ampersand MUST be handled first, otherwise
    # the ampersands of the entities produced below would be escaped too.
    gsub(/&/, "\\&amp;", s)
    gsub(/</, "\\&lt;", s)
    gsub(/>/, "\\&gt;", s)
    # [24-May-2016] With the change from HTML 3.2 to 4.0, &quot; can be
    # used again.  It had been eliminated in the final HTML 3.2 version
    # (see http://www.w3.org/pub/WWW/MarkUp/Wilbur/, section ``What
    # happened to &quot;?''), during which time &#34; was used instead.
    gsub(/\"/, "\\&quot;", s)
    return (s)
 }
 function warning(message)
 {
    # Print a diagnostic on standard error, prefixed by a file name.
    #
    # The prefix is LASTFILENAME rather than FILENAME, and no FNR is
    # given: the original author's note says warnings are only needed
    # from check_refs(), which runs once awk has already moved on from
    # the file being reported, so no line number is on record for it.
    # (Earlier form: print FILENAME ":" FNR ":%%" message >"/dev/stderr")
    print LASTFILENAME ":%%" message > "/dev/stderr"
 }
--- a/hawk/t/bibtex-to-html.out
+++ b/hawk/t/bibtex-to-html.out
--- a/hawk/t/h-002.hawk
+++ b/hawk/t/h-002.hawk
@ -279,7 +279,9 @@ function main()
 	{
-		@local a;
+		# split, str::split, str::splita
 		@local a, str;
 		ensure (split("Here===Is=Some=====Data", a, "=+"),     4, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[1] === "Here",                               1, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[2] === "Is",                                 1, @SCRIPTNAME, @SCRIPTLINE);
@ -315,14 +317,37 @@ function main()
 		ensure (str::splita("Here===Is=Some=====Data", a, ""),  23,    @SCRIPTNAME, @SCRIPTLINE);
 		ensure (hawk::typename(a),                             "array",@SCRIPTNAME, @SCRIPTLINE);
-		ensure (split("Here  Is Some   Data", a, / /),         7, @SCRIPTNAME, @SCRIPTLINE);
+		ensure (str::split("Here  Is Some   Data", a, / /),    7, @SCRIPTNAME, @SCRIPTLINE);
-		ensure (split("Here  Is Some   Data", a, " "),         4, @SCRIPTNAME, @SCRIPTLINE);
+		ensure (str::split("Here  Is Some   Data", a, " "),    4, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[1] === "Here",                               1, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[2] === "Is",                                 1, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[3] === "Some",                               1, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[4] === "Data",                               1, @SCRIPTNAME, @SCRIPTLINE);
 		str = "a   b\t\tc d";
 		ensure (str::splita(str, a, " "), 4,              @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[1],               "a",            @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[2],               "b",            @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[3],               "c",            @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[4],               "d",            @SCRIPTNAME, @SCRIPTLINE);
 		ensure (str::splita(str, a, / /), 5, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[1],               "a",            @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[2],               "",             @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[3],               "",             @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[4],               "b\t\tc",       @SCRIPTNAME, @SCRIPTLINE);
 		ensure (a[5],               "d",            @SCRIPTNAME, @SCRIPTLINE);
 	}
 	{
 		## back reference in a regular expression
 		ensure (("a2b" ~ /(a)\12b/), 0, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (("aa2b" ~ /(a)\12b/), 1, @SCRIPTNAME, @SCRIPTLINE);
 		ensure (("aaa2b" ~ /(a)\12b/), 1, @SCRIPTNAME, @SCRIPTLINE);
 	}
 	print "SUCCESS";
 }
--- a/hawk/t/h-003.hawk
+++ b/hawk/t/h-003.hawk
@ -0,0 +1,76 @@
@pragma entry main
@pragma implicit off
@include "ensure.inc";
 ## Compare two files byte by byte.
 ##   a, b  - path names of the files to compare
 ## Returns 1 if the contents are identical, 0 if they differ, and -1 if
 ## either file cannot be opened (an error message is printed).
 function are_files_identical(a, b)
 {
 	@local f1, f2, x, y, diff;
 	f1 = sys::open(a, sys::O_RDONLY);
 	if (f1 <= -1)
 	{
 		printf ("ERROR: unable to open %s\n", a);
 		return -1;
 	}
 	f2 = sys::open(b, sys::O_RDONLY);
 	if (f2 <= -1)
 	{
 		## release the descriptor of the first file, not its name
 		sys::close (f1);
 		printf ("ERROR: unable to open %s\n", b);
 		return -1;
 	}
 	diff = 0;
 	while (sys::read(f1, x, 1) > 0)
 	{
 		## the second file ended early, or the bytes differ
 		if (sys::read(f2, y, 1) <= 0 || x !== y) 
 		{
 			diff = 1;
 			break;
 		}
 	}
 	## the first file is exhausted; any byte left in the second file
 	## means the second one is longer
 	if (!diff && sys::read(f2, y, 1) > 0) diff = 1;
 	sys::close (f2);
 	sys::close (f1);
 	return !diff;
 }
 ## Run TDIR/<x>.hawk on TDIR/<in_name>.in with the interpreter named by
 ## ARGV[0] and compare what it prints with TDIR/<out_name>.out.
 ##   x          - script name without the .hawk suffix
 ##   more_opts  - extra command-line options for the interpreter
 ##   in_name    - input base name; defaults to x when nil
 ##   out_name   - expected-output base name; defaults to x when nil
 ## On a mismatch (or when a file cannot be opened) the program exits
 ## with status 1.
 function run_test (x, more_opts, in_name, out_name)
 {
 	@local cmd, inf, expf, outf;
 	if (hawk::isnil(in_name)) in_name = x;
 	if (hawk::isnil(out_name)) out_name = x;
 	inf = sprintf("%s/%s.in", TDIR, in_name);
 	expf = sprintf("%s/%s.out", TDIR, out_name);
 	## the process id keeps the name of the temporary output file unique
 	outf = sprintf("/tmp/%s.%d.out", out_name, sys::getpid());
 	cmd = sprintf("%s %s -f %s/%s.hawk %s > %s", ARGV[0], more_opts, TDIR, x, inf, outf);
 	system (cmd);
 	if (are_files_identical(expf, outf) > 0)
 	{
 		## success - the temporary output file is no longer needed
 		sys::unlink (outf);
 		return;
 	}
 	## don't delete the output file for review.
 	printf ("FAILURE: %s - %s and %s differ\n", x, expf, outf);
 	exit (1);
 }
 ## Entry point (see "@pragma entry main" at the top of this file).
 ## Runs each script test in turn; run_test() exits on the first failure,
 ## so SUCCESS is printed only when every comparison matched.
 function main()
 {
 	## journal-toc.hawk on journal-toc.in, compared with journal-toc.out
 	run_test ("journal-toc");
 	## same script and input with -vHTML=1, compared with journal-toc-html.out
 	run_test ("journal-toc", "-vHTML=1", "journal-toc", "journal-toc-html");
 	## bibtex-to-html.hawk on journal-toc.in, compared with bibtex-to-html.out
 	run_test ("bibtex-to-html", "", "journal-toc", "bibtex-to-html");
 	print "SUCCESS";
 }
--- a/hawk/t/journal-toc-html.out
+++ b/hawk/t/journal-toc-html.out
--- a/hawk/t/journal-toc.hawk
+++ b/hawk/t/journal-toc.hawk
@ -0,0 +1,993 @@
 # http://www.netlib.org/bibnet/tools/software/journal-toc.awk
 #
 ### ====================================================================
 ###  @Awk-file{
 ###     author          = "Nelson H. F. Beebe",
 ###     version         = "1.00",
 ###     date            = "09 October 1996",
 ###     time            = "15:57:06 MDT",
 ###     filename        = "journal-toc.awk",
 ###     address         = "Center for Scientific Computing
 ###                        Department of Mathematics
 ###                        University of Utah
 ###                        Salt Lake City, UT 84112
 ###                        USA",
 ###     telephone       = "+1 801 581 5254",
 ###     FAX             = "+1 801 581 4148",
 ###     URL             = "http://www.math.utah.edu/~beebe",
 ###     checksum        = "25092 977 3357 26493",
 ###     email           = "beebe@math.utah.edu (Internet)",
 ###     codetable       = "ISO/ASCII",
 ###     keywords        = "BibTeX, bibliography, HTML, journal table of
 ###                        contents",
 ###     supported       = "yes",
 ###     docstring       = "Create a journal cover table of contents from
 ###                        <at>Article{...} entries in a journal BibTeX
 ###                        .bib file for checking the bibliography
 ###                        database against the actual journal covers.
 ###                        The output can be either plain text, or HTML.
 ###
 ###                        Usage:
 ###                            bibclean -max-width 0 BibTeX-file(s) | \
 ###                                bibsort -byvolume | \
 ###                                awk -f journal-toc.awk \
 ###                                    [-v HTML=nnn] [-v INDENT=nnn] \
 ###                                    [-v BIBFILEURL=url] >foo.toc
 ###
 ###                            or if the bibliography is already sorted
 ###                            by volume,
 ###
 ###                            bibclean -max-width 0 BibTeX-file(s) | \
 ###                                awk -f journal-toc.awk \
 ###                                    [-v HTML=nnn] [-v INDENT=nnn] \
 ###                                    [-v BIBFILEURL=url] >foo.toc
 ###
 ###                        A non-zero value of the command-line option,
 ###                        HTML=nnn, results in HTML output instead of
 ###                        the default plain ASCII text (corresponding
 ###                        to HTML=0).
 ###
 ###                        The INDENT=nnn command-line option specifies
 ###                        the number of blanks to indent each logical
 ###                        level of HTML.  The default is INDENT=4.
 ###                        INDENT=0 suppresses indentation.  The INDENT
 ###                        option has no effect when the default HTML=0
 ###                        (plain text output) option is in effect.
 ###
 ###                        When HTML output is selected, the
 ###                        BIBFILEURL=url command-line option provides a
 ###                        way to request hypertext links from table of
 ###                        contents page numbers to the complete BibTeX
 ###                        entry for the article.  These links are
 ###                        created by appending a sharp (#) and the
 ###                        citation label to the BIBFILEURL value, which
 ###                        conforms with the practice of
 ###                        bibtex-to-html.awk.
 ###
 ###                        The HTML output form may be useful as a more
 ###                        compact representation of journal article
 ###                        bibliography data than the original BibTeX
 ###                        file provides.  Of course, the
 ###                        table-of-contents format provides less
 ###                        information, and is considerably more
 ###                        troublesome for a computer program to parse.
 ###
 ###                        When URL key values are provided, they will
 ###                        be used to create hypertext links around
 ###                        article titles.  This supports journals that
 ###                        provide article contents on the World-Wide
 ###                        Web.
 ###
 ###                        For parsing simplicity, this program requires
 ###                        that BibTeX
 ###
 ###                            key = "value"
 ###
 ###                        and
 ###
 ###                            @String{name = "value"}
 ###
 ###                        specifications be entirely contained on
 ###                        single lines, which is readily provided by
 ###                        the `bibclean -max-width 0' filter.  It also
 ###                        requires that bibliography entries begin and
 ###                        end at the start of a line, and that
 ###                        quotation marks, rather than balanced braces,
 ###                        delimit string values.  This is a
 ###                        conventional format that again can be
 ###                        guaranteed by bibclean.
 ###
 ###                        This program requires `new' awk, as described
 ###                        in the book
 ###
 ###                            Alfred V. Aho, Brian W. Kernighan, and
 ###                            Peter J. Weinberger,
 ###                            ``The AWK Programming Language'',
 ###                            Addison-Wesley (1988), ISBN
 ###                            0-201-07981-X,
 ###
 ###                        such as provided by programs named (GNU)
 ###                        gawk, nawk, and recent AT&T awk.
 ###
 ###                        The checksum field above contains a CRC-16
 ###                        checksum as the first value, followed by the
 ###                        equivalent of the standard UNIX wc (word
 ###                        count) utility output of lines, words, and
 ###                        characters.  This is produced by Robert
 ###                        Solovay's checksum utility.",
 ###  }
 ### ====================================================================
 # Main dispatch table.  Every rule that handles a line ends with `next',
 # so at most one handler runs per input line and the order of the rules
 # matters: the specific @String/@Preamble/@Article rules must come
 # before the catch-all /^ *@/ rule.  Lines matching no rule are ignored.
 BEGIN						{ initialize() }
 /^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *\{/		{ do_String(); next }
 /^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/	{ next }
 /^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/		{ do_Article(); next }
 # any other @Entry: do_Other() clears In_Article
 /^ *@/						{ do_Other(); next }
 /^ *author *= *\"/ 				{ do_author(); next }
 /^ *journal *= */				{ do_journal(); next }
 /^ *volume *= *\"/				{ do_volume(); next }
 /^ *number *= *\"/				{ do_number(); next }
 /^ *year *= *\"/				{ do_year(); next }
 /^ *month *= */					{ do_month(); next }
 /^ *title *= *\"/				{ do_title(); next }
 /^ *pages *= *\"/				{ do_pages(); next }
 /^ *URL *= *\"/					{ do_URL(); next }
 # closing brace of an entry: emit the TOC line(s) only for articles
 /^ *} *$/					{ if (In_Article) do_end_entry(); next }
 END						{ terminate() }
 ########################################################################
 # NB: The programming conventions for variables in this program are:   #
 #	UPPERCASE		global constants and user options      #
 #	Initialuppercase	global variables                       #
 #	lowercase		local variables                        #
 # Any deviation is an error!                                           #
 ########################################################################
 function do_Article()
 {
 	# Start of an @Article{label, line: note that we are inside an
 	# article, extract its citation label, and clear all per-article
 	# fields so nothing leaks over from the previous entry.
 	In_Article = 1
 	Citation_label = $0
 	sub(/^[^\{]*\{/,"",Citation_label)	# discard up to and including open brace
 	sub(/ *, *$/,"",Citation_label)		# discard trailing comma and spaces
 	Author = Title = Journal = ""
 	Volume = Number = Month = Year = ""
 	Pages = Url = ""
 }
 # Handle an ``author = "..."'' line: store the quoted value, converted
 # by TeX_to_HTML(), in the global Author.
 function do_author()
 {
 	Author = TeX_to_HTML(get_value($0))
 }
 function do_end_entry( i,count,names,page_text)
 {
 	# End of an article entry: emit its table-of-contents line(s).
 	# Each author but the last gets a line of its own ending in
 	# " and"; the last author shares a line with the title and pages.
 	count = split(Author,names," and ")
 	if (Last_number != Number)		# first article of a new issue
 		do_new_issue()
 	for (i = 1; i < count; ++i)
 		print_toc_line(names[i] " and", "", "")
 	Title_prefix = html_begin_title()
 	Title_suffix = html_end_title()
 	page_text = html_begin_pages() Pages html_end_pages()
 	if (html_length(Title) > (MAX_TITLE_CHARS + MIN_LEADERS))
 		do_long_title(names[count], Title, page_text)	# split over several lines
 	else
 		print_toc_line(names[count], Title, page_text)	# complete title fits on line
 }
 function do_journal()
 {
 	# Handle a ``journal = ...'' line and set the global Journal.
 	if ($0 !~ /[=] *"/)
 	{	# journal = journal-abbreviation,
 		Journal = get_abbrev($0)
 		if (Journal in String)		# known @String: use its expansion
 			Journal = String[Journal]
 	}
 	else	# journal = "quoted journal name",
 		Journal = get_value($0)
 	gsub(/\\-/,"",Journal)			# remove discretionary hyphens
 }
 function do_long_title(author,title,pages, piece,cut)
 {
 	# Print a title that is too long for one line as several lines.
 	# The author appears on the first line only and the page number
 	# on the last line only.
 	title = trim(title)			# discard leading and trailing space
 	while (length(title) > 0)
 	{
 		cut = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
 		piece = substr(title,1,cut)
 		title = substr(title,cut+1)
 		sub(/^ +/,"",title)		# discard any leading space
 		if (length(title) == 0)		# nothing left: this is the last line
 			print_toc_line(author, piece, pages)
 		else
 			print_toc_line(author, piece, "")
 		author = ""
 	}
 }
 function do_month( i,count,names,joined)
 {
 	# Handle a ``month = ...'' line and set the global Month.  The
 	# value may be quoted text, a month abbreviation, or several of
 	# these joined by #; abbreviations found in Month_expansion are
 	# replaced by the full month name.
 	if ($0 ~ /[=] *"/)
 		Month = get_value($0)
 	else
 		Month = get_abbrev($0)
 	gsub(/[\"]/,"",Month)
 	# normalize the separators between month names to " / "
 	gsub(/ *# *\\slash *# */," / ",Month)
 	gsub(/ *# *-+ *# */," / ",Month)
 	count = split(Month,names," */ *")
 	joined = ""
 	for (i = 1; i <= count; ++i)
 	{
 		if (i > 1)
 			joined = joined " / "
 		if (names[i] in Month_expansion)
 			joined = joined Month_expansion[names[i]]
 		else
 			joined = joined names[i]
 	}
 	Month = joined
 }
 function do_new_issue()
 {
 	# Emit the heading for a new journal issue and remember its number.
 	Last_number = Number
 	if (!HTML)
 	{	# plain text: blank line, journal name, issue line, blank line
 		print_line("")
 		print_line(Journal)
 		print_line(strip_html(vol_no_month_year()))
 		print_line("")
 		return
 	}
 	# HTML: extra break when the volume changes, then a heading block
 	# and a matching entry in the table of issues.
 	if (Last_volume != Volume)
 	{
 		Last_volume = Volume
 		print_line(prefix(2) "<BR>")
 	}
 	html_end_toc()
 	html_begin_issue()
 	print_line(prefix(2) Journal "<BR>")
 	print_line(strip_html(vol_no_month_year()))
 	html_end_issue()
 	html_toc_entry()
 	html_begin_toc()
 }
 # Handle a ``number = "..."'' line: store the quoted value in the
 # global Number.
 function do_number()
 {
 	Number = get_value($0)
 }
 # Start of any entry other than @Article, @String or @Preamble: clear
 # In_Article so that the closing brace of this entry does not trigger
 # do_end_entry().
 function do_Other()
 {
 	In_Article = 0
 }
 # Handle a ``pages = "..."'' line: store the quoted value in the global
 # Pages, dropping the first ``--??'' (unknown final page) if present.
 function do_pages()
 {
 	Pages = get_value($0)
 	sub(/--[?][?]/,"",Pages)
 }
 # Handle an ``@String{name = "value"}'' line: record the definition in
 # the global String array, indexed by name.  Note that $0 itself is
 # edited in place.
 function do_String()
 {
 	sub(/^[^\{]*\{/,"",$0)	# discard up to and including open brace
 	sub(/\} *$/,"",$0)	# discard from optional whitespace and trailing brace to end of line
 	String[get_key($0)] = get_value($0)
 }
 # Handle a ``title = "..."'' line: store the quoted value, converted by
 # TeX_to_HTML(), in the global Title.
 function do_title()
 {
 	Title = TeX_to_HTML(get_value($0))
 }
 function do_URL( pieces)
 {
 	# Handle a ``URL = "..."'' line.  The value may hold several URLs
 	# separated by commas or semicolons; only the first one is kept,
 	# trimmed, in the global Url.
 	split(get_value($0),pieces,"[,;]")
 	Url = trim(pieces[1])
 }
 # Handle a ``volume = "..."'' line: store the quoted value in the
 # global Volume.
 function do_volume()
 {
 	Volume = get_value($0)
 }
 # Handle a ``year = "..."'' line: store the quoted value in the global
 # Year.
 function do_year()
 {
 	Year = get_value($0)
 }
 function get_abbrev(s)
 {
 	# Return abbrev from a line of the form ``key = abbrev,''.
 	# First strip everything through the equals sign and the blanks
 	# after it, then strip trailing blanks and the optional comma.
 	sub(/^[^=]*= */, "", s)
 	sub(/ *,? *$/, "", s)
 	return (s)
 }
 function get_key(s)
 {
 	# Return key from a line of the form ``key = "value",''.
 	sub(/^ */, "", s)		# strip leading blanks
 	sub(/ *=.*$/, "", s)		# strip from the blanks before = to end of line
 	return (s)
 }
 function get_value(s)
 {
 	# Return value from a line of the form ``key = "value",''.
 	# First strip everything through the opening quote and the blanks
 	# after it, then strip trailing blanks, the closing quote, the
 	# optional comma and any blanks after that.
 	sub(/^[^\"]*\" */, "", s)
 	sub(/ *\",? *$/, "", s)
 	return (s)
 }
 # Return s with braced TeX accent and special-letter control sequences
 # replaced by the corresponding HTML entities.  Each rule is a global
 # substitution of one pattern; the rules are applied in the order
 # written.
 #
 # NOTE(review): the patterns intended for the grave accent are written
 # here as \\\a, \\\e, \\\i, \\\o, \\\u (and upper-case forms).  TeX's
 # grave accent is backslash-backquote, so these look as if a backquote
 # has been lost; confirm what the regex engine makes of \a, \e, ...
 function html_accents(s)
 {
 	if (index(s,"\\") > 0)			# important optimization
 	{
 		# Convert common lower-case accented letters according to the
 		# table on p. 169 of in Peter Flynn's ``The World Wide Web
 		# Handbook'', International Thomson Computer Press, 1995, ISBN
 		# 1-85032-205-8.  The official table of ISO Latin 1 SGML
 		# entities used in HTML can be found in the file
 		# /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
 		# may differ).
 		gsub(/\{\\\a}/,	"\\&agrave;",	s)
 		gsub(/\{\\'a}/,	"\\&aacute;",	s)
 		gsub(/\{\\[\^]a}/,"\\&acirc;",	s)
 		gsub(/\{\\~a}/,	"\\&atilde;",	s)
 		# the umlaut rules for a, o, u, A, O, U expect the letter in
 		# its own braces; the older unbraced forms are kept as comments
 		##gsub(/\{\\\"a\}/,	"\\&auml;",	s)
 		gsub(/\{\\\"\{a\}\}/,	"\\&auml;",	s)
 		gsub(/\{\\aa}/,	"\\&aring;",	s)
 		gsub(/\{\\ae}/,	"\\&aelig;",	s)
 		gsub(/\{\\c\{c\}}/,"\\&ccedil;",	s)
 		gsub(/\{\\\e}/,	"\\&egrave;",	s)
 		gsub(/\{\\'e}/,	"\\&eacute;",	s)
 		gsub(/\{\\[\^]e}/,"\\&ecirc;",	s)
 		gsub(/\{\\\"e}/,	"\\&euml;",	s)
 		gsub(/\{\\\i}/,	"\\&igrave;",	s)
 		gsub(/\{\\'i}/,	"\\&iacute;",	s)
 		gsub(/\{\\[\^]i}/,"\\&icirc;",	s)
 		gsub(/\{\\\"i}/,	"\\&iuml;",	s)
 		# ignore eth and thorn
 		gsub(/\{\\~n}/,	"\\&ntilde;",	s)
 		gsub(/\{\\\o}/,	"\\&ograve;",	s)
 		gsub(/\{\\'o}/,	"\\&oacute;",	s)
 		gsub(/\{\\[\^]o}/, "\\&ocirc;",	s)
 		gsub(/\{\\~o}/,	"\\&otilde;",	s)
 		##gsub(/\{\\\"o}/,	"\\&ouml;",	s)
 		gsub(/\{\\\"\{o\}}/,	"\\&ouml;",	s)
 		gsub(/\{\\o}/,	"\\&oslash;",	s)
 		gsub(/\{\\\u}/,	"\\&ugrave;",	s)
 		gsub(/\{\\'u}/,	"\\&uacute;",	s)
 		gsub(/\{\\[\^]u}/,"\\&ucirc;",	s)
 		##gsub(/\{\\\"u}/,	"\\&uuml;",	s)
 		gsub(/\{\\\"\{u\}\}/,	"\\&uuml;",	s)
 		gsub(/\{\\'y}/,	"\\&yacute;",	s)
 		gsub(/\{\\\"y}/,	"\\&yuml;",	s)
 		# Now do the same for upper-case accents
 		gsub(/\{\\\A}/,	"\\&Agrave;",	s)
 		gsub(/\{\\'A}/,	"\\&Aacute;",	s)
 		gsub(/\{\\[\^]A}/,	"\\&Acirc;",	s)
 		gsub(/\{\\~A}/,	"\\&Atilde;",	s)
 		##gsub(/\{\\\"A}/,	"\\&Auml;",	s)
 		gsub(/\{\\\"\{A\}\}/,	"\\&Auml;",	s)
 		gsub(/\{\\AA}/,	"\\&Aring;",	s)
 		gsub(/\{\\AE}/,	"\\&AElig;",	s)
 		gsub(/\{\\c\{C\}}/,"\\&Ccedil;",	s)
 		# NOTE(review): the next pattern is the same text as the one
 		# used for &egrave; above, so nothing is left for it to match
 		# by the time it runs; presumably an upper-case E was
 		# intended -- confirm.
 		gsub(/\{\\\e}/,	"\\&Egrave;",	s)
 		gsub(/\{\\'E}/,	"\\&Eacute;",	s)
 		gsub(/\{\\[\^]E}/,	"\\&Ecirc;",	s)
 		gsub(/\{\\\"E}/,	"\\&Euml;",	s)
 		gsub(/\{\\\I}/,	"\\&Igrave;",	s)
 		gsub(/\{\\'I}/,	"\\&Iacute;",	s)
 		gsub(/\{\\[\^]I}/,	"\\&Icirc;",	s)
 		gsub(/\{\\\"I}/,	"\\&Iuml;",	s)
 		# ignore eth and thorn
 		gsub(/\{\\~N}/,	"\\&Ntilde;",	s)
 		gsub(/\{\\\O}/,	"\\&Ograve;",	s)
 		gsub(/\{\\'O}/,	"\\&Oacute;",	s)
 		gsub(/\{\\[\^]O}/,	"\\&Ocirc;",	s)
 		gsub(/\{\\~O}/,	"\\&Otilde;",	s)
 		##gsub(/\{\\\"O}/,	"\\&Ouml;",	s)
 		gsub(/\{\\\"\{O\}\}/,	"\\&Ouml;",	s)
 		gsub(/\{\\O}/,	"\\&Oslash;",	s)
 		gsub(/\{\\\U}/,	"\\&Ugrave;",	s)
 		gsub(/\{\\'U}/,	"\\&Uacute;",	s)
 		gsub(/\{\\[\^]U}/,	"\\&Ucirc;",	s)
 		##gsub(/\{\\\"U}/,	"\\&Uuml;",	s)
 		gsub(/\{\\\"\{U\}\}/,	"\\&Uuml;",	s)
 		gsub(/\{\\'Y}/,	"\\&Yacute;",	s)
 		gsub(/\{\\ss}/,	"\\&szlig;",	s)
 		# Others not mentioned in Flynn's book
 		gsub(/\{\\'\\i}/,"\\&iacute;",	s)
 		gsub(/\{\\'\\j}/,"j",		s)
 	}
 	return (s)
 }
 function html_begin_issue( lead)
 {
 	# Open the heading block of a new issue: a horizontal rule, then
 	# <H1> containing a named anchor that the table of issues links to.
 	# html_end_issue() emits the matching closing tags.
 	lead = prefix(2)
 	print_line("")
 	print_line(lead "<HR>")
 	print_line("")
 	print_line(lead "<H1>")
 	print_line(prefix(3) "<A NAME=\"" html_label() "\">")
 }
 function html_begin_pages()
 {
 	# Return the opening tag of a link from the page number to the
 	# BibTeX entry (BIBFILEURL "#" Citation_label), or an empty string
 	# in plain-text mode or when no BIBFILEURL was given.
 	if (HTML && (BIBFILEURL != ""))
 		return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
 	return ("")
 }
 # Open a <PRE> section and record that fact in In_PRE; while In_PRE is
 # set, prefix() returns no indentation.
 function html_begin_pre()
 {
 	In_PRE = 1
 	print_line("<PRE>")
 }
 function html_begin_title()
 {
 	# Return the opening tag of a link around the article title, or an
 	# empty string in plain-text mode or when the article has no URL.
 	if (HTML && (Url != ""))
 		return ("<A HREF=\"" Url "\">")
 	return ("")
 }
 # Start the contents listing of an issue: close any <PRE> section that
 # is still open, then open a fresh one.
 function html_begin_toc()
 {
 	html_end_toc()
 	html_begin_pre()
 }
 function html_body( k)
 {
 	# Print the buffered body lines Body[1..BodyLines] in order.
 	k = 1
 	while (k <= BodyLines)
 	{
 		print Body[k]
 		++k
 	}
 }
 # Find where to break title so that the first part shows at most
 # maxlength visible characters (HTML markup is not counted; see
 # html_length()).
 #   title      - the title text, possibly containing HTML markup
 #   maxlength  - maximum number of visible characters per line
 # Returns a character position k; the caller takes substr(title,1,k) as
 # the current line.  When the title already fits, or no space to break
 # at can be found, the full length of title is returned.
 # break_after and k are local scratch variables.
 function html_breakpoint(title,maxlength, break_after,k)
 {
 	# Return the largest character position in title AFTER which we
 	# can break the title across lines, without exceeding maxlength
 	# visible characters.
 	if (html_length(title) > maxlength)	# then need to split title across lines
 	{
 		# In the presence of HTML markup, the initialization of
 		# k here is complicated, because we need to advance it
 		# until html_length(title) is at least maxlength,
 		# without invoking the expensive html_length() function
 		# too frequently.  The need to split the title makes the
 		# alternative of delayed insertion of HTML markup much
 		# more complicated.
 		break_after = 0
 		# Forward scan: a raw prefix of maxlength characters can never
 		# show more than maxlength visible characters, so start there
 		# and remember the last space that still fits.
 		for (k = min(maxlength,length(title)); k < length(title); ++k)
 		{
 			if (substr(title,k+1,1) == " ")
 			{		# could break after position k
 				if (html_length(substr(title,1,k)) <= maxlength)
 					break_after = k
 				else	# advanced too far, retreat back to last break_after
 					break
 			}
 		}
 		if (break_after == 0)		# no breakpoint found by forward scan
 		{				# so switch to backward scan
 			for (k = min(maxlength,length(title)) - 1; \
 				(k > 0) && (substr(title,k+1,1) != " "); --k)
 				;		# find space at which to break title
 			if (k < 1)		# no break point found
 				k = length(title) # so must print entire string
 		}
 		else
 			k = break_after
 	}
 	else					# title fits on one line
 		k = length(title)
 	return (k)
 }
 # Close the anchor and the <H1> heading opened by html_begin_issue().
 function html_end_issue()
 {
 	print_line(prefix(3) "</A>")
 	print_line(prefix(2) "</H1>")
 }
 function html_end_pages()
 {
 	# Return the closing tag that matches html_begin_pages(): "</A>"
 	# in HTML mode when a BIBFILEURL was given, otherwise nothing.
 	if (HTML && (BIBFILEURL != ""))
 		return ("</A>")
 	return ("")
 }
 function html_end_pre()
 {
 	# Close the current <PRE> section, if one is open.
 	if (!In_PRE)
 		return
 	print_line("</PRE>")
 	In_PRE = 0
 }
 function html_end_title()
 {
 	# Return the closing tag that matches html_begin_title(): "</A>"
 	# in HTML mode when the article has a URL, otherwise nothing.
 	if (HTML && (Url != ""))
 		return ("</A>")
 	return ("")
 }
 # End the contents listing of an issue by closing its <PRE> section, if
 # one is open.
 function html_end_toc()
 {
 	html_end_pre()
 }
 # Convert TeX font changes in s to HTML markup and return the result.
 # Only the first brace group of s is examined here; the text after that
 # group is handled by a recursive call.  Two forms are recognized:
 #   {\xxx ...}   declarations listed in Font_decl_map
 #   \xxx{...}    commands listed in Font_cmd_map
 # A map value of "toupper" upper-cases the argument, any other
 # non-empty value is used as the HTML tag name, and an empty value
 # leaves the argument bare.  If the group matches neither form, s is
 # returned unchanged.
 # arg, control_word, k, level, n and open_brace are local scratch
 # variables.
 function html_fonts(s, arg,control_word,k,level,n,open_brace)
 {
 	open_brace = index(s,"{")
 	if (open_brace > 0)			# important optimization
 	{
 		# Scan for the brace that closes the group; k ends up one
 		# past it (or past the end of s when braces are unbalanced).
 		level = 1
 		for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
 		{
 			if (substr(s,k,1) == "{")
 				level++
 			else if (substr(s,k,1) == "}")
 				level--
 		}
 		# {...} is now found at open_brace ... (k-1)
 		for (control_word in Font_decl_map)	# look for {\xxx ...}
 		{
 			# The map keys start with a backslash; the extra "\\"
 			# makes that backslash literal in the dynamic regex,
 			# and [^A-Za-z] demands the end of the control word.
 			if (substr(s,open_brace+1,length(control_word)+1) ~ \
 				("\\" control_word "[^A-Za-z]"))
 			{
 				n = open_brace + 1 + length(control_word)
 				# NOTE(review): substr(s,n,k - n) extends through
 				# position k-1, i.e. it appears to include the
 				# closing brace; presumably stray braces are
 				# removed later (strip_braces) -- confirm.
 				arg = trim(substr(s,n,k - n))
 				if (Font_decl_map[control_word] == "toupper") # arg -> ARG
 					arg = toupper(arg)
 				else if (Font_decl_map[control_word] != "") # arg -> <TAG>arg</TAG>
 					arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
 				return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
 			}
 		}
 		for (control_word in Font_cmd_map)	# look for \xxx{...}
 		{
 			# here the control word must sit immediately before
 			# the opening brace
 			if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
 				("\\" control_word))
 			{
 				n = open_brace + 1
 				arg = trim(substr(s,n,k - n))
 				if (Font_cmd_map[control_word] == "toupper") # arg -> ARG
 					arg = toupper(arg)
 				else if (Font_cmd_map[control_word] != "") # arg -> <TAG>arg</TAG>
 					arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
 				# keep only the text before the control word
 				n = open_brace - length(control_word) - 1
 				return (substr(s,1,n) arg html_fonts(substr(s,k)))
 			}
 		}
 	}
 	return (s)
 }
 function html_header()
 {
 	# Print the HTML preamble: generator comments, DOCTYPE, and the
 	# <HEAD> section with the journal name as the title, followed by
 	# the opening <BODY> tag.
 	#
 	# The user name comes from the environment (USER, then LOGNAME,
 	# then a placeholder).  HOSTNAME, DATE and PERSONAL_NAME are
 	# gathered from external commands; only USER and HOSTNAME appear
 	# in the output below.
 	USER = ENVIRON["USER"]
 	if (USER == "")
 	{
 		USER = ENVIRON["LOGNAME"]
 		if (USER == "")
 			USER = "????"
 	}
 	"hostname" | getline HOSTNAME
 	"date" | getline DATE
 	("getent passwd " USER " | awk -F: '{print $5}'") | getline PERSONAL_NAME
 	if (PERSONAL_NAME == "")	# fall back to the local password file
 		("grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME
 	print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
 	print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
 	print ""
 	print ""
 	print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
 	print ""
 	print "<HTML>"
 	print prefix(1) "<HEAD>"
 	print prefix(2) "<TITLE>"
 	print prefix(3)  Journal
 	print prefix(2) "</TITLE>"
 	print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
 	print prefix(1) "</HEAD>"
 	print ""
 	print prefix(1) "<BODY>"
 }
 function html_label( anchor)
 {
 	# Build the anchor name for the current issue from the globals
 	# Volume, Number, Month and Year, then drop every character that
 	# is not a letter, a digit or one of ( ) : , ; . / -
 	anchor = Volume "(" Number "):" Month ":" Year
 	gsub(/[^A-Za-z0-9():,;.\/\-]/,"",anchor)
 	return (anchor)
 }
 function html_length(s)
 {
 	# Return the visible length of s.  In plain-text mode that is
 	# simply length(s); in HTML mode SGML tags and entities are
 	# removed first.
 	if (!HTML)
 		return (length(s))
 	gsub(/<\/?[^>]*>/,"",s)		# remove SGML tags
 	gsub(/&[A-Za-z0-9]+;/,"",s)	# remove SGML entities
 	return (length(s))
 }
 function html_toc( lead)
 {
 	# Print the heading of the table of issues, followed by the links
 	# accumulated in HTML_TOC by html_toc_entry().
 	lead = prefix(2)
 	print lead "<H1>"
 	print prefix(3) "Table of contents for issues of " Journal
 	print lead "</H1>"
 	print HTML_TOC
 }
 function html_toc_entry( link)
 {
 	# Append one line to HTML_TOC for the current issue: a link to the
 	# issue's anchor (see html_label()) whose text is the
 	# volume/number/month/year string, followed by a line break.
 	link = "        <A HREF=\"#" html_label() "\">"
 	HTML_TOC = HTML_TOC link
 	HTML_TOC = HTML_TOC vol_no_month_year() "</A><BR>" "\n"
 }
 # Print the end of the HTML document: close any <PRE> section that is
 # still open, then close <BODY> and <HTML>.
 function html_trailer()
 {
 	html_end_pre()
 	print prefix(1) "</BODY>"
 	print "</HTML>"
 }
 function initialize()
 {
 	# Set up version strings, user options, layout constants and the
 	# lookup tables.  Called once from the BEGIN rule.
 	#
 	# NB: Update these when the program changes
 	VERSION_NUMBER = "1.00"
 	VERSION_DATE = "[09-Oct-1996]"
 	# User options HTML and INDENT may be preset on the command line.
 	# HTML is forced to a number, defaulting to 0 (plain text).
 	if (HTML == "")
 		HTML = 0
 	else
 		HTML = 0 + HTML
 	if (HTML == 0)
 		INDENT = 0	# indentation suppressed in ASCII mode
 	else if (INDENT == "")
 		INDENT = 4	# default indentation per logical HTML level
 	LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."
 	# 36 produces a 79-char output line when there is just an initial
 	# page number.  If this is increased, the LEADERS string may need
 	# to be lengthened.
 	MAX_TITLE_CHARS = 36
 	# Minimum number of characters from LEADERS required when leaders
 	# are used.  The total number of characters that can appear in a
 	# title line is MAX_TITLE_CHARS + MIN_LEADERS.  Leaders are
 	# omitted when the title length is between MAX_TITLE_CHARS and
 	# this sum.
 	MIN_LEADERS = 4
 	MIN_LEADERS_SPACE = "        "	# must be at least MIN_LEADERS characters long
 	# BibTeX month abbreviations and their expansions
 	Month_expansion["jan"] = "January"
 	Month_expansion["feb"] = "February"
 	Month_expansion["mar"] = "March"
 	Month_expansion["apr"] = "April"
 	Month_expansion["may"] = "May"
 	Month_expansion["jun"] = "June"
 	Month_expansion["jul"] = "July"
 	Month_expansion["aug"] = "August"
 	Month_expansion["sep"] = "September"
 	Month_expansion["oct"] = "October"
 	Month_expansion["nov"] = "November"
 	Month_expansion["dec"] = "December"
 	# Font commands of the form \xxx{...}.  The value is the HTML tag
 	# to use, "toupper" to upper-case the argument, or "" for no
 	# markup (see html_fonts()).
 	Font_cmd_map["\\emph"] = "EM"
 	Font_cmd_map["\\textbf"] = "B"
 	Font_cmd_map["\\textit"] = "I"
 	Font_cmd_map["\\textmd"] = ""
 	Font_cmd_map["\\textrm"] = ""
 	Font_cmd_map["\\textsc"] = "toupper"
 	Font_cmd_map["\\textsl"] = "I"
 	# NOTE(review): "t" differs from the "TT" used for \tt and
 	# \ttfamily below; possibly a typo for "TT" -- confirm before
 	# changing, since it alters the generated markup.
 	Font_cmd_map["\\texttt"] = "t"
 	Font_cmd_map["\\textup"] = ""
 	# Font declarations of the form {\xxx ...}; values as above
 	Font_decl_map["\\bf"] = "B"
 	Font_decl_map["\\em"] = "EM"
 	Font_decl_map["\\it"] = "I"
 	Font_decl_map["\\rm"] = ""
 	Font_decl_map["\\sc"] = "toupper"
 	Font_decl_map["\\sf"] = ""
 	Font_decl_map["\\tt"] = "TT"
 	Font_decl_map["\\itshape"] = "I"
 	Font_decl_map["\\upshape"] = ""
 	Font_decl_map["\\slshape"] = "I"
 	Font_decl_map["\\scshape"] = "toupper"
 	Font_decl_map["\\mdseries"] = ""
 	Font_decl_map["\\bfseries"] = "B"
 	Font_decl_map["\\rmfamily"] = ""
 	Font_decl_map["\\sffamily"] = ""
 	Font_decl_map["\\ttfamily"] = "TT"
 }
 function min(a,b)
 {
 	# Return the smaller of a and b (b when they compare equal).
 	if (a < b)
 		return a
 	return b
 }
 function prefix(level)
 {
 	# Return the indentation for the given nesting level: INDENT
 	# blanks per level, up to 60 blanks in total.  Inside a <PRE>
 	# section no indentation is used at all.
 	if (In_PRE)
 		return ("")
 	return (substr("                                                            ", \
 		1, INDENT * level))
 }
 function print_line(line)
 {
 	# Plain-text mode prints the line at once.  In HTML mode it is
 	# appended to the Body array instead, because the body must be
 	# buffered in memory while the table of issues is accumulated.
 	if (!HTML)
 		print line
 	else
 		Body[++BodyLines] = line
 }
 function print_toc_line(author,title,pages, pad,dots,visible,line,room)
 {
 	# Format and emit one table-of-contents line:
 	#   author  - right-justified in a 31-character column
 	#   title   - title text for this line
 	#   pages   - page number text; empty on every line of a multiline
 	#             title but the last
 	#
 	# When we have a multiline title, the hypertext link goes only
 	# on the first line.  A multiline hypertext link looks awful
 	# because of long underlines under the leading indentation.
 	if (pages == "")	# then no leaders needed in title lines other than last one
 		line = sprintf("%31s   %s%s%s", author, Title_prefix, title, Title_suffix)
 	else			# last title line, with page number
 	{
 		visible = html_length(title)	# potentially expensive
 		pad = visible % 2		# extra space for aligned leader dots
 		room = MAX_TITLE_CHARS + MIN_LEADERS - pad - visible
 		if (visible <= MAX_TITLE_CHARS)	# then need leaders
 			dots = substr(LEADERS, 1, room)
 		else				# title (almost) fills line, so no leaders
 			dots = substr(MIN_LEADERS_SPACE, 1, room)
 		line = sprintf("%31s   %s%s%s%s%s %4s", \
 			       author, Title_prefix, title, Title_suffix, \
 			       (pad ? " " : ""), dots, pages)
 	}
 	# forget any hypertext link material
 	Title_prefix = ""
 	Title_suffix = ""
 	# Efficiency note: an earlier version accumulated the body in a
 	# single scalar like this: "Body = Body t".  Profiling revealed
 	# this statement as the major hot spot, and the change to array
 	# storage made the program more than twice as fast.
 	if (!HTML)
 		print line
 	else
 		Body[++BodyLines] = line
 }
 function protect_SGML_characters(s)
 {
 	# Return s with the characters & < > " replaced by the SGML
 	# entities &amp; &lt; &gt; &quot;.  In each replacement string
 	# the "\\&" yields a literal ampersand instead of the matched text.
 	gsub(/&/,"\\&amp;",s)	# NB: must run first, or it would re-escape the
 				# ampersands introduced by the entities below
 	gsub(/</,"\\&lt;",s)
 	gsub(/>/,"\\&gt;",s)
 	gsub(/\"/,"\\&quot;",s)	# the numeric form &#34; would be an alternative
 	return (s)
 }
 function strip_braces(s, t)
 {
 	# Return s with all non-backslashed braces removed: opening
 	# braces are stripped first, then closing braces.
 	t = strip_char(s,"{")
 	return (strip_char(t,"}"))
 }
 function strip_char(s,c, k,out)
 {
 	# Return s with every instance of c removed, except instances
 	# immediately preceded by a backslash, which are kept (together
 	# with the backslash).
 	#
 	#   s   string to filter
 	#   c   string to strip (callers pass a single character)
 	#
 	# The original recursive version called strip_char(s,k+1,c) in
 	# the backslashed branch, which re-scanned the whole string and
 	# treated the number k+1 as the character to strip, duplicating
 	# the input.  This version walks the string iteratively instead.
 	if (c == "")		# nothing to strip; also avoids awk-dependent index(s,"")
 		return (s)
 	out = ""
 	while ((k = index(s,c)) > 0)
 	{
 		if (k > 1 && substr(s,k-1,1) == "\\")
 			out = out substr(s,1,k)		# backslashed: keep it
 		else
 			out = out substr(s,1,k-1)	# plain: drop it
 		s = substr(s,k+1)			# continue after this instance
 	}
 	return (out s)
 }
 function strip_html(s)
 {
 	# Return s with every HTML tag deleted.  A tag is taken to be
 	# "<", an optional "/", and everything up to the next ">".
 	gsub(/<\/?[^>]*>/,"",s)
 	return (s)
 }
 function terminate()
 {
 	# Finish the run.  Plain-text output has already been printed
 	# line by line, so there is nothing to do unless HTML is set.
 	if (!HTML)
 		return

 	html_end_pre()
 	HTML = 0	# NB: from here on print_line() prints instead of buffering
 	# Write the document pieces in order.
 	html_header()
 	html_toc()
 	html_body()
 	html_trailer()
 }
 function TeX_to_HTML(s, k,n,parts)
 {
 	# Translate the TeX markup in s and return the result.  The
 	# string is split at dollar signs: text outside math mode goes
 	# through TeX_to_HTML_nonmath() and has its braces stripped,
 	# while math-mode text goes through TeX_to_HTML_math() and
 	# keeps its braces.
 	#
 	# When HTML is set, ">" and "<" are converted to SGML entities
 	# up front.  The double quote is converted only at the very end
 	# (NOTE(review): presumably so that TeX sequences containing a
 	# double quote are still intact for the non-math translation --
 	# confirm against html_accents()).
 	if (HTML)
 	{
 	    gsub(/>/,	"\\&gt;",	s)
 	    gsub(/</,	"\\&lt;",	s)
 	}
 	# Turn each display-math "$$" into "$$$".  Splitting at single
 	# dollars then yields an empty part pair around the delimiter, so
 	# the display-math content lands in an even (math) part just as
 	# inline math does.  Replacing "$$" with itself, as before, put
 	# that content in an odd (non-math) part.
 	gsub(/[$][$]/,"$$$",s)
 	n = split(s,parts,/[$]/)# split into non-math (odd) and math (even) parts
 	s = ""
 	for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
 		s = s ((k > 1) ? "$" : "") \
 			((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
 			TeX_to_HTML_math(parts[k]))
 	gsub(/[$][$][$]/,"$$",s) # restore display math
 	if (HTML)
 	{
 	    gsub(/"/,	"\\&quot;",	s)
 	}
 	return (s)
 }
 function TeX_to_HTML_math(s)
 {
 	# Translate math-mode text.  For now this is mostly a
 	# placeholder (HTML 3 could support some math translation): the
 	# only change is that each TeX ampersand, backslash followed by
 	# "&", becomes the SGML entity &amp;.
 	gsub(/\\&/,"\\&amp;",s)
 	return (s)
 }
 function TeX_to_HTML_nonmath(s)
 {
 	# Translate TeX markup in text that is outside math mode and
 	# return the result.  Under HTML the markup is mapped to HTML;
 	# otherwise it is discarded to leave plain ASCII text.
 	if (index(s,"\\") == 0)			# important optimization: no backslash
 		return (s)			# means there is no TeX markup at all

 	# Reductions common to both output modes
 	gsub(/\\slash +/,"/",s)		# TeX slashes become conventional ones
 	gsub(/ *\\emdash +/," --- ",s)	# BibNet emdashes become conventional ones
 	gsub(/\\%/,"%",s)		# TeX percents become conventional ones
 	gsub(/\\[$]/,"$",s)		# TeX dollars become conventional ones
 	gsub(/\\#/,"#",s)		# TeX sharps become conventional ones

 	if (!HTML)			# plain ASCII text output: discard all TeX markup
 	{
 		gsub(/\\\&/, "\\&", s)	# TeX ampersands become conventional ones
 		gsub(/\\[a-z][a-z] +/,"",s) # remove TeX font changes
 		gsub(/\\[^A-Za-z]/,"",s) # remove remaining TeX control symbols
 		return (s)
 	}

 	# HTML output: translate TeX markup to HTML
 	gsub(/\\&/,"\\&amp;",s)		# TeX ampersands become SGML entities
 	s = html_accents(s)
 	s = html_fonts(s)
 	return (s)
 }
 function trim(s)
 {
 	# Return s without leading or trailing blanks and tabs.  Each
 	# pattern is anchored, so it can match at most once.
 	sub(/^[ \t]+/,"",s)
 	sub(/[ \t]+$/,"",s)
 	return (s)
 }
 function vol_no_month_year()
 {
 	# Build the "Volume V,  Number N, Month, Year" heading from the
 	# globals Volume, Number, Month and Year, passing each value
 	# through wrap().
 	return ("Volume " wrap(Volume) \
 		",  Number " wrap(Number) \
 		", " wrap(Month) \
 		", " wrap(Year))
 }
 function wrap(value)
 {
 	# Return value enclosed in <STRONG>...</STRONG> when HTML is
 	# set, and unchanged otherwise.
 	if (HTML)
 		return ("<STRONG>" value "</STRONG>")
 	return (value)
 }
--- a/hawk/t/journal-toc.in
+++ b/hawk/t/journal-toc.in
--- a/hawk/t/journal-toc.out
+++ b/hawk/t/journal-toc.out
--- a/hawk/tools/Makefile.in
+++ b/hawk/tools/Makefile.in
@ -330,7 +330,6 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@