547 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			547 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| # http://www.netlib.org/bibnet/tools/software/bibtex-to-html.awk
 | |
| #
 | |
| ### ====================================================================
 | |
| ###  @Awk-file{
 | |
| ###     author          = "Nelson H. F. Beebe",
 | |
| ###     version         = "1.02",
 | |
| ###     date            = "05 July 1997",
 | |
| ###     time            = "12:04:52 MDT",
 | |
| ###     filename        = "bibtex-to-html.awk",
 | |
| ###     address         = "Center for Scientific Computing
 | |
| ###                        Department of Mathematics
 | |
| ###                        University of Utah
 | |
| ###                        Salt Lake City, UT 84112
 | |
| ###                        USA",
 | |
| ###     telephone       = "+1 801 581 5254",
 | |
| ###     FAX             = "+1 801 581 4148",
 | |
| ###     URL             = "http://www.math.utah.edu/~beebe",
 | |
| ###     checksum        = "08699 482 2173 18348",
 | |
| ###     email           = "beebe@math.utah.edu (Internet)",
 | |
| ###     codetable       = "ISO/ASCII",
 | |
| ###     keywords        = "bibliography, BibTeX, HTML, World-Wide Web,
 | |
| ###                        WWW",
 | |
| ###     supported       = "yes",
 | |
| ###     docstring       = "This program converts BibTeX bibliographies
 | |
| ###                        to HTML, suitable for viewing on the
 | |
| ###                        World-Wide Web.
 | |
| ###
 | |
| ###                        The level of HTML produced is version 3.2,
 | |
| ###                        adopted 14-Jan-1997, and defined in the SGML
 | |
| ###                        document type definition (DTD) available at
 | |
| ###
 | |
| ###                            http://www.w3.org/MarkUp/Wilbur/HTML32.dtd
 | |
| ###
 | |
| ###                        and documented at
 | |
| ###
 | |
| ###                            http://www.w3.org/MarkUp/Wilbur/
 | |
| ###                            http://www.w3.org/TR/REC-html32.html
 | |
| ###
 | |
| ###                        HTML markup is added to provide hypertext
 | |
| ###                        links for:
 | |
| ###
 | |
| ###                            * all URLs in the BibTeX file, both in
 | |
| ###                              comments, and inside string values;
 | |
| ###                            * all bibliography entry crossref
 | |
| ###                              values;
 | |
| ###                            * all \cite{} references;
 | |
| ###                            * all @String{name = "value"} names.
 | |
| ###
 | |
| ###                        In addition, every BibTeX citation label in
 | |
| ###                        @Entry lines, and every @String name, will
 | |
| ###                        be marked as an HTML label, allowing
 | |
| ###                        hypertext links to each from elsewhere in
 | |
| ###                        the same HTML file, or from other HTML
 | |
| ###                        files.  In particular, every bibliography
 | |
| ###                        entry can be directly referenced by
 | |
| ###                        hypertext links from anywhere on the
 | |
| ###                        Internet.
 | |
| ###
 | |
| ###                        Each such linkable-name will be displayed
 | |
| ###                        in bold text to draw attention to the fact
 | |
| ###                        that it can be directly referenced by a
 | |
| ###                        suitable URL.  In principle, this should be
 | |
| ###                        an option that WWW browsers provide, but
 | |
| ###                        none that I have used currently do.
 | |
| ###
 | |
| ###                        Although no browsers to my knowledge yet
 | |
| ###                        provide the capability of partial
 | |
| ###                        downloading of HTML files, the possibility
 | |
| ###                        has been discussed for future versions of
 | |
| ###                        the HTTP protocol.  Such support would make
 | |
| ###                        it possible to construct bibliographies in
 | |
| ###                        electronic documents as links to large
 | |
| ###                        bibliography database files, without the
 | |
| ###                        browser having to load the entire database,
 | |
| ###                        but just individual entries.  Since these
 | |
| ###                        in turn can have URLs that point to other
 | |
| ###                        electronic sources of the publication, a
 | |
| ###                        reader could easily follow links from a
 | |
| ###                        publication to a bibliography and then to
 | |
| ###                        abstracts and to the complete original
 | |
| ###                        text.  Some journals, such as the Digital
 | |
| ###                        Technical Journal (electronically accessible
 | |
| ###                        at http://www.digital.com:80/info/DTJ/home.html),
 | |
| ###                        already could offer this possibility.
 | |
| ###
 | |
| ###                        The Web browser user will see material that
 | |
| ###                        looks just like normal BibTeX entries,
 | |
| ###                        except that some portions may be
 | |
| ###                        highlighted to indicate hypertext links.
 | |
| ###                        However, window cut-and-paste actions will
 | |
| ###                        recover a BibTeX entry in a form suitable
 | |
| ###                        for pasting into another BibTeX file,
 | |
| ###                        without any need for further editing.
 | |
| ###
 | |
| ###                        This program assumes that the BibTeX
 | |
| ###                        bibliography is formatted in the style
 | |
| ###                        produced by bibclean, and that embedded
 | |
| ###                        URLs and "key = stringname" pairs are coded
 | |
| ###                        on a single line, so that simple pattern
 | |
| ###                        matching suffices to recognize text in need
 | |
| ###                        of additional HTML markup.
 | |
| ###
 | |
| ###                        Usage:
 | |
| ###                            nawk -f bibtex-to-html.awk \
 | |
| ###                                [-v PREFIX=prefix] [-v SUFFIX=suffix] \
 | |
| ###                                BibTeX-file(s)
 | |
| ###
 | |
| ###                        An input file with a filename of the form
 | |
| ###                        abc.xyz is output to a file named
 | |
| ###                        PREFIXabcSUFFIX.  The default PREFIX is
 | |
| ###                        empty, and the default SUFFIX is ".html".
 | |
| ###
 | |
| ###			   If no   file  names are specified    on the
 | |
| ###			   command line, then   the PREFIX and  SUFFIX
 | |
| ###			   settings  are ignored,   and input is  read
 | |
| ###			   from   stdin,  and  output  is   written to
 | |
| ###			   stdout, so that the program  can be used in
 | |
| ###			   a UNIX pipeline.
 | |
| ###
 | |
| ###                        In the current version, no provision is
 | |
| ###                        made for splitting the output files into
 | |
| ###                        smaller pieces to speed network file
 | |
| ###                        transfer.  While this would improve browser
 | |
| ###                        responsiveness over slow network
 | |
| ###                        connections, it would also significantly
 | |
| ###                        complicate hypertext link generation for
 | |
| ###                        this program, and seriously damage browser
 | |
| ###                        search capability within the bibliography
 | |
| ###                        file.  Perhaps the solution will come in
 | |
| ###                        (a) browsers' adopting the netscape browser
 | |
| ###                        practice of displaying data as soon as
 | |
| ###                        enough to fill a screen is available, and
 | |
| ###                        (b) faster network connections.
 | |
| ###
 | |
| ###                        In the TUG bibliography collection at
 | |
| ###                        ftp://ftp.math.utah.edu/, bibliography
 | |
| ###                        file sizes range from 3K to 4700K, with an
 | |
| ###                        average of 370K.  These are rather large,
 | |
| ###                        since typical WWW file sizes need to be
 | |
| ###                        about 16K or less for good responsiveness.
 | |
| ###
 | |
| ###                        The checksum field above contains a CRC-16
 | |
| ###                        checksum as the first value, followed by the
 | |
| ###                        equivalent of the standard UNIX wc (word
 | |
| ###                        count) utility output of lines, words, and
 | |
| ###                        characters.  This is produced by Robert
 | |
| ###                        Solovay's checksum utility.",
 | |
| ###  }
 | |
| ### ====================================================================
 | |
# One-time initialisation: program identity, user and host details, and the
# patterns, offsets, prefixes and save-label flags that do_line() passes to
# anchor() and do_cite().
BEGIN {
    ######################################################################
    VERSION = "1.02 [05-Jul-1997]" # <-- NB: Change this with each update!
    ######################################################################

    PROGRAM = "bibtex-to-html"

    # Sentinel meaning "no input file has been seen yet".
    UNSET_FILENAME = "/dev/unset"
    LASTFILENAME = UNSET_FILENAME
    _last_input_filename = UNSET_FILENAME

    if (SUFFIX == "")
        SUFFIX = ".html"

    USER = ENVIRON["USER"]

    if (USER == "")
        USER = ENVIRON["LOGNAME"]

    if (USER == "")
        USER = "????"

    "hostname" | getline HOSTNAME
    "date" | getline DATE
#   [01-Aug-2019] ypcat no longer available: replace by getent
#   ("ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'") | getline PERSONAL_NAME
    # ARGV[0] is used as the name of the awk interpreter to run.
    ("getent passwd " USER " | " ARGV[0] " -F: '{print $5}'") | getline PERSONAL_NAME

    if (PERSONAL_NAME == "")
        ##("grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME
        ("grep  '^" USER ":' /etc/passwd | " ARGV[0] " -F: '{print $5}'") | getline PERSONAL_NAME

    # NB: do_line() runs protect_SGML_characters() on each line before any
    # of these patterns is applied, so a double quote in the BibTeX input
    # is seen here as the entity &quot; and never as a raw quote character.
    CROSSREF_EQUALS_LABEL_PATTERN = "^[ \t]*crossref[ \t]*=[ \t]*&quot;"

    # Pattern to match a line like this:
    # %%%     email           = "beebe at math.utah.edu (Internet)",

    BIBTEX_EMAIL_PATTERN = "= &quot;[A-Za-z0-9-]+ at [A-Za-z0-9.-]+"
    BIBTEX_EMAIL_OFFSET = 8 # length of the leading context "= &quot;"
    BIBTEX_EMAIL_PREFIX = "mailto:"
    BIBTEX_EMAIL_SAVE_LABEL = 0

    ##CITE_PATTERN = "\\\\cite{[^}]+}"
    CITE_PATTERN = "\\\\cite\\{[^\\}]+}"
    CITE_OFFSET = 6
    CITE_PREFIX = ""
    CITE_SAVE_LABEL = 1

    EMAIL_PATTERN = "[A-Za-z0-9-]+@[A-Za-z0-9.-]+"
    EMAIL_OFFSET = 0
    EMAIL_PREFIX = "mailto:"
    EMAIL_SAVE_LABEL = 0

    # See Nelson H. F. Beebe, ``Bibliography prettyprinting
    # and syntax checking'', TUGboat 14(3), 222-222, October
    # (1993), and 14(4), 395--419, December (1993) for the
    # syntax of BibTeX names used here in ENTRY_PATTERN,
    # KEY_EQUALS_NAME_PATTERN and STRING_PATTERN.

    ##ENTRY_PATTERN = "^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*{[A-Za-z][A-Za-z0-9:.+/'-]*,[ \t]*$"
    ENTRY_PATTERN = "^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*\\{[A-Za-z][A-Za-z0-9:.+/'-]*,[ \t]*$"

    KEY_EQUALS_NAME_PATTERN = "^[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*=[ \t]*[A-Za-z]"

    ##STRING_PATTERN = "^@[Ss][Tt][Rr][Ii][Nn][gG]{[A-Za-z][A-Za-z0-9:.+/'-]*"
    STRING_PATTERN = "^@[Ss][Tt][Rr][Ii][Nn][gG]\\{[A-Za-z][A-Za-z0-9:.+/'-]*"
    STRING_OFFSET = 8
    STRING_PREFIX = ""
    STRING_SAVE_LABEL = 1

    # According to Internet RFC 1614 (May 1994), a URL is
    # defined in the document T. Berners-Lee, ``Uniform
    # Resource Locators'', March 1993, available at URL
    # ftp://info.cern.ch/pub/ietf/url4.ps.  Unfortunately,
    # that address is no longer valid.  However, I was able to
    # track down pointers from http://www.w3.org/ to locate a
    # suitable description in Internet RFC 1630 (June 1994).

    # NB: We additionally disallow & in a URL because it is
    # needed in SGML entities "&name;".  We also disallow =
    # and | because these are commonly used in \path=...= and
    # \path|...| strings in BibTeX files.  These restrictions
    # could be removed if we went to the trouble of first
    # encoding these special characters in %xy hexadecimal
    # format, but they are rare enough that I am not going to
    # do so for now.  The worst that will happen from this
    # decision is that an occasional URL in a BibTeX file will
    # be missing a surrounding anchor.

    URL_PATTERN = "[A-Za-z]+://[^ \",&=|]+"
    URL_OFFSET = 0
    URL_PREFIX = ""
    URL_SAVE_LABEL = 0

    # [24-May-2016] support for background coloring of block comments
    IN_BLOCK_COMMENT = 0
}
 | |
| 
 | |
# Pattern-less rule: every input record is handed to do_line().
{
    do_line()
}
 | |
| 
 | |
# After the last record: if any input file was started, finish it by
# calling end_file() on the file that was being processed.
END {
    if (LASTFILENAME != UNSET_FILENAME) {
        end_file(LASTFILENAME)
    }
}
 | |
| 
 | |
| 
 | |
# Record the current record number (FNR) under array[value].  The first
# occurrence stores FNR alone; later occurrences append " " FNR, giving a
# space-separated list of line numbers.
function add_entry(array,value)
{
    if (!(value in array)) {
        array[value] = FNR
        return
    }

    array[value] = array[value] " " FNR
}
 | |
| 
 | |
| 
 | |
# Wrap every piece of text in s that matches pattern in an HTML anchor
# <A type="prefix+name">...</A> and return the resulting string.
#
#   s           text to scan
#   type        anchor attribute name; "NAME" additionally wraps the
#               anchored text in <STRONG>...</STRONG>
#   pattern     regular expression locating the text
#   offset      number of leading characters of each match that are
#               context only and stay outside the anchor
#   prefix      text placed before the name inside the attribute value
#   save_label  when non-zero, each name is recorded with add_entry() in
#               label_hrefs (type "HREF") or label_names (type "NAME")
#
# name, rstart and rlength are locals.
function anchor(s,type,pattern,offset,prefix,save_label, name,rstart,rlength)
{
    if (!match(s,pattern))
        return (s)

    # Private copies are needed because the recursive call below runs
    # match() again and overwrites the RSTART and RLENGTH globals.
    rstart = RSTART + offset        # skip the leading context ...
    rlength = RLENGTH - offset      # ... and shorten the match to suit

    name = substr(s,rstart,rlength)
    sub(/ +at +/,"@",name)          # reduce "user at host" to "user@host"

    # The remainder of the line is handled recursively.  save_label must
    # be forwarded so that labels from the second and later matches on
    # the line are recorded as well.
    s = substr(s,1,rstart-1) \
        "<A " type "=\"" prefix name "\">" \
        ((type == "NAME") ? "<STRONG>" : "") \
        substr(s,rstart,rlength) \
        ((type == "NAME") ? "</STRONG>" : "") \
        "</A>" \
        anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label)

    if (save_label)
    {
        if (type == "HREF")
            add_entry(label_hrefs, name)
        else if (type == "NAME")
            add_entry(label_names, name)
    }

    return (s)
}
 | |
| 
 | |
| 
 | |
# Start the HTML output for the current input file (FILENAME): write the
# comment banner, DOCTYPE, <HEAD>, the validator badges and the opening
# <PRE>, then reset the per-file state.
#
# f is a local holding the output file name.  slash_pos and BASE_FILENAME
# are set as globals, as in the original code.
function begin_file( f)
{
    f = output_filename(FILENAME)

    # Strip any leading directory so only the file's base name is shown.
    slash_pos = str::rindex(FILENAME, "/");
    BASE_FILENAME = (slash_pos > 0)? str::substr(FILENAME, slash_pos + 1): FILENAME;

    print "<!-- -*-html-*- -->" > f
    print "" > f
##  print "<!-- " FILENAME " -->" > f
    print "<!-- " BASE_FILENAME " -->" > f
    print "<!-- WARNING: Do NOT edit this file.  It was converted from -->" > f
    print "<!-- BibTeX format to HTML by " PROGRAM " version " VERSION " -->" > f
##  print "<!-- on " DATE " -->" > f
##  print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->" > f
    print "" > f
    ## NB: If Transitional is eliminated in DOCTYPE, background coloring is lost! Why?
    print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/1998/REC-html40-19980424/loose.dtd\">" > f
    print "" > f
    print "" > f
    print "<HTML>" > f
    print "    <HEAD>" > f
    print "        <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=iso-8859-1\">" > f
    print "        <TITLE>" > f
##  print "            BibTeX bibliography " FILENAME > f
    print "            BibTeX bibliography " BASE_FILENAME > f
    print "        </TITLE>" > f
##  print "        <LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">" > f
    print "        <LINK HREF=\"http://www.math.utah.edu/pub/tex/bib/tugbib.css\" TYPE=\"text/css\" REL=\"stylesheet\">" > f
    print "    </HEAD>" > f
    print "" > f
    print "    <BODY>" > f
    print "        <DIV  ALIGN=\"right\">" > f
    print "            <A HREF=\"http://validator.w3.org/check/referer\">" > f
    print "                <IMG ALIGN=\"MIDDLE\" BORDER=\"0\" SRC=\"/images/valid-html40.png\" ALT=\"Valid HTML 4.0!\" HEIGHT=\"31\" WIDTH=\"88\">" > f
    print "            </A>" > f
    print "            <A HREF=\"http://jigsaw.w3.org/css-validator/check/referer\">" > f
    print "                <IMG ALIGN=\"MIDDLE\" BORDER=\"0\" SRC=\"/images/valid-css.gif\" ALT=\"Valid CSS!\" HEIGHT=\"31\" WIDTH=\"88\">" > f
    print "            </A>" > f
    print "        </DIV>" > f
    print "<PRE>" > f

    # A plain <PRE> has just been opened, so we are not inside a block
    # comment.  Without this reset, a previous file that ended in a
    # %-comment would leave the flag set, and a leading comment block in
    # this file would never get its <PRE CLASS="blockcomment"> wrapper.
    IN_BLOCK_COMMENT = 0

    clear_array(label_names)
    clear_array(label_hrefs)
}
 | |
| 
 | |
| 
 | |
# Detect a switch to a new input file.  When FILENAME differs from
# LASTFILENAME, finish the previous file's HTML (if there was one), close
# its output file, remember the new name and start the new file's HTML.
#
# out is a local holding the previous file's output name.
function check_for_file_change( out)
{
    if (LASTFILENAME == FILENAME)
        return

    if (LASTFILENAME != UNSET_FILENAME)
    {
        end_file(LASTFILENAME)

        # LASTFILENAME is an INPUT name, so the stdout guard has to test
        # the OUTPUT name derived from it; standard output must stay open.
        out = output_filename(LASTFILENAME)

        if (out != "/dev/stdout")
            close(out)
    }

    LASTFILENAME = FILENAME
    begin_file()
}
 | |
| 
 | |
| 
 | |
# Issue a warning for every label recorded in label_hrefs that has no
# matching entry in label_names.  The warning quotes the line number(s)
# stored for that label.
function check_refs( label)
{
    for (label in label_hrefs)
    {
        if (label in label_names)
            continue

        warning("undefined label " label " at line(s) " label_hrefs[label])
    }
}
 | |
| 
 | |
| 
 | |
# Remove every element from array, one key at a time.
function clear_array(array, key)
{
    for (key in array) {
        delete array[key]
    }
}
 | |
| 
 | |
| 
 | |
# Write the closing HTML tags to the output file that belongs to the
# input file `filename`, then report labels that were referenced but
# never defined.  f is a local holding the output name.
function end_file(filename, f)
{
    f = output_filename(filename)

    print "</PRE>"      > f
    print "    </BODY>" > f
    print "</HTML>"     > f

    check_refs()
}
 | |
| 
 | |
| 
 | |
# Turn the labels of every \cite{label1,label2,...} in s into links
# <A HREF="#label">label</A> and return the new string.
#
# Precondition: the caller has just executed match(s,CITE_PATTERN), so
# RSTART and RLENGTH describe the first \cite{...} in s.
#
# Each label is also recorded in label_hrefs through add_entry().  The
# text after the first citation is processed recursively so that further
# \cite{} commands on the same line are linked too.
#
# k, n, labels, t, start, len and rest are locals.
function do_cite(s, k,n,labels,t,start,len,rest)
{
    # Take private copies: the match() below overwrites these globals.
    start = RSTART
    len = RLENGTH

    # The label list is the match minus the leading "\cite{" and the
    # trailing "}".
    n = split(substr(s,start + CITE_OFFSET,len - 1 - CITE_OFFSET),labels,",")
    t = substr(s,1,start + CITE_OFFSET - 1)

    for (k = 1; k <= n; ++k)
    {
        t = t ((k > 1) ? "," : "") "<A HREF=\"#" labels[k] "\">" labels[k] "</A>"
        add_entry(label_hrefs, labels[k])
    }

    # Everything from the closing brace onward.
    rest = substr(s,start + len - 1)

    if (match(rest,CITE_PATTERN))
        rest = do_cite(rest)

    return (t rest)
}
 | |
| 
 | |
| 
 | |
# Convert the current input record ($0) to HTML and write it to the output
# file for FILENAME.  Steps, in order:
#   1. escape SGML-special characters;
#   2. remember the name defined by an @String{name = "value"} line;
#   3. add anchors for e-mail addresses, URLs and @String names;
#   4. add links for \cite{} labels;
#   5. mark an @Entry{label, line as a link target, or link a
#      "key = stringname" or crossref = "label" value to its definition;
#   6. switch output files if FILENAME has changed;
#   7. open or close the block-comment <PRE> section when the line's
#      comment status changes, then print the line.
#
# n, name and s are locals.
function do_line( n,name,s)
{
    s = protect_SGML_characters($0)

    if (match(s,STRING_PATTERN)) # remember name from @String{name = "value"}
    {
        name = substr(s,RSTART + STRING_OFFSET,RLENGTH - STRING_OFFSET)
        string_name[name] = 1
        # print "DEBUG 1: name =", name >"/dev/stderr"
    }

    if (match(s,/^%+[ \t]*email[ \t]*=/)) # special handling because BibTeX does not allow @ in comments
        s = anchor(s,"HREF",BIBTEX_EMAIL_PATTERN,BIBTEX_EMAIL_OFFSET,BIBTEX_EMAIL_PREFIX,\
                   BIBTEX_EMAIL_SAVE_LABEL)
    else
        s = anchor(s,"HREF",EMAIL_PATTERN,EMAIL_OFFSET,EMAIL_PREFIX,EMAIL_SAVE_LABEL)

    s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
    s = anchor(s,"NAME",STRING_PATTERN,STRING_OFFSET,STRING_PREFIX,STRING_SAVE_LABEL)

    if (match(s,CITE_PATTERN))
        s = do_cite(s)

    if (match(s,ENTRY_PATTERN)) # then have ``@Entry{label,''
    {
        n = index(s,"{")
        name = substr(s,n+1)
        gsub(/^[ \t]*/,"",name)  # trim optional leading space
        gsub(/,[ \t]*$/,"",name) # trim trailing comma and optional space
        # print "DEBUG 2: name =", name >"/dev/stderr"
        s = substr(s,1,n) \
            "<A NAME=\"" name "\"><STRONG>" name "</STRONG></A>" \
            substr(s,n+1+length(name))
        add_entry(label_names, name)
    }
    else if (match(s,KEY_EQUALS_NAME_PATTERN)) # then have ``key = name''
    {
        # The pattern's last character is the first letter of the name.
        name = substr(s,RSTART+RLENGTH-1)
        sub(/,?[ \t]*$/,"",name) # trim optional trailing comma and space
        # print "DEBUG 3: name =", name >"/dev/stderr"

        if (name in string_name) # then we have a definition of this name
        {
            s = substr(s,1,RSTART+RLENGTH-2) \
                "<A HREF=\"#" name "\">" name "</A>" substr(s,RSTART+RLENGTH-1+length(name))
            add_entry(label_hrefs, name)
        }
    }
    else if (match(s,CROSSREF_EQUALS_LABEL_PATTERN)) # then have `` crossref = "label"''
    {
        name = substr(s,RSTART+RLENGTH)
        # Quotes were escaped at the top of this function, so the closing
        # quote to trim is the entity &quot;, not a raw quote character.
        sub(/&quot;,?[ \t]*$/,"",name) # trim trailing quote and optional comma and space
        # print "DEBUG 4: name =", name >"/dev/stderr"
        s = substr(s,1,RSTART+RLENGTH-1) \
            "<A HREF=\"#" name "\">" name "</A>" substr(s,RSTART+RLENGTH+length(name))
        add_entry(label_hrefs, name)
    }

    check_for_file_change()

    # A line starting with % is a comment line.  Wrap each run of them in
    # its own <PRE CLASS="blockcomment"> section.
    if ( (s ~ "^%") && !IN_BLOCK_COMMENT)
    {
        printf("</PRE><PRE CLASS=\"blockcomment\">") > output_filename(FILENAME)
        IN_BLOCK_COMMENT = 1
    }
    else if ( (s !~ "^%") && IN_BLOCK_COMMENT)
    {
        printf("</PRE><PRE>") > output_filename(FILENAME)
        IN_BLOCK_COMMENT = 0
    }

    print s > output_filename(FILENAME)
}
 | |
| 
 | |
| 
 | |
# Map an input file name to the name of the file its HTML is written to.
#
#   * If T_OUT_NAME is set (HAWK test hook), it is returned unconditionally.
#   * Otherwise the final ".ext" of the last path component is removed and
#     the result is PREFIX name SUFFIX.
#   * An empty name or "/dev/stdin" maps to "/dev/stdout".
#
# The most recent result is cached in _last_output_filename, keyed by
# _last_input_filename.
function output_filename(input_filename)
{
    ## HAWK - for use in t/h-003.hawk
    if (length(T_OUT_NAME) > 0) return T_OUT_NAME;
    ## END HAWK

    if (input_filename != _last_input_filename)
    {   # optimization: we cache last function result for speed
        _last_input_filename = input_filename

        # Strip only a real extension: a literal dot followed by characters
        # that are neither dots nor slashes.  An unescaped "." here would
        # erase a name with no dot at all (e.g. "refs" or "/dev/stdin")
        # and send that file's output to stdout.
        sub(/\.[^.\/]*$/,"",input_filename)

        if ((input_filename == "") || (input_filename == "/dev/stdin"))
            _last_output_filename = "/dev/stdout"
        else
            _last_output_filename = PREFIX input_filename SUFFIX
    }

    return (_last_output_filename)
}
 | |
| 
 | |
| 
 | |
# Return s with the characters that are special in SGML/HTML replaced by
# character entities:
#     &  ->  &amp;      <  ->  &lt;      >  ->  &gt;      "  ->  &quot;
#
# In each replacement string "\\&" yields a literal ampersand; a bare "&"
# would stand for the matched text.
function protect_SGML_characters(s)
{
    gsub(/&/,"\\&amp;",s)   # NB: this one MUST be first, or the ampersands
                            # of the entities added below get escaped again
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)

    ## [24-May-2016] with the change from HTML 3.2 to 4.0, we can use
    ## &quot; again!  Earlier HTML versions, including the HTML 3.2 draft,
    ## had &quot;, but it was eliminated in the final HTML 3.2 version, so
    ## the numeric entity was used instead:
    ## gsub(/\"/,"\\&#34;",s)
    ## See http://www.w3.org/pub/WWW/MarkUp/Wilbur/ in the section
    ## ``What happened to &quot;?''
    gsub(/\"/,"\\&quot;",s)

    return (s)
}
 | |
| 
 | |
| 
 | |
# Print a warning on /dev/stderr in the form  LASTFILENAME:%%message
function warning(message)
{
    # An earlier form of this message was
    #     print FILENAME ":" FNR ":%%" message >"/dev/stderr"
    #
    # Warnings are only needed from check_refs(), after the current file
    # has been closed and a new one started, so LASTFILENAME is used
    # instead of FILENAME, and FNR is omitted because no record of it is
    # kept for LASTFILENAME.
    print (LASTFILENAME ":%%" message) > "/dev/stderr"
}
 |