2020-12-01 10:22:17 +00:00
|
|
|
# http://www.netlib.org/bibnet/tools/software/bibtex-to-html.awk
|
|
|
|
#
|
|
|
|
### ====================================================================
|
|
|
|
### @Awk-file{
|
|
|
|
### author = "Nelson H. F. Beebe",
|
|
|
|
### version = "1.02",
|
|
|
|
### date = "05 July 1997",
|
|
|
|
### time = "12:04:52 MDT",
|
|
|
|
### filename = "bibtex-to-html.awk",
|
|
|
|
### address = "Center for Scientific Computing
|
|
|
|
### Department of Mathematics
|
|
|
|
### University of Utah
|
|
|
|
### Salt Lake City, UT 84112
|
|
|
|
### USA",
|
|
|
|
### telephone = "+1 801 581 5254",
|
|
|
|
### FAX = "+1 801 581 4148",
|
|
|
|
### URL = "http://www.math.utah.edu/~beebe",
|
|
|
|
### checksum = "08699 482 2173 18348",
|
|
|
|
### email = "beebe@math.utah.edu (Internet)",
|
|
|
|
### codetable = "ISO/ASCII",
|
|
|
|
### keywords = "bibliography, BibTeX, HTML, World-Wide Web,
|
|
|
|
### WWW",
|
|
|
|
### supported = "yes",
|
|
|
|
### docstring = "This program converts BibTeX bibliographies
|
|
|
|
### to HTML, suitable for viewing on the
|
|
|
|
### World-Wide Web.
|
|
|
|
###
|
|
|
|
### The level of HTML produced is version 3.2,
|
|
|
|
### adopted 14-Jan-1997, and defined in the SGML
|
|
|
|
### document type definition (DTD) available at
|
|
|
|
###
|
|
|
|
### http://www.w3.org/MarkUp/Wilbur/HTML32.dtd
|
|
|
|
###
|
|
|
|
### and documented at
|
|
|
|
###
|
|
|
|
### http://www.w3.org/MarkUp/Wilbur/
|
|
|
|
### http://www.w3.org/TR/REC-html32.html
|
|
|
|
###
|
|
|
|
### HTML markup is added to provide hypertext
|
|
|
|
### links for:
|
|
|
|
###
|
|
|
|
### * all URLs in the BibTeX file, both in
|
|
|
|
### comments, and inside string values;
|
|
|
|
### * all bibliography entry crossref
|
|
|
|
### values;
|
|
|
|
### * all \cite{} references;
|
|
|
|
### * all @String{name = "value"} names.
|
|
|
|
###
|
|
|
|
### In addition, every BibTeX citation label in
|
|
|
|
### @Entry lines, and every @String name, will
|
|
|
|
### be marked as an HTML label, allowing
|
|
|
|
### hypertext links to each from elsewhere in
|
|
|
|
### the same HTML file, or from other HTML
|
|
|
|
### files. In particular, every bibliography
|
|
|
|
### entry can be directly referenced by
|
|
|
|
### hypertext links from anywhere on the
|
|
|
|
### Internet.
|
|
|
|
###
|
|
|
|
### Each such linkable-name will be displayed
|
|
|
|
### in bold text to draw attention to the fact
|
|
|
|
### that it can be directly referenced by a
|
|
|
|
### suitable URL. In principle, this should be
|
|
|
|
### an option that WWW browsers provide, but
|
|
|
|
### none that I have used currently do.
|
|
|
|
###
|
|
|
|
### Although no browsers to my knowledge yet
|
|
|
|
### provide the capability of partial
|
|
|
|
### downloading of HTML files, the possibility
|
|
|
|
### has been discussed for future versions of
|
|
|
|
### the HTTP protocol. Such support would make
|
|
|
|
### it possible to construct bibliographies in
|
|
|
|
### electronic documents as links to large
|
|
|
|
### bibliography database files, without the
|
|
|
|
### browser having to load the entire database,
|
|
|
|
### but just individual entries. Since these
|
|
|
|
### in turn can have URLs that point to other
|
|
|
|
### electronic sources of the publication, a
|
|
|
|
### reader could easily follow links from a
|
|
|
|
### publication to a bibliography and then to
|
|
|
|
### abstracts and to the complete original
|
|
|
|
### text. Some journals, such as the Digital
|
|
|
|
### Technical Journal (electronically accessible
|
|
|
|
### at http://www.digital.com:80/info/DTJ/home.html),
|
|
|
|
### already could offer this possibility.
|
|
|
|
###
|
|
|
|
### The Web browser user will see material that
|
|
|
|
### looks just like normal BibTeX entries,
|
|
|
|
### except that some portions may be
|
|
|
|
### highlighted to indicate hypertext links.
|
|
|
|
### However, window cut-and-paste actions will
|
|
|
|
### recover a BibTeX entry in a form suitable
|
|
|
|
### for pasting into another BibTeX file,
|
|
|
|
### without any need for further editing.
|
|
|
|
###
|
|
|
|
### This program assumes that the BibTeX
|
|
|
|
### bibliography is formatted in the style
|
|
|
|
### produced by bibclean, and that embedded
|
|
|
|
### URLs and "key = stringname" pairs are coded
|
|
|
|
### on a single line, so that simple pattern
|
|
|
|
### matching suffices to recognize text in need
|
|
|
|
### of additional HTML markup.
|
|
|
|
###
|
|
|
|
### Usage:
|
|
|
|
### nawk -f bibtex-to-html.awk \
|
|
|
|
### [-v PREFIX=prefix] [-v SUFFIX=suffix] \
|
|
|
|
### BibTeX-file(s)
|
|
|
|
###
|
|
|
|
### An input file with a filename of the form
|
|
|
|
### abc.xyz is output to a file named
|
|
|
|
### PREFIXabcSUFFIX. The default PREFIX is
|
|
|
|
### empty, and the default SUFFIX is ".html".
|
|
|
|
###
|
|
|
|
### If no file names are specified on the
|
|
|
|
### command line, then the PREFIX and SUFFIX
|
|
|
|
### settings are ignored, and input is read
|
|
|
|
### from stdin, and output is written to
|
|
|
|
### stdout, so that the program can be used in
|
|
|
|
### a UNIX pipeline.
|
|
|
|
###
|
|
|
|
### In the current version, no provision is
|
|
|
|
### made for splitting the output files into
|
|
|
|
### smaller pieces to speed network file
|
|
|
|
### transfer. While this would improve browser
|
|
|
|
### responsiveness over slow network
|
|
|
|
### connections, it would also significantly
|
|
|
|
### complicate hypertext link generation for
|
|
|
|
### this program, and seriously damage browser
|
|
|
|
### search capability within the bibliography
|
|
|
|
### file. Perhaps the solution will come in
|
|
|
|
### (a) browsers' adopting the netscape browser
|
|
|
|
### practice of displaying data as soon as
|
|
|
|
### enough to fill a screen is available, and
|
|
|
|
### (b) faster network connections.
|
|
|
|
###
|
|
|
|
### In the TUG bibliography collection at
|
|
|
|
### ftp://ftp.math.utah.edu/, bibliography
|
|
|
|
### file sizes range from 3K to 4700K, with an
|
|
|
|
### average of 370K. These are rather large,
|
|
|
|
### since typical WWW file sizes need to be
|
|
|
|
### about 16K or less for good responsiveness.
|
|
|
|
###
|
|
|
|
### The checksum field above contains a CRC-16
|
|
|
|
### checksum as the first value, followed by the
|
|
|
|
### equivalent of the standard UNIX wc (word
|
|
|
|
### count) utility output of lines, words, and
|
|
|
|
### characters. This is produced by Robert
|
|
|
|
### Solovay's checksum utility.",
|
|
|
|
### }
|
|
|
|
### ====================================================================
|
|
|
|
# Program initialization: version strings, file-tracking state, user and
# host information, and the regular expressions (with their anchor()
# parameters) used to find text that needs HTML markup.
BEGIN {
    ######################################################################
    VERSION = "1.02 [05-Jul-1997]" # <-- NB: Change this with each update!
    ######################################################################

    PROGRAM = "bibtex-to-html"

    # Sentinel meaning "no input file has been seen yet".
    UNSET_FILENAME = "/dev/unset"
    LASTFILENAME = UNSET_FILENAME
    _last_input_filename = UNSET_FILENAME

    if (SUFFIX == "")
        SUFFIX = ".html"

    USER = ENVIRON["USER"]

    if (USER == "")
        USER = ENVIRON["LOGNAME"]

    if (USER == "")
        USER = "????"

    # Close each command pipe once its single line has been read.
    "hostname" | getline HOSTNAME
    close("hostname")
    "date" | getline DATE
    close("date")

    # [01-Aug-2019] ypcat no longer available: replace by getent
    # ("ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'") | getline PERSONAL_NAME
    # ARGV[0] is used as the name of the awk interpreter for the helper
    # pipeline that extracts field 5 of the passwd entry.
    _passwd_cmd = "getent passwd " USER " | " ARGV[0] " -F: '{print $5}'"
    _passwd_cmd | getline PERSONAL_NAME
    close(_passwd_cmd)

    if (PERSONAL_NAME == "")
    {
        ##("grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME
        _passwd_cmd = "grep '^" USER ":' /etc/passwd | " ARGV[0] " -F: '{print $5}'"
        _passwd_cmd | getline PERSONAL_NAME
        close(_passwd_cmd)
    }

    # NB: these patterns are applied to text in which a double quote is
    # expected to have already been rewritten as the entity &quot;
    # (assumed to be done by protect_SGML_characters() -- confirm).
    CROSSREF_EQUALS_LABEL_PATTERN = "^[ \t]*crossref[ \t]*=[ \t]*&quot;"

    # Pattern to match a line like this:
    # %%% email = "beebe at math.utah.edu (Internet)",

    BIBTEX_EMAIL_PATTERN = "= &quot;[A-Za-z0-9-]+ at [A-Za-z0-9.-]+"
    BIBTEX_EMAIL_OFFSET = 8 # length of the leading context "= &quot;"
    BIBTEX_EMAIL_PREFIX = "mailto:"
    BIBTEX_EMAIL_SAVE_LABEL = 0

    ##CITE_PATTERN = "\\\\cite{[^}]+}"
    CITE_PATTERN = "\\\\cite\\{[^\\}]+}"
    CITE_OFFSET = 6
    CITE_PREFIX = ""
    CITE_SAVE_LABEL = 1

    EMAIL_PATTERN = "[A-Za-z0-9-]+@[A-Za-z0-9.-]+"
    EMAIL_OFFSET = 0
    EMAIL_PREFIX = "mailto:"
    EMAIL_SAVE_LABEL = 0

    # See Nelson H. F. Beebe, ``Bibliography prettyprinting
    # and syntax checking'', TUGboat 14(3), 222--222, October
    # (1993), and 14(4), 395--419, December (1993) for the
    # syntax of BibTeX names used here in ENTRY_PATTERN,
    # KEY_EQUALS_NAME_PATTERN and STRING_PATTERN.

    ##ENTRY_PATTERN = "^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*{[A-Za-z][A-Za-z0-9:.+/'-]*,[ \t]*$"
    ENTRY_PATTERN = "^[ \t]*@[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*\\{[A-Za-z][A-Za-z0-9:.+/'-]*,[ \t]*$"

    KEY_EQUALS_NAME_PATTERN = "^[ \t]*[A-Za-z][A-Za-z0-9:.+/'-]*[ \t]*=[ \t]*[A-Za-z]"

    ##STRING_PATTERN = "^@[Ss][Tt][Rr][Ii][Nn][gG]{[A-Za-z][A-Za-z0-9:.+/'-]*"
    STRING_PATTERN = "^@[Ss][Tt][Rr][Ii][Nn][gG]\\{[A-Za-z][A-Za-z0-9:.+/'-]*"
    STRING_OFFSET = 8
    STRING_PREFIX = ""
    STRING_SAVE_LABEL = 1

    # According to Internet RFC 1614 (May 1994), a URL is
    # defined in the document T. Berners-Lee, ``Uniform
    # Resource Locators'', March 1993, available at URL
    # ftp://info.cern.ch/pub/ietf/url4.ps. Unfortunately,
    # that address is no longer valid. However, I was able to
    # track down pointers from http://www.w3.org/ to locate a
    # suitable description in Internet RFC 1630 (June 1994).

    # NB: We additionally disallow & in a URL because it is
    # needed in SGML entities "&name;". We also disallow =
    # and | because these are commonly used in \path=...= and
    # \path|...| strings in BibTeX files. These restrictions
    # could be removed if we went to the trouble of first
    # encoding these special characters in %xy hexadecimal
    # format, but they are rare enough that I am not going to
    # do so for now. The worst that will happen from this
    # decision is that an occasional URL in a BibTeX file will
    # be missing a surrounding anchor.

    URL_PATTERN = "[A-Za-z]+://[^ \",&=|]+"
    URL_OFFSET = 0
    URL_PREFIX = ""
    URL_SAVE_LABEL = 0

    # [24-May-2016] support for background coloring of block comments
    IN_BLOCK_COMMENT = 0
}
|
|
|
|
|
|
|
|
# Main rule (no pattern): every input record is handed to do_line(),
# which does all of the markup and output work.
{
    do_line()
}
|
|
|
|
|
|
|
|
# At end of input, finish the HTML for the last file that was started.
# Nothing is written if no input file was ever seen.
END {
    if (LASTFILENAME != UNSET_FILENAME)
    {
        end_file(LASTFILENAME)
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Record that `value' was seen on the current input line (FNR).
# array[value] holds a blank-separated list of the line numbers at
# which value has been recorded so far.
function add_entry(array,value)
{
    if (!(value in array))
    {
        array[value] = FNR      # first sighting: start the list
        return
    }

    array[value] = array[value] " " FNR     # later sighting: append
}
|
|
|
|
|
|
|
|
|
|
|
|
# Add anchors <A type="....">...</A> around every piece of text in s
# that matches pattern, and return the marked-up string.
#
#   s          - text to scan
#   type       - anchor attribute name, "HREF" or "NAME"; NAME anchors
#                additionally wrap the text in <STRONG>...</STRONG>
#   pattern    - regular expression locating the text to anchor
#   offset     - number of characters to discard from the start of each
#                match, so that the pattern can contain leading context
#                which stays outside the anchored region
#   prefix     - text attached to the start of the matched string,
#                inside the value quotes in the anchor
#   save_label - when non-zero, record each anchored name in
#                label_hrefs (type HREF) or label_names (type NAME)
#
# name, rstart and rlength are local variables.
function anchor(s,type,pattern,offset,prefix,save_label,    name,rstart,rlength)
{
    if (match(s,pattern))
    {
        # Private copies of RSTART/RLENGTH are needed because the
        # recursive call below changes the globals.  The offset is
        # applied to skip the leading context in the pattern.
        rstart = RSTART + offset
        rlength = RLENGTH - offset

        name = substr(s,rstart,rlength)
        sub(/ +at +/,"@",name)  # reduce "user at host" to "user@host"

        # The remainder of the line is handled by recursion.  It must
        # receive save_label (the original passed an undefined variable
        # here, so labels from later matches were never recorded).
        s = substr(s,1,rstart-1) \
            "<A " type "=\"" prefix name "\">" \
            ((type == "NAME") ? "<STRONG>" : "") \
            substr(s,rstart,rlength) \
            ((type == "NAME") ? "</STRONG>" : "") \
            "</A>" \
            anchor(substr(s,rstart+rlength),type,pattern,offset,prefix,save_label)

        if (save_label)
        {
            if (type == "HREF")
                add_entry(label_hrefs, name)
            else if (type == "NAME")
                add_entry(label_names, name)
        }
    }

    return (s)
}
|
|
|
|
|
|
|
|
|
|
|
|
# Start the HTML output that corresponds to the current input file
# (FILENAME): write the comment banner, DOCTYPE, <HEAD>, the validator
# badges and the opening <PRE>, then reset the per-file label tables.
#
# Sets the global BASE_FILENAME to FILENAME with any leading directory
# part removed.  f and slash_pos are local variables.
function begin_file(    f,slash_pos)
{
    f = output_filename(FILENAME)

    ## NB: If Transitional is eliminated in DOCTYPE, background coloring is lost! Why?
    slash_pos = str::rindex(FILENAME, "/");
    BASE_FILENAME = (slash_pos > 0)? str::substr(FILENAME, slash_pos + 1): FILENAME;

    print "<!-- -*-html-*- -->" > f
    print "" > f
    ## print "<!-- " FILENAME " -->" > f
    print "<!-- " BASE_FILENAME " -->" > f
    print "<!-- WARNING: Do NOT edit this file. It was converted from -->" > f
    print "<!-- BibTeX format to HTML by " PROGRAM " version " VERSION " -->" > f
    ## print "<!-- on " DATE " -->" > f
    ## print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->" > f
    print "" > f
    print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/1998/REC-html40-19980424/loose.dtd\">" > f
    print "" > f
    print "" > f
    print "<HTML>" > f
    print " <HEAD>" > f
    print " <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=iso-8859-1\">"> f
    print " <TITLE>" > f
    ## print " BibTeX bibliography " FILENAME > f
    print " BibTeX bibliography " BASE_FILENAME > f
    print " </TITLE>" > f
    ## print " <LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">" > f
    print " <LINK HREF=\"http://www.math.utah.edu/pub/tex/bib/tugbib.css\" TYPE=\"text/css\" REL=\"stylesheet\">" > f
    print " </HEAD>" > f
    print "" > f
    print " <BODY>" > f
    print " <DIV ALIGN=\"right\">" > f
    print " <A HREF=\"http://validator.w3.org/check/referer\">" > f
    print " <IMG ALIGN=\"MIDDLE\" BORDER=\"0\" SRC=\"/images/valid-html40.png\" ALT=\"Valid HTML 4.0!\" HEIGHT=\"31\" WIDTH=\"88\">" > f
    print " </A>" > f
    print " <A HREF=\"http://jigsaw.w3.org/css-validator/check/referer\">" > f
    print " <IMG ALIGN=\"MIDDLE\" BORDER=\"0\" SRC=\"/images/valid-css.gif\" ALT=\"Valid CSS!\" HEIGHT=\"31\" WIDTH=\"88\">" > f
    print " </A>" > f
    print " </DIV>" > f
    print "<PRE>" > f

    # The <PRE> just written is an ordinary one, so any block-comment
    # state left over from a previous file must not carry into this one.
    IN_BLOCK_COMMENT = 0

    clear_array(label_names)
    clear_array(label_hrefs)
}
|
|
|
|
|
|
|
|
|
|
|
|
# Detect a switch to a new input file.  When FILENAME differs from
# LASTFILENAME, the previous file's HTML (if any) is finished and its
# output file closed, and HTML output for the new file is started.
#
# out is a local variable.
function check_for_file_change(    out)
{
    if (LASTFILENAME == FILENAME)
        return

    if (LASTFILENAME != UNSET_FILENAME)
    {
        end_file(LASTFILENAME)

        # Decide on the OUTPUT name whether to close: the original
        # compared the input name with "/dev/stdout", so the guard
        # never protected the standard output stream.
        out = output_filename(LASTFILENAME)
        if (out != "/dev/stdout")
            close(out)
    }

    LASTFILENAME = FILENAME
    begin_file()
}
|
|
|
|
|
|
|
|
|
|
|
|
# Report every label that was referenced (recorded in label_hrefs) but
# never defined (absent from label_names).  The warning lists the line
# numbers stored for the reference.  ref is a local variable.
function check_refs(    ref)
{
    for (ref in label_hrefs)
    {
        if (ref in label_names)
            continue            # defined somewhere: nothing to report

        warning("undefined label " ref " at line(s) " label_hrefs[ref])
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Remove every element from array, one subscript at a time.
# k is a local variable.
function clear_array(array,    k)
{
    for (k in array)
    {
        delete array[k]
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
# Finish the HTML document that corresponds to input file `filename':
# write the closing </PRE>, </BODY> and </HTML> tags to its output
# file, then report references to undefined labels.
# out is a local variable.
function end_file(filename,    out)
{
    out = output_filename(filename)

    print "</PRE>" > out
    print " </BODY>" > out
    print "</HTML>" > out

    check_refs()
}
|
|
|
|
|
|
|
|
|
|
|
|
# Turn the labels of every \cite{label1,label2,...} in s into hypertext
# links <A HREF="#label">label</A>, record each label in label_hrefs,
# and return the marked-up string.
#
# The function locates the citations itself with CITE_PATTERN, so it
# does not depend on the RSTART/RLENGTH left behind by the caller, and
# it handles all citations on the line rather than only the first.
#
# k, n, labels, t, rest, start and len are local variables.
function do_cite(s,    k,n,labels,t,rest,start,len)
{
    t = ""          # marked-up text produced so far
    rest = s        # text still to be scanned

    while (match(rest,CITE_PATTERN))
    {
        start = RSTART
        len = RLENGTH

        # Copy everything up to and including "\cite{" unchanged.
        t = t substr(rest,1,start + CITE_OFFSET - 1)

        # The label list lies between "\cite{" and the closing brace.
        n = split(substr(rest,start + CITE_OFFSET,len - 1 - CITE_OFFSET),labels,",")

        for (k = 1; k <= n; ++k)
        {
            t = t ((k > 1) ? "," : "") "<A HREF=\"#" labels[k] "\">" labels[k] "</A>"
            add_entry(label_hrefs, labels[k])
        }

        # Continue scanning at the closing brace of this citation.
        rest = substr(rest,start + len - 1)
    }

    return (t rest)
}
|
|
|
|
|
|
|
|
|
|
|
|
# Process one input line ($0): protect SGML special characters, add
# anchors for e-mail addresses, URLs, @String names, \cite{} labels,
# entry labels, string-name uses and crossref values, then write the
# result to the output file for the current input file.
#
# n, name, s and f are local variables.
function do_line(    n,name,s,f)
{
    # Handle a change of input file BEFORE recording any labels from
    # this line.  Starting a new file clears the label tables, so doing
    # this last (as the original did) charged the first line's labels
    # of each file to the previous file and then discarded them.
    check_for_file_change()

    s = protect_SGML_characters($0)

    if (match(s,STRING_PATTERN)) # remember name from @String{name = "value"}
    {
        name = substr(s,RSTART + STRING_OFFSET,RLENGTH - STRING_OFFSET)
        string_name[name] = 1
        # print "DEBUG 1: name =", name >"/dev/stderr"
    }

    if (match(s,/^%+[ \t]*email[ \t]*=/)) # special handling because BibTeX does not allow @ in comments
        s = anchor(s,"HREF",BIBTEX_EMAIL_PATTERN,BIBTEX_EMAIL_OFFSET,BIBTEX_EMAIL_PREFIX,\
                   BIBTEX_EMAIL_SAVE_LABEL)
    else
        s = anchor(s,"HREF",EMAIL_PATTERN,EMAIL_OFFSET,EMAIL_PREFIX,EMAIL_SAVE_LABEL)

    s = anchor(s,"HREF",URL_PATTERN,URL_OFFSET,URL_PREFIX,URL_SAVE_LABEL)
    s = anchor(s,"NAME",STRING_PATTERN,STRING_OFFSET,STRING_PREFIX,STRING_SAVE_LABEL)

    if (match(s,CITE_PATTERN))
        s = do_cite(s)

    if (match(s,ENTRY_PATTERN)) # then have ``@Entry{label,''
    {
        n = index(s,"{")
        name = substr(s,n+1)
        gsub(/^[ \t]*/,"",name) # trim optional leading space
        gsub(/,[ \t]*$/,"",name) # trim trailing comma and optional space
        # print "DEBUG 2: name =", name >"/dev/stderr"
        s = substr(s,1,n) \
            "<A NAME=\"" name "\"><STRONG>" name "</STRONG></A>" \
            substr(s,n+1+length(name))
        add_entry(label_names, name)
    }
    else if (match(s,KEY_EQUALS_NAME_PATTERN)) # then have ``key = name''
    {
        # The pattern ends on the first letter of the name, hence -1.
        name = substr(s,RSTART+RLENGTH-1)
        sub(/,?[ \t]*$/,"",name) # trim optional trailing comma and space
        # print "DEBUG 3: name =", name >"/dev/stderr"

        if (name in string_name) # then we have a definition of this name
        {
            s = substr(s,1,RSTART+RLENGTH-2) \
                "<A HREF=\"#" name "\">" name "</A>" substr(s,RSTART+RLENGTH-1+length(name))
            add_entry(label_hrefs, name)
        }
    }
    else if (match(s,CROSSREF_EQUALS_LABEL_PATTERN)) # then have `` crossref = "label"''
    {
        name = substr(s,RSTART+RLENGTH)
        # The closing quote is expected to be the entity &quot; at this
        # point (written by protect_SGML_characters above); a literal
        # quote character would never match here once that is the case.
        sub(/&quot;,?[ \t]*$/,"",name) # trim trailing quote and optional comma and space
        # print "DEBUG 4: name =", name >"/dev/stderr"
        s = substr(s,1,RSTART+RLENGTH-1) \
            "<A HREF=\"#" name "\">" name "</A>" substr(s,RSTART+RLENGTH+length(name))
        add_entry(label_hrefs, name)
    }

    f = output_filename(FILENAME)

    # [24-May-2016] runs of comment lines (starting with %) are put in
    # their own <PRE CLASS="blockcomment"> so that they can be colored.
    if ( (s ~ "^%") && !IN_BLOCK_COMMENT)
    {
        printf("</PRE><PRE CLASS=\"blockcomment\">") > f
        IN_BLOCK_COMMENT = 1
    }
    else if ( (s !~ "^%") && IN_BLOCK_COMMENT)
    {
        printf("</PRE><PRE>") > f
        IN_BLOCK_COMMENT = 0
    }

    print s > f
}
|
|
|
|
|
|
|
|
|
|
|
|
# Return the name of the output file that corresponds to input_filename.
#
# A name of the form abc.xyz maps to PREFIX "abc" SUFFIX; a name with
# no extension maps to PREFIX name SUFFIX.  An empty name, "-" and
# "/dev/stdin" map to "/dev/stdout" so the program works in a pipeline.
# When the global T_OUT_NAME is non-empty it overrides everything.
#
# The most recent result is cached in _last_input_filename and
# _last_output_filename, because this function is called for every
# output line.
function output_filename(input_filename)
{
    ## HAWK - for use in t/h-003.hawk
    if (length(T_OUT_NAME) > 0) return T_OUT_NAME;
    ## END HAWK

    if (input_filename != _last_input_filename)
    { # optimization: we cache last function result for speed
        _last_input_filename = input_filename

        if ((input_filename == "") || (input_filename == "-") || (input_filename == "/dev/stdin"))
            _last_output_filename = "/dev/stdout"
        else
        {
            # Strip only a real extension: a literal dot followed by
            # characters that are neither dots nor slashes.  The
            # original unescaped "." matched any character, so a name
            # without an extension was reduced to "" (and sent to
            # stdout), and a dot in a directory name truncated the path.
            sub(/\.[^.\/]*$/,"",input_filename)

            if (input_filename == "")
                _last_output_filename = "/dev/stdout"
            else
                _last_output_filename = PREFIX input_filename SUFFIX
        }
    }

    return (_last_output_filename)
}
|
|
|
|
|
|
|
|
|
|
|
|
# Return s with the characters that are special in SGML/HTML replaced
# by character entities:  &  ->  &amp;   <  ->  &lt;   >  ->  &gt;
# and  "  ->  &quot;
#
# In each replacement string the sequence \\& denotes a literal
# ampersand; a bare & would stand for the matched text instead.
function protect_SGML_characters(s)
{
    gsub(/&/,"\\&amp;",s) # NB: this one MUST be first, or the ampersands
                          # of the entities added below would be re-encoded
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)

    ## [24-May-2016] with the change from HTML 3.2 to 4.0, we can use &quot; again!
    ## gsub(/\"/,"\\&#34;",s) # this was &quot; in earlier HTML
    # versions, including the HTML 3.2
    # draft, but was stupidly eliminated in
    # the final HTML 3.2 version: see
    # http://www.w3.org/pub/WWW/MarkUp/Wilbur/
    # in the section ``What happened to &quot;?''
    gsub(/\"/,"\\&quot;",s)

    return (s)
}
|
|
|
|
|
|
|
|
|
|
|
|
# Write a warning message to the standard error stream, prefixed with
# the name of the input file it belongs to.
#
# An earlier form printed FILENAME and FNR:
#     print FILENAME ":" FNR ":%%" message >"/dev/stderr"
# The only caller that needs warnings is check_refs(), which runs when
# a file is being finished, possibly after input has already moved on
# to the next file.  LASTFILENAME is therefore used instead of
# FILENAME, and FNR is omitted because no line number is on record for
# LASTFILENAME.
function warning(message,    text)
{
    text = LASTFILENAME ":%%" message
    print text > "/dev/stderr"
}
|