# http://www.netlib.org/bibnet/tools/software/journal-toc.awk
#
### ====================================================================
 | 
						|
###  @Awk-file{
 | 
						|
###     author          = "Nelson H. F. Beebe",
 | 
						|
###     version         = "1.00",
 | 
						|
###     date            = "09 October 1996",
 | 
						|
###     time            = "15:57:06 MDT",
 | 
						|
###     filename        = "journal-toc.awk",
 | 
						|
###     address         = "Center for Scientific Computing
 | 
						|
###                        Department of Mathematics
 | 
						|
###                        University of Utah
 | 
						|
###                        Salt Lake City, UT 84112
 | 
						|
###                        USA",
 | 
						|
###     telephone       = "+1 801 581 5254",
 | 
						|
###     FAX             = "+1 801 581 4148",
 | 
						|
###     URL             = "http://www.math.utah.edu/~beebe",
 | 
						|
###     checksum        = "25092 977 3357 26493",
 | 
						|
###     email           = "beebe@math.utah.edu (Internet)",
 | 
						|
###     codetable       = "ISO/ASCII",
 | 
						|
###     keywords        = "BibTeX, bibliography, HTML, journal table of
 | 
						|
###                        contents",
 | 
						|
###     supported       = "yes",
 | 
						|
###     docstring       = "Create a journal cover table of contents from
 | 
						|
###                        <at>Article{...} entries in a journal BibTeX
 | 
						|
###                        .bib file for checking the bibliography
 | 
						|
###                        database against the actual journal covers.
 | 
						|
###                        The output can be either plain text, or HTML.
 | 
						|
###
 | 
						|
###                        Usage:
 | 
						|
###                            bibclean -max-width 0 BibTeX-file(s) | \
 | 
						|
###                                bibsort -byvolume | \
 | 
						|
###                                awk -f journal-toc.awk \
 | 
						|
###                                    [-v HTML=nnn] [-v INDENT=nnn] \
 | 
						|
###                                    [-v BIBFILEURL=url] >foo.toc
 | 
						|
###
 | 
						|
###                            or if the bibliography is already sorted
 | 
						|
###                            by volume,
 | 
						|
###
 | 
						|
###                            bibclean -max-width 0 BibTeX-file(s) | \
 | 
						|
###                                awk -f journal-toc.awk \
 | 
						|
###                                    [-v HTML=nnn] [-v INDENT=nnn] \
 | 
						|
###                                    [-v BIBFILEURL=url] >foo.toc
 | 
						|
###
 | 
						|
###                        A non-zero value of the command-line option,
 | 
						|
###                        HTML=nnn, results in HTML output instead of
 | 
						|
###                        the default plain ASCII text (corresponding
 | 
						|
###                        to HTML=0).
 | 
						|
###
 | 
						|
###                        The INDENT=nnn command-line option specifies
 | 
						|
###                        the number of blanks to indent each logical
 | 
						|
###                        level of HTML.  The default is INDENT=4.
 | 
						|
###                        INDENT=0 suppresses indentation.  The INDENT
 | 
						|
###                        option has no effect when the default HTML=0
 | 
						|
###                        (plain text output) option is in effect.
 | 
						|
###
 | 
						|
###                        When HTML output is selected, the
 | 
						|
###                        BIBFILEURL=url command-line option provides a
 | 
						|
###                        way to request hypertext links from table of
 | 
						|
###                        contents page numbers to the complete BibTeX
 | 
						|
###                        entry for the article.  These links are
 | 
						|
###                        created by appending a sharp (#) and the
 | 
						|
###                        citation label to the BIBFILEURL value, which
 | 
						|
###                        conforms with the practice of
 | 
						|
###                        bibtex-to-html.awk.
 | 
						|
###
 | 
						|
###                        The HTML output form may be useful as a more
 | 
						|
###                        compact representation of journal article
 | 
						|
###                        bibliography data than the original BibTeX
 | 
						|
###                        file provides.  Of course, the
 | 
						|
###                        table-of-contents format provides less
 | 
						|
###                        information, and is considerably more
 | 
						|
###                        troublesome for a computer program to parse.
 | 
						|
###
 | 
						|
###                        When URL key values are provided, they will
 | 
						|
###                        be used to create hypertext links around
 | 
						|
###                        article titles.  This supports journals that
 | 
						|
###                        provide article contents on the World-Wide
 | 
						|
###                        Web.
 | 
						|
###
 | 
						|
###                        For parsing simplicity, this program requires
 | 
						|
###                        that BibTeX
 | 
						|
###
 | 
						|
###                            key = "value"
 | 
						|
###
 | 
						|
###                        and
 | 
						|
###
 | 
						|
###                            @String{name = "value"}
 | 
						|
###
 | 
						|
###                        specifications be entirely contained on
 | 
						|
###                        single lines, which is readily provided by
 | 
						|
###                        the `bibclean -max-width 0' filter.  It also
 | 
						|
###                        requires that bibliography entries begin and
 | 
						|
###                        end at the start of a line, and that
 | 
						|
###                        quotation marks, rather than balanced braces,
 | 
						|
###                        delimit string values.  This is a
 | 
						|
###                        conventional format that again can be
 | 
						|
###                        guaranteed by bibclean.
 | 
						|
###
 | 
						|
###                        This program requires `new' awk, as described
 | 
						|
###                        in the book
 | 
						|
###
 | 
						|
###                            Alfred V. Aho, Brian W. Kernighan, and
 | 
						|
###                            Peter J. Weinberger,
 | 
						|
###                            ``The AWK Programming Language'',
 | 
						|
###                            Addison-Wesley (1988), ISBN
 | 
						|
###                            0-201-07981-X,
 | 
						|
###
 | 
						|
###                        such as provided by programs named (GNU)
 | 
						|
###                        gawk, nawk, and recent AT&T awk.
 | 
						|
###
 | 
						|
###                        The checksum field above contains a CRC-16
 | 
						|
###                        checksum as the first value, followed by the
 | 
						|
###                        equivalent of the standard UNIX wc (word
 | 
						|
###                        count) utility output of lines, words, and
 | 
						|
###                        characters.  This is produced by Robert
 | 
						|
###                        Solovay's checksum utility.",
 | 
						|
###  }
 | 
						|
### ====================================================================
 | 
						|
 | 
						|
BEGIN						{ initialize() }
 | 
						|
 | 
						|
/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *\{/		{ do_String(); next }
 | 
						|
 | 
						|
/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/	{ next }
 | 
						|
 | 
						|
/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/		{ do_Article(); next }
 | 
						|
 | 
						|
/^ *@/						{ do_Other(); next }
 | 
						|
 | 
						|
/^ *author *= *\"/ 				{ do_author(); next }
 | 
						|
 | 
						|
/^ *journal *= */				{ do_journal(); next }
 | 
						|
 | 
						|
/^ *volume *= *\"/				{ do_volume(); next }
 | 
						|
 | 
						|
/^ *number *= *\"/				{ do_number(); next }
 | 
						|
 | 
						|
/^ *year *= *\"/				{ do_year(); next }
 | 
						|
 | 
						|
/^ *month *= */					{ do_month(); next }
 | 
						|
 | 
						|
/^ *title *= *\"/				{ do_title(); next }
 | 
						|
 | 
						|
/^ *pages *= *\"/				{ do_pages(); next }
 | 
						|
 | 
						|
/^ *URL *= *\"/					{ do_URL(); next }
 | 
						|
 | 
						|
/^ *} *$/					{ if (In_Article) do_end_entry(); next }
 | 
						|
 | 
						|
END						{ terminate() }
 | 
						|
 | 
						|
 | 
						|
########################################################################
 | 
						|
# NB: The programming conventions for variables in this program are:   #
 | 
						|
#	UPPERCASE		global constants and user options      #
 | 
						|
#	Initialuppercase	global variables                       #
 | 
						|
#	lowercase		local variables                        #
 | 
						|
# Any deviation is an error!                                           #
 | 
						|
########################################################################
 | 
						|
 | 
						|
 | 
						|
function do_Article()
 | 
						|
{
 | 
						|
	In_Article = 1
 | 
						|
 | 
						|
	Citation_label = $0
 | 
						|
	sub(/^[^\{]*\{/,"",Citation_label)
 | 
						|
	sub(/ *, *$/,"",Citation_label)
 | 
						|
 | 
						|
	Author = ""
 | 
						|
        Title = ""
 | 
						|
        Journal = ""
 | 
						|
        Volume = ""
 | 
						|
        Number = ""
 | 
						|
        Month = ""
 | 
						|
        Year = ""
 | 
						|
        Pages = ""
 | 
						|
        Url = ""
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_author()
 | 
						|
{
 | 
						|
	Author = TeX_to_HTML(get_value($0))
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_end_entry( k,n,parts)
 | 
						|
{
 | 
						|
	n = split(Author,parts," and ")
 | 
						|
	if (Last_number != Number)
 | 
						|
		do_new_issue()
 | 
						|
	for (k = 1; k < n; ++k)
 | 
						|
		print_toc_line(parts[k] " and", "", "")
 | 
						|
	Title_prefix = html_begin_title()
 | 
						|
	Title_suffix = html_end_title()
 | 
						|
	if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
 | 
						|
		print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
 | 
						|
	else			# need to split long title over multiple lines
 | 
						|
		do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_journal()
 | 
						|
{
 | 
						|
	if ($0 ~ /[=] *"/)	# have journal = "quoted journal name",
 | 
						|
		Journal = get_value($0)
 | 
						|
	else			# have journal = journal-abbreviation,
 | 
						|
	{
 | 
						|
        	Journal = get_abbrev($0)
 | 
						|
		if (Journal in String) # replace abbrev by its expansion
 | 
						|
			Journal = String[Journal]
 | 
						|
	}
 | 
						|
	gsub(/\\-/,"",Journal)	# remove discretionary hyphens
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_long_title(author,title,pages, last_title,n)
 | 
						|
{
 | 
						|
	title = trim(title)			# discard leading and trailing space
 | 
						|
	while (length(title) > 0)
 | 
						|
	{
 | 
						|
		n = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
 | 
						|
		last_title = substr(title,1,n)
 | 
						|
		title = substr(title,n+1)
 | 
						|
		sub(/^ +/,"",title)		# discard any leading space
 | 
						|
		print_toc_line(author, last_title, (length(title) == 0) ? pages : "")
 | 
						|
		author = ""
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_month( k,n,parts)
 | 
						|
{
 | 
						|
	Month = ($0 ~ /[=] *"/) ? get_value($0) : get_abbrev($0)
 | 
						|
	gsub(/[\"]/,"",Month)
 | 
						|
	gsub(/ *# *\\slash *# */," / ",Month)
 | 
						|
	gsub(/ *# *-+ *# */," / ",Month)
 | 
						|
	n = split(Month,parts," */ *")
 | 
						|
	Month = ""
 | 
						|
	for (k = 1; k <= n; ++k)
 | 
						|
		Month = Month ((k > 1) ? " / " : "") \
 | 
						|
			((parts[k] in Month_expansion) ? Month_expansion[parts[k]] : parts[k])
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_new_issue()
 | 
						|
{
 | 
						|
	Last_number = Number
 | 
						|
	if (HTML)
 | 
						|
	{
 | 
						|
		if (Last_volume != Volume)
 | 
						|
		{
 | 
						|
			Last_volume = Volume
 | 
						|
			print_line(prefix(2) "<BR>")
 | 
						|
		}
 | 
						|
		html_end_toc()
 | 
						|
		html_begin_issue()
 | 
						|
		print_line(prefix(2) Journal "<BR>")
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		print_line("")
 | 
						|
		print_line(Journal)
 | 
						|
	}
 | 
						|
 | 
						|
	print_line(strip_html(vol_no_month_year()))
 | 
						|
 | 
						|
	if (HTML)
 | 
						|
	{
 | 
						|
		html_end_issue()
 | 
						|
		html_toc_entry()
 | 
						|
		html_begin_toc()
 | 
						|
	}
 | 
						|
	else
 | 
						|
		print_line("")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_number()
 | 
						|
{
 | 
						|
	Number = get_value($0)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_Other()
 | 
						|
{
 | 
						|
	In_Article = 0
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_pages()
 | 
						|
{
 | 
						|
	Pages = get_value($0)
 | 
						|
	sub(/--[?][?]/,"",Pages)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_String()
 | 
						|
{
 | 
						|
	sub(/^[^\{]*\{/,"",$0)	# discard up to and including open brace
 | 
						|
	sub(/\} *$/,"",$0)	# discard from optional whitespace and trailing brace to end of line
 | 
						|
	String[get_key($0)] = get_value($0)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_title()
 | 
						|
{
 | 
						|
	Title = TeX_to_HTML(get_value($0))
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_URL( parts)
 | 
						|
{
 | 
						|
	Url = get_value($0)
 | 
						|
	split(Url,parts,"[,;]")			# in case we have multiple URLs
 | 
						|
	Url = trim(parts[1])
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_volume()
 | 
						|
{
 | 
						|
	Volume = get_value($0)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function do_year()
 | 
						|
{
 | 
						|
	Year = get_value($0)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function get_abbrev(s)
 | 
						|
{	# return abbrev from ``key = abbrev,''
 | 
						|
	sub(/^[^=]*= */,"",s)	# discard text up to start of non-blank value
 | 
						|
	sub(/ *,? *$/,"",s)	# discard trailing optional whitspace, quote,
 | 
						|
				# optional comma, and optional space
 | 
						|
	return (s)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function get_key(s)
 | 
						|
{	# return kay from ``key = "value",''
 | 
						|
	sub(/^ */,"",s)		# discard leading space
 | 
						|
	sub(/ *=.*$/,"",s)	# discard everthing after key
 | 
						|
 | 
						|
	return (s)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function get_value(s)
 | 
						|
{	# return value from ``key = "value",''
 | 
						|
	sub(/^[^\"]*\" */,"",s)	# discard text up to start of non-blank value
 | 
						|
	sub(/ *\",? *$/,"",s)	# discard trailing optional whitspace, quote,
 | 
						|
				# optional comma, and optional space
 | 
						|
	return (s)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_accents(s)
 | 
						|
{
 | 
						|
	if (index(s,"\\") > 0)			# important optimization
 | 
						|
	{
 | 
						|
		# Convert common lower-case accented letters according to the
 | 
						|
		# table on p. 169 of in Peter Flynn's ``The World Wide Web
 | 
						|
		# Handbook'', International Thomson Computer Press, 1995, ISBN
 | 
						|
		# 1-85032-205-8.  The official table of ISO Latin 1 SGML
 | 
						|
		# entities used in HTML can be found in the file
 | 
						|
		# /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
 | 
						|
		# may differ).
 | 
						|
 | 
						|
		gsub(/\{\\\a}/,	"\\à",	s)
 | 
						|
		gsub(/\{\\'a}/,	"\\á",	s)
 | 
						|
		gsub(/\{\\[\^]a}/,"\\â",	s)
 | 
						|
		gsub(/\{\\~a}/,	"\\ã",	s)
 | 
						|
		##gsub(/\{\\\"a\}/,	"\\ä",	s)
 | 
						|
		gsub(/\{\\\"\{a\}\}/,	"\\ä",	s)
 | 
						|
		gsub(/\{\\aa}/,	"\\å",	s)
 | 
						|
		gsub(/\{\\ae}/,	"\\æ",	s)
 | 
						|
 | 
						|
		gsub(/\{\\c\{c\}}/,"\\ç",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\e}/,	"\\è",	s)
 | 
						|
		gsub(/\{\\'e}/,	"\\é",	s)
 | 
						|
		gsub(/\{\\[\^]e}/,"\\ê",	s)
 | 
						|
		gsub(/\{\\\"e}/,	"\\ë",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\i}/,	"\\ì",	s)
 | 
						|
		gsub(/\{\\'i}/,	"\\í",	s)
 | 
						|
		gsub(/\{\\[\^]i}/,"\\î",	s)
 | 
						|
		gsub(/\{\\\"i}/,	"\\ï",	s)
 | 
						|
 | 
						|
		# ignore eth and thorn
 | 
						|
 | 
						|
		gsub(/\{\\~n}/,	"\\ñ",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\o}/,	"\\ò",	s)
 | 
						|
		gsub(/\{\\'o}/,	"\\ó",	s)
 | 
						|
		gsub(/\{\\[\^]o}/, "\\ô",	s)
 | 
						|
		gsub(/\{\\~o}/,	"\\õ",	s)
 | 
						|
		##gsub(/\{\\\"o}/,	"\\ö",	s)
 | 
						|
		gsub(/\{\\\"\{o\}}/,	"\\ö",	s)
 | 
						|
		gsub(/\{\\o}/,	"\\ø",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\u}/,	"\\ù",	s)
 | 
						|
		gsub(/\{\\'u}/,	"\\ú",	s)
 | 
						|
		gsub(/\{\\[\^]u}/,"\\û",	s)
 | 
						|
		##gsub(/\{\\\"u}/,	"\\ü",	s)
 | 
						|
		gsub(/\{\\\"\{u\}\}/,	"\\ü",	s)
 | 
						|
 | 
						|
		gsub(/\{\\'y}/,	"\\ý",	s)
 | 
						|
		gsub(/\{\\\"y}/,	"\\ÿ",	s)
 | 
						|
 | 
						|
		# Now do the same for upper-case accents
 | 
						|
 | 
						|
		gsub(/\{\\\A}/,	"\\À",	s)
 | 
						|
		gsub(/\{\\'A}/,	"\\Á",	s)
 | 
						|
		gsub(/\{\\[\^]A}/,	"\\Â",	s)
 | 
						|
		gsub(/\{\\~A}/,	"\\Ã",	s)
 | 
						|
		##gsub(/\{\\\"A}/,	"\\Ä",	s)
 | 
						|
		gsub(/\{\\\"\{A\}\}/,	"\\Ä",	s)
 | 
						|
		gsub(/\{\\AA}/,	"\\Å",	s)
 | 
						|
		gsub(/\{\\AE}/,	"\\Æ",	s)
 | 
						|
 | 
						|
		gsub(/\{\\c\{C\}}/,"\\Ç",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\e}/,	"\\È",	s)
 | 
						|
		gsub(/\{\\'E}/,	"\\É",	s)
 | 
						|
		gsub(/\{\\[\^]E}/,	"\\Ê",	s)
 | 
						|
		gsub(/\{\\\"E}/,	"\\Ë",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\I}/,	"\\Ì",	s)
 | 
						|
		gsub(/\{\\'I}/,	"\\Í",	s)
 | 
						|
		gsub(/\{\\[\^]I}/,	"\\Î",	s)
 | 
						|
		gsub(/\{\\\"I}/,	"\\Ï",	s)
 | 
						|
 | 
						|
		# ignore eth and thorn
 | 
						|
 | 
						|
		gsub(/\{\\~N}/,	"\\Ñ",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\O}/,	"\\Ò",	s)
 | 
						|
		gsub(/\{\\'O}/,	"\\Ó",	s)
 | 
						|
		gsub(/\{\\[\^]O}/,	"\\Ô",	s)
 | 
						|
		gsub(/\{\\~O}/,	"\\Õ",	s)
 | 
						|
		##gsub(/\{\\\"O}/,	"\\Ö",	s)
 | 
						|
		gsub(/\{\\\"\{O\}\}/,	"\\Ö",	s)
 | 
						|
		gsub(/\{\\O}/,	"\\Ø",	s)
 | 
						|
 | 
						|
		gsub(/\{\\\U}/,	"\\Ù",	s)
 | 
						|
		gsub(/\{\\'U}/,	"\\Ú",	s)
 | 
						|
		gsub(/\{\\[\^]U}/,	"\\Û",	s)
 | 
						|
		##gsub(/\{\\\"U}/,	"\\Ü",	s)
 | 
						|
		gsub(/\{\\\"\{U\}\}/,	"\\Ü",	s)
 | 
						|
 | 
						|
		gsub(/\{\\'Y}/,	"\\Ý",	s)
 | 
						|
 | 
						|
		gsub(/\{\\ss}/,	"\\ß",	s)
 | 
						|
 | 
						|
		# Others not mentioned in Flynn's book
 | 
						|
		gsub(/\{\\'\\i}/,"\\í",	s)
 | 
						|
		gsub(/\{\\'\\j}/,"j",		s)
 | 
						|
	}
 | 
						|
	return (s)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_begin_issue()
 | 
						|
{
 | 
						|
	print_line("")
 | 
						|
	print_line(prefix(2) "<HR>")
 | 
						|
	print_line("")
 | 
						|
	print_line(prefix(2) "<H1>")
 | 
						|
	print_line(prefix(3) "<A NAME=\"" html_label() "\">")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_begin_pages()
 | 
						|
{
 | 
						|
	return ((HTML && (BIBFILEURL != "")) ? ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">") : "")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_begin_pre()
 | 
						|
{
 | 
						|
	In_PRE = 1
 | 
						|
	print_line("<PRE>")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_begin_title()
 | 
						|
{
 | 
						|
	return ((HTML && (Url != "")) ? ("<A HREF=\"" Url "\">") : "")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_begin_toc()
 | 
						|
{
 | 
						|
	html_end_toc()
 | 
						|
	html_begin_pre()
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_body( k)
 | 
						|
{
 | 
						|
	for (k = 1; k <= BodyLines; ++k)
 | 
						|
		print Body[k]
 | 
						|
}
 | 
						|
 | 
						|
function html_breakpoint(title,maxlength, break_after,k)
 | 
						|
{
 | 
						|
	# Return the largest character position in title AFTER which we
 | 
						|
	# can break the title across lines, without exceeding maxlength
 | 
						|
	# visible characters.
 | 
						|
	if (html_length(title) > maxlength)	# then need to split title across lines
 | 
						|
	{
 | 
						|
		# In the presence of HTML markup, the initialization of
 | 
						|
		# k here is complicated, because we need to advance it
 | 
						|
		# until html_length(title) is at least maxlength,
 | 
						|
		# without invoking the expensive html_length() function
 | 
						|
		# too frequently.  The need to split the title makes the
 | 
						|
		# alternative of delayed insertion of HTML markup much
 | 
						|
		# more complicated.
 | 
						|
		break_after = 0
 | 
						|
		for (k = min(maxlength,length(title)); k < length(title); ++k)
 | 
						|
		{
 | 
						|
			if (substr(title,k+1,1) == " ")
 | 
						|
			{		# could break after position k
 | 
						|
				if (html_length(substr(title,1,k)) <= maxlength)
 | 
						|
					break_after = k
 | 
						|
				else	# advanced too far, retreat back to last break_after
 | 
						|
					break
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if (break_after == 0)		# no breakpoint found by forward scan
 | 
						|
		{				# so switch to backward scan
 | 
						|
			for (k = min(maxlength,length(title)) - 1; \
 | 
						|
				(k > 0) && (substr(title,k+1,1) != " "); --k)
 | 
						|
				;		# find space at which to break title
 | 
						|
			if (k < 1)		# no break point found
 | 
						|
				k = length(title) # so must print entire string
 | 
						|
		}
 | 
						|
		else
 | 
						|
			k = break_after
 | 
						|
	}
 | 
						|
	else					# title fits on one line
 | 
						|
		k = length(title)
 | 
						|
	return (k)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
function html_end_issue()
 | 
						|
{
 | 
						|
	print_line(prefix(3) "</A>")
 | 
						|
	print_line(prefix(2) "</H1>")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_end_pages()
 | 
						|
{
 | 
						|
	return ((HTML && (BIBFILEURL != "")) ? "</A>" : "")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_end_pre()
 | 
						|
{
 | 
						|
	if (In_PRE)
 | 
						|
	{
 | 
						|
		print_line("</PRE>")
 | 
						|
		In_PRE = 0
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_end_title()
 | 
						|
{
 | 
						|
	return ((HTML && (Url != "")) ? "</A>" : "")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_end_toc()
 | 
						|
{
 | 
						|
	html_end_pre()
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_fonts(s, arg,control_word,k,level,n,open_brace)
 | 
						|
{
 | 
						|
	open_brace = index(s,"{")
 | 
						|
	if (open_brace > 0)			# important optimization
 | 
						|
	{
 | 
						|
		level = 1
 | 
						|
		for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
 | 
						|
		{
 | 
						|
			if (substr(s,k,1) == "{")
 | 
						|
				level++
 | 
						|
			else if (substr(s,k,1) == "}")
 | 
						|
				level--
 | 
						|
		}
 | 
						|
 | 
						|
		# {...} is now found at open_brace ... (k-1)
 | 
						|
		for (control_word in Font_decl_map)	# look for {\xxx ...}
 | 
						|
		{
 | 
						|
			if (substr(s,open_brace+1,length(control_word)+1) ~ \
 | 
						|
				("\\" control_word "[^A-Za-z]"))
 | 
						|
			{
 | 
						|
				n = open_brace + 1 + length(control_word)
 | 
						|
				arg = trim(substr(s,n,k - n))
 | 
						|
				if (Font_decl_map[control_word] == "toupper") # arg -> ARG
 | 
						|
					arg = toupper(arg)
 | 
						|
				else if (Font_decl_map[control_word] != "") # arg -> <TAG>arg</TAG>
 | 
						|
					arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
 | 
						|
				return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
 | 
						|
			}
 | 
						|
		}
 | 
						|
		for (control_word in Font_cmd_map)	# look for \xxx{...}
 | 
						|
		{
 | 
						|
			if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
 | 
						|
				("\\" control_word))
 | 
						|
			{
 | 
						|
				n = open_brace + 1
 | 
						|
				arg = trim(substr(s,n,k - n))
 | 
						|
				if (Font_cmd_map[control_word] == "toupper") # arg -> ARG
 | 
						|
					arg = toupper(arg)
 | 
						|
				else if (Font_cmd_map[control_word] != "") # arg -> <TAG>arg</TAG>
 | 
						|
					arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
 | 
						|
				n = open_brace - length(control_word) - 1
 | 
						|
				return (substr(s,1,n) arg html_fonts(substr(s,k)))
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return (s)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_header()
 | 
						|
{
 | 
						|
	USER = ENVIRON["USER"]
 | 
						|
	if (USER == "")
 | 
						|
	    USER = ENVIRON["LOGNAME"]
 | 
						|
	if (USER == "")
 | 
						|
	    USER = "????"
 | 
						|
	"hostname" | getline HOSTNAME
 | 
						|
	"date" | getline DATE
 | 
						|
	##("ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'") | getline PERSONAL_NAME
 | 
						|
	("getent passwd " USER " | awk -F: '{print $5}'") | getline PERSONAL_NAME
 | 
						|
 | 
						|
	if (PERSONAL_NAME == "")
 | 
						|
	    ("grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'") | getline PERSONAL_NAME
 | 
						|
 | 
						|
 | 
						|
	print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
 | 
						|
	print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
 | 
						|
	##print "<!-- on " DATE " -->"
 | 
						|
	##print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
 | 
						|
	print ""
 | 
						|
	print ""
 | 
						|
	print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
 | 
						|
	print ""
 | 
						|
	print "<HTML>"
 | 
						|
	print prefix(1) "<HEAD>"
 | 
						|
	print prefix(2) "<TITLE>"
 | 
						|
	print prefix(3)  Journal
 | 
						|
	print prefix(2) "</TITLE>"
 | 
						|
	##print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
 | 
						|
	print prefix(1) "</HEAD>"
 | 
						|
	print ""
 | 
						|
	print prefix(1) "<BODY>"
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_label( label)
 | 
						|
{
 | 
						|
	label = Volume "(" Number "):" Month ":" Year
 | 
						|
	gsub(/[^A-Za-z0-9():,;.\/\-]/,"",label)
 | 
						|
	return (label)
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_length(s)
 | 
						|
{	# Return visible length of s, ignoring any HTML markup
 | 
						|
	if (HTML)
 | 
						|
	{
 | 
						|
		gsub(/<\/?[^>]*>/,"",s)		# remove SGML tags
 | 
						|
		gsub(/&[A-Za-z0-9]+;/,"",s)	# remove SGML entities
 | 
						|
	}
 | 
						|
	return (length(s))
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_toc()
 | 
						|
{
 | 
						|
	print prefix(2) "<H1>"
 | 
						|
	print prefix(3) "Table of contents for issues of " Journal
 | 
						|
	print prefix(2) "</H1>"
 | 
						|
	print HTML_TOC
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_toc_entry()
 | 
						|
{
 | 
						|
	HTML_TOC = HTML_TOC "        <A HREF=\"#" html_label() "\">"
 | 
						|
	HTML_TOC = HTML_TOC vol_no_month_year()
 | 
						|
	HTML_TOC = HTML_TOC "</A><BR>" "\n"
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function html_trailer()
 | 
						|
{
 | 
						|
	html_end_pre()
 | 
						|
	print prefix(1) "</BODY>"
 | 
						|
	print "</HTML>"
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function initialize()
 | 
						|
{
 | 
						|
	# NB: Update these when the program changes
 | 
						|
	VERSION_DATE = "[09-Oct-1996]"
 | 
						|
	VERSION_NUMBER = "1.00"
 | 
						|
 | 
						|
	HTML = (HTML == "") ? 0 : (0 + HTML)
 | 
						|
 | 
						|
	if (INDENT == "")
 | 
						|
		INDENT = 4
 | 
						|
 | 
						|
	if (HTML == 0)
 | 
						|
		INDENT = 0	# indentation suppressed in ASCII mode
 | 
						|
 | 
						|
	LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."
 | 
						|
 | 
						|
	MAX_TITLE_CHARS = 36	# 36 produces a 79-char output line when there is
 | 
						|
				# just an initial page number.  If this is
 | 
						|
				# increased, the LEADERS string may need to be
 | 
						|
				# lengthened.
 | 
						|
 | 
						|
	MIN_LEADERS = 4		# Minimum number of characters from LEADERS
 | 
						|
				# required when leaders are used.  The total
 | 
						|
				# number of characters that can appear in a
 | 
						|
				# title line is MAX_TITLE_CHARS + MIN_LEADERS.
 | 
						|
				# Leaders are omitted when the title length is
 | 
						|
				# between MAX_TITLE_CHARS and this sum.
 | 
						|
 | 
						|
	MIN_LEADERS_SPACE = "        "	# must be at least MIN_LEADERS characters long
 | 
						|
 | 
						|
	Month_expansion["jan"]	= "January"
 | 
						|
	Month_expansion["feb"]	= "February"
 | 
						|
	Month_expansion["mar"]	= "March"
 | 
						|
	Month_expansion["apr"]	= "April"
 | 
						|
	Month_expansion["may"]	= "May"
 | 
						|
	Month_expansion["jun"]	= "June"
 | 
						|
	Month_expansion["jul"]	= "July"
 | 
						|
	Month_expansion["aug"]	= "August"
 | 
						|
	Month_expansion["sep"]	= "September"
 | 
						|
	Month_expansion["oct"]	= "October"
 | 
						|
	Month_expansion["nov"]	= "November"
 | 
						|
	Month_expansion["dec"]	= "December"
 | 
						|
 | 
						|
	Font_cmd_map["\\emph"]		= "EM"
 | 
						|
	Font_cmd_map["\\textbf"]	= "B"
 | 
						|
	Font_cmd_map["\\textit"]	= "I"
 | 
						|
	Font_cmd_map["\\textmd"]	= ""
 | 
						|
	Font_cmd_map["\\textrm"]	= ""
 | 
						|
	Font_cmd_map["\\textsc"]	= "toupper"
 | 
						|
	Font_cmd_map["\\textsl"]	= "I"
 | 
						|
	Font_cmd_map["\\texttt"]	= "t"
 | 
						|
	Font_cmd_map["\\textup"]	= ""
 | 
						|
 | 
						|
	Font_decl_map["\\bf"]		= "B"
 | 
						|
	Font_decl_map["\\em"]		= "EM"
 | 
						|
	Font_decl_map["\\it"]		= "I"
 | 
						|
	Font_decl_map["\\rm"]		= ""
 | 
						|
	Font_decl_map["\\sc"]		= "toupper"
 | 
						|
	Font_decl_map["\\sf"]		= ""
 | 
						|
	Font_decl_map["\\tt"]		= "TT"
 | 
						|
	Font_decl_map["\\itshape"]	= "I"
 | 
						|
	Font_decl_map["\\upshape"]	= ""
 | 
						|
	Font_decl_map["\\slshape"]	= "I"
 | 
						|
	Font_decl_map["\\scshape"]	= "toupper"
 | 
						|
	Font_decl_map["\\mdseries"]	= ""
 | 
						|
	Font_decl_map["\\bfseries"]	= "B"
 | 
						|
	Font_decl_map["\\rmfamily"]	= ""
 | 
						|
	Font_decl_map["\\sffamily"]	= ""
 | 
						|
	Font_decl_map["\\ttfamily"]	= "TT"
 | 
						|
}
 | 
						|
 | 
						|
function min(a,b)
 | 
						|
{
 | 
						|
	return (a < b) ? a : b
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function prefix(level)
 | 
						|
{
 | 
						|
	# Return a prefix of up to 60 blanks
 | 
						|
 | 
						|
	if (In_PRE)
 | 
						|
		return ("")
 | 
						|
	else
 | 
						|
		return (substr("                                                            ", \
 | 
						|
			1, INDENT * level))
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function print_line(line)
 | 
						|
{
 | 
						|
	if (HTML)		# must buffer in memory so that we can accumulate TOC
 | 
						|
		Body[++BodyLines] = line
 | 
						|
	else
 | 
						|
		print line
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
function print_toc_line(author,title,pages, extra,leaders,n,t)
{
	# Format and emit one line of the table of contents.
	#
	#   author   right-justified in a 31-character field
	#   title    one line of the (possibly multiline) title
	#   pages    page number; empty for every title line but the last
	#
	# The globals Title_prefix and Title_suffix wrap the title on
	# this line and are then cleared: with a multiline title, the
	# hypertext link goes only on the first line, because a
	# multiline link looks awful with long underlines under the
	# leading indentation.

	if (pages != "")	# last title line: append leaders and page number
	{
		n = html_length(title)		# potentially expensive
		extra = n % 2			# extra space keeps leader dots aligned

		if (n > MAX_TITLE_CHARS)	# title (almost) fills line, so no leaders
			leaders = substr(MIN_LEADERS_SPACE, 1, \
					 (MAX_TITLE_CHARS + MIN_LEADERS - extra - n))
		else				# room left over, so fill it with leaders
			leaders = substr(LEADERS, 1, MAX_TITLE_CHARS + MIN_LEADERS - extra - \
				   min(MAX_TITLE_CHARS,n))

		t = sprintf("%31s   %s%s%s%s%s %4s", \
			    author, Title_prefix, title, Title_suffix, \
			    (extra ? " " : ""), leaders, pages)
	}
	else			# continuation line: no leaders, no page number
		t = sprintf("%31s   %s%s%s", author, Title_prefix, title, Title_suffix)

	# Forget any hypertext link material now that it has been used
	Title_suffix = ""
	Title_prefix = ""

	# Efficiency note: an earlier version accumulated the body in a
	# single scalar like this: "Body = Body t".  Profiling revealed
	# this statement as the major hot spot, and the change to array
	# storage made the program more than twice as fast.  This
	# suggests that awk might benefit from an optimization of
	# "s = s t" that uses realloc() instead of malloc().
	if (!HTML)
		print t
	else
		Body[++BodyLines] = t
}
 | 
						|
 | 
						|
 | 
						|
function protect_SGML_characters(s)
{
    # Return a copy of s in which the four SGML reserved characters
    # (&, <, >, ") are replaced by their character entities, so that
    # the text can be embedded safely in HTML.
    #
    # In a gsub() replacement string an unescaped "&" stands for the
    # matched text, so each literal ampersand of an entity must be
    # written as "\\&".

    gsub(/&/,"\\&amp;",s)	# NB: this one MUST be first, or the
				# ampersands of later entities get re-escaped
    gsub(/</,"\\&lt;",s)
    gsub(/>/,"\\&gt;",s)
    gsub(/\"/,"\\&quot;",s)
    ##gsub(/\"/,"\\&#34;",s)	# numeric alternative to &quot;
    return (s)
}
 | 
						|
 | 
						|
 | 
						|
function strip_braces(s, k)
{	# Return s with all non-backslashed braces removed: opening
	# braces are stripped first, then closing braces.

	k = strip_char(s,"{")
	return (strip_char(k,"}"))
}
 | 
						|
 | 
						|
 | 
						|
function strip_char(s,c, k,out)
{	# Strip non-backslashed instances of c from s, and return the result.
	#
	#   s    input string
	#   c    character (or substring) to remove
	#
	# An instance of c immediately preceded by a backslash is kept,
	# together with its backslash.  The string is consumed left to
	# right: everything up to and including each instance of c is
	# moved to the result (minus the instance itself when it is not
	# backslashed), and the scan continues on the remainder.

	if (c == "")		# nothing to strip; also avoids relying on index(s,"")
		return (s)

	out = ""
	while ((k = index(s,c)) > 0)	# found another instance
	{
		if (substr(s,k-1,1) != "\\")	# not backslashed: drop it
			out = out substr(s,1,k-1)
		else				# backslashed: preserve it
			out = out substr(s,1,k)
		s = substr(s,k+1)		# continue with the remainder
	}
	return (out s)
}
 | 
						|
 | 
						|
 | 
						|
function strip_html(s)
{
	# Return s with every <...> and </...> tag deleted; the text
	# between tags is left untouched.
	gsub(/<\/?[^>]*>/,"",s)

	return (s)
}
 | 
						|
 | 
						|
 | 
						|
function terminate()
{
	# Finish the run.  Only HTML output needs any work here: the
	# open preformatted section is closed, and the header, table of
	# contents, buffered body, and trailer are then written in that
	# order.

	if (!HTML)
		return

	html_end_pre()

	HTML = 0	# NB: stop line buffering
	html_header()
	html_toc()
	html_body()
	html_trailer()
}
 | 
						|
 | 
						|
 | 
						|
function TeX_to_HTML(s, k,n,parts)
{
	# Translate a string containing TeX markup to output text.
	#
	# The string is split at dollar signs into alternating non-math
	# (odd-numbered) and math (even-numbered) parts.  Non-math
	# parts go through TeX_to_HTML_nonmath() and then have their
	# braces stripped; math parts go through TeX_to_HTML_math()
	# with their braces intact.  The parts are rejoined with the
	# dollar signs restored.
	#
	# In HTML mode, the SGML reserved characters < and > are
	# converted to entities before the split, and double quotes
	# after it.  NOTE(review): the quote conversion is presumably
	# deferred so that the TeX \" accent is still recognizable
	# during the non-math translation -- confirm against
	# html_accents().  Ampersands are handled in the per-part
	# translation functions.
	#
	# In a gsub() replacement string, "\\&" yields a literal
	# ampersand rather than the matched text.

	if (HTML)
	{
	    gsub(/>/,	"\\&gt;",	s)
	    gsub(/</,	"\\&lt;",	s)
	    ##gsub(/"/,	"\\&quot;",	s)
	}

	# Display math ($$...$$) is changed to triple dollars so that
	# its content lands in an even-numbered (math) slot of the
	# split; with only two dollars it would be treated as non-math.
	gsub(/[$][$]/,"$$$",s)	# change display math to triple dollars for split
	n = split(s,parts,/[$]/)# split into non-math (odd) and math (even) parts

	s = ""
	for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
		s = s ((k > 1) ? "$" : "") \
			((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
			TeX_to_HTML_math(parts[k]))

	gsub(/[$][$][$]/,"$$",s) # restore display math

	if (HTML)
	{
	    gsub(/"/,	"\\&quot;",	s)
	}

	return (s)
}
 | 
						|
 | 
						|
 | 
						|
function TeX_to_HTML_math(s)
{
	# Translate the math-mode part of a TeX string.
	# Mostly a dummy for now, but HTML 3 could support some math translation

	# In the replacement string, "\\&" is a literal ampersand, so
	# each TeX "\&" becomes the entity "&amp;".
	gsub(/\\&/,"\\&amp;",s)	# reduce TeX ampersands to SGML entities

	return (s)
}
 | 
						|
 | 
						|
 | 
						|
function TeX_to_HTML_nonmath(s)
{
	# Translate the non-math part of a TeX string.
	#
	# A few TeX control sequences are reduced to ordinary
	# characters for either output format.  For HTML output, TeX
	# ampersands become SGML entities and accents and font changes
	# are translated by html_accents() and html_fonts(); for plain
	# text output, the remaining TeX markup is discarded.
	#
	# Strings without a backslash contain no TeX control sequences
	# and are returned unchanged.

	if (index(s,"\\") > 0)			# important optimization
	{
		gsub(/\\slash +/,"/",s)		# replace TeX slashes with conventional ones
		gsub(/ *\\emdash +/," --- ",s)	# replace BibNet emdashes with conventional ones
		gsub(/\\%/,"%",s)		# reduce TeX percents to conventional ones
		gsub(/\\[$]/,"$",s)		# reduce TeX dollars to conventional ones
		gsub(/\\#/,"#",s)		# reduce TeX sharps to conventional ones

		if (HTML)			# translate TeX markup to HTML
		{
			# "\\&" in the replacement is a literal ampersand,
			# so each TeX "\&" becomes the entity "&amp;"
			gsub(/\\&/,"\\&amp;",s)	# reduce TeX ampersands to SGML entities
			s = html_accents(s)
			s = html_fonts(s)
		}
		else				# plain ASCII text output: discard all TeX markup
		{
			gsub(/\\\&/, "\\&", s)	# reduce TeX ampersands to conventional ones

			gsub(/\\[a-z][a-z] +/,"",s) # remove TeX font changes
			gsub(/\\[^A-Za-z]/,"",s) # remove remaining TeX control symbols
		}
	}
	return (s)
}
 | 
						|
 | 
						|
 | 
						|
function trim(s)
{
    # Return s without its leading and trailing blanks and tabs.
    # Each pattern is anchored, so it can match at most once.
    sub(/[ \t]+$/,"",s)
    sub(/^[ \t]+/,"",s)
    return (s)
}
 | 
						|
 | 
						|
 | 
						|
function vol_no_month_year()
{
	# Build the issue heading from the globals Volume, Number,
	# Month, and Year, each passed through wrap().
	return ("Volume " wrap(Volume) \
		",  Number " wrap(Number) \
		", " wrap(Month) \
		", " wrap(Year))
}
 | 
						|
 | 
						|
 | 
						|
function wrap(value)
{
	# Return value enclosed in <STRONG>...</STRONG> tags for HTML
	# output, and unchanged otherwise.
	if (HTML)
		return ("<STRONG>" value "</STRONG>")

	return (value)
}
 |