diff --git a/codepot/README.md b/codepot/README.md index 630ea35b..fafe3830 100644 --- a/codepot/README.md +++ b/codepot/README.md @@ -336,7 +336,7 @@ jQuery JavaScript Library v1.11.2 MIT See http://jquery.org/license jQuery UI 1.9.2 MIT See http://jquery.org/license X-editable 1.5.1 MIT PHPGraphLib MIT -CLOC 1.62 GPL +CLOC 1.91 GPL Flot https://github.com/flot/flot/blob/master/LICENSE.txt Font Awesome 4.3.0 MIT & SIL OFL 1.1 D3.js 3.5.5 BSD diff --git a/codepot/etc/cloc.pl b/codepot/etc/cloc.pl index d3b9faeb..adca5f4d 100755 --- a/codepot/etc/cloc.pl +++ b/codepot/etc/cloc.pl @@ -1,14 +1,14 @@ #!/usr/bin/env perl # cloc -- Count Lines of Code {{{1 -# Copyright (C) 2006-2017 Al Danial +# Copyright (C) 2006-2021 Al Danial # First release August 2006 # # Includes code from: # - SLOCCount v2.26 # http://www.dwheeler.com/sloccount/ # by David Wheeler. -# - Regexp::Common v2013031301 -# http://search.cpan.org/~abigail/Regexp-Common-2013031301/lib/Regexp/Common.pm +# - Regexp::Common v2017060201 +# http://search.cpan.org/~abigail/Regexp-Common-2017060201/lib/Regexp/Common.pm # by Damian Conway and Abigail. # - Win32::Autoglob # http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm @@ -29,7 +29,7 @@ # . # # 1}}} -my $VERSION = "1.75"; # odd number == beta; even number == stable +my $VERSION = "1.91"; # odd number == beta; even number == stable my $URL = "github.com/AlDanial/cloc"; # 'https://' pushes header too wide require 5.006; # use modules {{{1 @@ -46,7 +46,6 @@ use IO::File; use List::Util qw( min max ); use Cwd; use POSIX qw { strftime ceil}; - # Parallel::ForkManager isn't in the standard distribution. # Use it only if installed, and only if --processes=N is given. # The module load happens in get_max_processes(). @@ -69,12 +68,11 @@ $HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION; my $HAVE_Rexexp_Common; # Regexp::Common isn't in the standard distribution. It will # be installed in a temp directory if necessary. -BEGIN { - if (eval "use Regexp::Common;") { - $HAVE_Rexexp_Common = 1; - } else { - $HAVE_Rexexp_Common = 0; - } +eval "use Regexp::Common qw ( comment ) "; +if (defined $Regexp::Common::VERSION) { + $HAVE_Rexexp_Common = 1; +} else { + $HAVE_Rexexp_Common = 0; } my $HAVE_Algorith_Diff = 0; @@ -136,12 +134,26 @@ if ($ON_WINDOWS and $ENV{'SHELL'}) { $ON_WINDOWS = 1; # MKS defines $SHELL but still acts like Windows } } +my $config_file = ''; +if ( $ENV{'HOME'} ) { + $config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt'); +} elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) { + $config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc'); +} +# $config_file may be updated by check_alternate_config_files() my $NN = chr(27) . "[0m"; # normal - $NN = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR: is it a terminal? + $NN = "" if $ON_WINDOWS or !(-t STDOUT); # -t STDOUT: is it a terminal? my $BB = chr(27) . "[1m"; # bold - $BB = "" if $ON_WINDOWS or !(-t STDERR); + $BB = "" if $ON_WINDOWS or !(-t STDOUT); my $script = basename $0; + +# Intended for v1.88: +# --git-diff-simindex Git diff strategy #3: use git's similarity index +# (git diff -M --name-status) to identify file pairs +# to compare. This is especially useful to compare +# files that were renamed between the commits. + my $brief_usage = " cloc -- Count Lines of Code @@ -159,14 +171,15 @@ Usage: Example: cloc --diff Python-3.5.tar.xz python-3.6/ $script --help shows full documentation on the options. -http://$URL has numerous examples and more information. 
+https://$URL has numerous examples and more information. "; my $usage = " Usage: $script [options] | | Count, or compute differences of, physical lines of source code in the - given files (may be archives such as compressed tarballs or zip files) - and/or recursively below the given directories. + given files (may be archives such as compressed tarballs or zip files, + or git commit hashes or branch names) and/or recursively below the + given directories. ${BB}Input Options${NN} --extract-with= This option is only needed if cloc is unable @@ -189,8 +202,16 @@ Usage: $script [options] | | , which has one file/directory name per line. Only exact matches are counted; relative path names will be resolved starting from - the directory where cloc is invoked. - See also --exclude-list-file. + the directory where cloc is invoked. Set + to - to read file names from a STDIN pipe. + See also --exclude-list-file, --config. + --diff-list-file= Take the pairs of file names to be diff'ed from + , whose format matches the output of + --diff-alignment. (Run with that option to + see a sample.) The language identifier at the + end of each line is ignored. This enables --diff + mode and bypasses file pair alignment logic. + See also --config. --vcs= Invoke a system call to to obtain a list of files to work on. If is 'git', then will invoke 'git ls-files' to get a file list and @@ -211,6 +232,10 @@ Usage: $script [options] | | to 'auto' selects between 'git' + and 'svn' (or neither) depending on the presence + of a .git or .svn subdirectory below the directory + where cloc is invoked. --unicode Check binary files to see if they contain Unicode expanded ASCII text. This causes performance to drop noticeably. @@ -221,6 +246,18 @@ Usage: $script [options] | | Read command line switches from instead of + the default location of $config_file. + The file should contain one switch, along with + arguments (if any), per line. Blank lines and lines + beginning with '#' are skipped. Options given on + the command line take priority over entries read from + the file. + If a directory is also given with any of these + switches: --list-file, --exclude-list-file, + --read-lang-def, --force-lang-def, --diff-list-file + and a config file exists in that directory, it will + take priority over $config_file. --count-and-diff First perform direct code counts of source file(s) of and separately, then perform a diff @@ -235,14 +272,24 @@ Usage: $script [options] | | Ignore files which take more than seconds - to process. Default is 10 seconds. - (Large files with many repeated lines can cause - Algorithm::Diff::sdiff() to take hours.) + to process. Default is 10 seconds. Setting + to 0 allows unlimited time. (Large files with many + repeated lines can cause Algorithm::Diff::sdiff() + to take hours.) See also --timeout. + --docstring-as-code cloc considers docstrings to be comments, but this is + not always correct as docstrings represent regular + strings when they appear on the right hand side of an + assignment or as function arguments. This switch + forces docstrings to be counted as code. --follow-links [Unix only] Follow symbolic links to directories (sym links to files are always followed). + See also --stat. --force-lang=[,] Process all files that have a extension with the counter for language . For @@ -260,13 +307,14 @@ Usage: $script [options] | | | | Count files without extensions using the counter. 
This option overrides internal logic for files without extensions (where such files @@ -314,7 +374,8 @@ Usage: $script [options] | | defines a language cloc already knows about, cloc's definition will take precedence. Use --force-lang-def to over-ride cloc's - definitions (see also --write-lang-def ). + definitions (see also --write-lang-def, + --write-lang-def-incl-dup, --config). --script-lang=, Process all files that invoke as a #! scripting language with the counter for language . For example, files that begin with @@ -330,10 +391,25 @@ Usage: $script [options] | | Skip the first lines of each file. If a + comma separated list of extensions is also given, + only skip lines from those file types. Example: + --skip-leading=10,cpp,h + will skip the first ten lines of *.cpp and *.h + files. This is useful for ignoring boilerplate + text. --skip-uniqueness Skip the file uniqueness check. This will give a performance boost at the expense of counting files with identical contents multiple times (if such duplicates exist). + --stat Some file systems (AFS, CD-ROM, FAT, HPFS, SMB) + do not have directory 'nlink' counts that match + the number of its subdirectories. Consequently + cloc may undercount or completely skip the + contents of such file systems. This switch forces + File::Find to stat directories to obtain the + correct count. File search speed will decrease. + See also --follow-links. --stdin-name= Give a file name to use to determine the language for standard input. (Use - as the input name to receive source code via STDIN.) @@ -344,21 +420,33 @@ Usage: $script [options] | | appended to it. It is written to the current directory unless --original-dir is on. + --strip-str-comments Replace comment markers embedded in strings with + 'xx'. This attempts to work around a limitation + in Regexp::Common::Comment where comment markers + embedded in strings are seen as actual comment + markers and not strings, often resulting in a + 'Complex regular subexpression recursion limit' + warning and incorrect counts. There are two + disadvantages to using this switch: 1/code count + performance drops, and 2/code generated with + --strip-comments will contain different strings + where ever embedded comments are found. --sum-reports Input arguments are report files previously - created with the --report-file option. Makes - a cumulative set of results containing the + created with the --report-file option in plain + format (eg. not JSON, YAML, XML, or SQL). + Makes a cumulative set of results containing the sum of data from the individual report files. - --processes=NUM Sets the maximum number of processes that cloc - uses. If this parameter is set to 0, multi- - processing will be disabled. On Linux and MacOS - systems, cloc creates up to one process per core - by default if a recent version of the - Parallel::ForkManager module is available. - Multiprocessing is disabled by default if cloc - is unable to determine the number of CPU cores. - Multiprocessing is not supported on Windows systems - and on systems which don't have a recent version - of the Parallel::ForkManager module. + --timeout Ignore files which take more than seconds + to process at any of the language's filter stages. + The default maximum number of seconds spent on a + filter stage is the number of lines in the file + divided by one thousand. Setting to 0 allows + unlimited time. See also --diff-timeout. + --processes=NUM [Available only on systems with a recent version + of the Parallel::ForkManager module. 
Not + available on Windows.] Sets the maximum number of + cores that cloc uses. The default value of 0 + disables multiprocessing. --unix Override the operating system autodetection logic and run in UNIX mode. See also --windows, --show-os. @@ -376,6 +464,8 @@ Usage: $script [options] | | Exclude files containing text that matches the given + regular expression. --exclude-dir=[,D2,] Exclude the given comma separated directories D1, D2, D3, et cetera, from being scanned. For example --exclude-dir=.cache,test will skip @@ -390,14 +480,15 @@ Usage: $script [options] | | [,[...]] Do not count files having the given file name extensions. - --exclude-lang=[,L2,] Exclude the given comma separated languages + --exclude-lang=[,L2[...]] + Exclude the given comma separated languages L1, L2, L3, et cetera, from being counted. --exclude-list-file= Ignore files and/or directories whose names appear in . should have one file name per line. Only exact matches are ignored; relative path names will be resolved starting from the directory where cloc is invoked. - See also --list-file. + See also --list-file, --config. --fullpath Modifies the behavior of --match-f, --not-match-f, and --not-match-d to include the file's path in the regex, not just the file's basename. @@ -406,8 +497,14 @@ Usage: $script [options] | | [,L2,] Count only the given comma separated languages - L1, L2, L3, et cetera. + --include-ext=[,ext2[...]] + Count only languages having the given comma + separated file extensions. Use --show-ext to + see the recognized extensions. + --include-lang=[,L2[...]] + Count only the given comma separated languages + L1, L2, L3, et cetera. Use --show-lang to see + the list of recognized languages. --match-d= Only count files in directories matching the Perl regex. For example --match-d='/(src|include)/' @@ -472,8 +569,14 @@ Usage: $script [options] | | Writes to the language processing filters then exits. Useful as a first step to creating - custom language definitions (see also - --force-lang-def, --read-lang-def). + custom language definitions. Note: languages which + map to the same file extension will be excluded. + (See also --force-lang-def, --read-lang-def). + --write-lang-def-incl-dup= + Same as --write-lang-def, but includes duplicated + extensions. This generates a problematic language + definition file because cloc will refuse to use + it until duplicates are removed. ${BB}Output Options${NN} --3 Print third-generation language output. @@ -482,11 +585,11 @@ Usage: $script [options] | | # lines of code - X = 'cm' -> # lines of code + comments - X = 'cb' -> # lines of code + blanks - X = 'cmb' -> # lines of code + comments + blanks + of X in the denominator, where X is + c meaning lines of code + cm meaning lines of code + comments + cb meaning lines of code + blanks + cmb meaning lines of code + comments + blanks For example, if using method 'c' and your code has twice as many lines of comments as lines of code, the value in the comment column will @@ -494,6 +597,13 @@ Usage: $script [options] | | Use the character as the delimiter for comma separated files instead of ,. This switch forces + --file-encoding= Write output files using the encoding instead of + the default ASCII ( = 'UTF-7'). Examples: 'UTF-16', + 'euc-kr', 'iso-8859-16'. Known encodings can be + printed with + perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"' + --hide-rate Do not show line and file processing rates in the + output header. This makes output deterministic. 
--json Write the results as JavaScript Object Notation (JSON) formatted output. --md Write the results as Markdown-formatted text. @@ -505,6 +615,16 @@ Usage: $script [options] | | Write the results to instead of STDOUT. + --summary-cutoff=X:N Aggregate to 'Other' results having X lines + below N where X is one of + c meaning lines of code + f meaning files + m meaning lines of comments + cm meaning lines of code + comments + Appending a percent sign to N changes + the calculation from straight count to + percentage. + Ignored with --diff or --by-file. --sql= Write results as SQL create and insert statements which can be read by a database program such as SQLite. If is -, output is sent to STDOUT. @@ -514,8 +634,8 @@ Usage: $script [options] | | Use as the project identifier for the current run. Only valid with the --sql option. --sql-style= + + +

+';
+    print "<- html_header\n" if $opt_v > 2;
+} # 1}}}
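# A usage sketch, not taken from this patch: html_header() above and
# html_end() below presumably bracket the generated table markup when an
# HTML report is requested, e.g.
#
#     my $html_report = html_header() . $table_rows . html_end();
#
# where $table_rows is a hypothetical variable holding the per-language
# or per-file rows; the actual call sites are outside this excerpt.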
+sub html_end {                               # {{{1
+return
+'
+ + +'; +} # 1}}} +sub die_unknown_lang { # {{{1 + my ($lang, $option_name) = @_; + die "Unknown language '$lang' used with $option_name option. " . + "The command\n $script --show-lang\n" . + "will print all recognized languages. Language names are " . + "case sensitive.\n" ; +} # 1}}} +sub unicode_file { # {{{1 + my $file = shift @_; + + print "-> unicode_file($file)\n" if $opt_v > 2; + return 0 if (-s $file > 2_000_000); + # don't bother trying to test binary files bigger than 2 MB + + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + return 0; + } + my @lines = <$IN>; + $IN->close; + + if (unicode_to_ascii( join('', @lines) )) { + print "<- unicode_file()\n" if $opt_v > 2; + return 1; + } else { + print "<- unicode_file()\n" if $opt_v > 2; + return 0; + } + +} # 1}}} +sub unicode_to_ascii { # {{{1 + my $string = shift @_; + + # A trivial attempt to convert UTF-16 little or big endian + # files into ASCII. These files exhibit the following byte + # sequence: + # byte 1: 255 + # byte 2: 254 + # byte 3: ord of ASCII character + # byte 4: 0 + # byte 3+i: ord of ASCII character + # byte 4+i: 0 + # or + # byte 1: 255 + # byte 2: 254 + # byte 3: 0 + # byte 4: ord of ASCII character + # byte 3+i: 0 + # byte 4+i: ord of ASCII character + # + print "-> unicode_to_ascii()\n" if $opt_v > 2; + + my $length = length $string; +#print "length=$length\n"; + return '' if $length <= 3; + my @unicode = split(//, $string); + + # check the first 100 characters (= 200 bytes) for big or + # little endian UTF-16 encoding + my $max_peek = $length < 200 ? $length : 200; + my $max_for_pass = $length < 200 ? 0.9*$max_peek/2 : 90; + my @view_1 = (); + for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] } + my @view_2 = (); + for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] } + + my $points_1 = 0; + foreach my $C (@view_1) { + ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13 + or ord($C) == 10 + or ord($C) == 9; + } + + my $points_2 = 0; + foreach my $C (@view_2) { + ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13 + or ord($C) == 10 + or ord($C) == 9; + } +#print "points 1: $points_1\n"; +#print "points 2: $points_2\n"; +#print "max_peek : $max_peek\n"; +#print "max_for_pass: $max_for_pass\n"; + + my $offset = undef; + if ($points_1 > $max_for_pass) { $offset = 2; } + elsif ($points_2 > $max_for_pass) { $offset = 3; } + else { + print "<- unicode_to_ascii() a p1=$points_1 p2=$points_2\n" if $opt_v > 2; + return ''; + } # neither big or little endian UTF-16 + + my @ascii = (); + for (my $i = $offset; $i < $length; $i += 2) { + # some compound characters are made of HT (9), LF (10), or CR (13) + # True HT, LF, CR are followed by 00; only add those. + my $L = $unicode[$i]; + if (ord($L) == 9 or ord($L) == 10 or ord($L) == 13) { + my $companion; + if ($points_1) { + last if $i+1 >= $length; + $companion = $unicode[$i+1]; + } else { + $companion = $unicode[$i-1]; + } + if (ord($companion) == 0) { + push @ascii, $L; + } else { + push @ascii, " "; # no clue what this letter is + } + } else { + push @ascii, $L; + } + } + print "<- unicode_to_ascii() b p1=$points_1 p2=$points_2\n" if $opt_v > 2; + return join("", @ascii); +} # 1}}} +sub uncompress_archive_cmd { # {{{1 + my ($archive_file, ) = @_; + + # Wrap $archive_file in single or double quotes in the system + # commands below to avoid filename chicanery (including + # spaces in the names). 
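    # An illustrative aside (not part of this patch), assuming a
    # hypothetical archive name containing a space, to show why the
    # quoting in the commands below matters:
    #
    #     my $archive_file = "my project.tar.gz";
    #     system "tar xf $archive_file";     # shell splits this into two arguments
    #     system "tar xf '$archive_file'";   # shell sees a single argument
    #
    # A name that itself contains a single quote would still defeat the
    # single-quoted form.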
+ + print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2; + my $extract_cmd = ""; + my $missing = ""; + if ($opt_extract_with) { + ( $extract_cmd = $opt_extract_with ) =~ s/>FILE -"; + } elsif ($archive_file =~ /\.tar$/ and $ON_WINDOWS) { + $extract_cmd = "tar -xf \"$archive_file\""; + } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or + $archive_file =~ /\.tgz$/ ) and !$ON_WINDOWS) { + if (external_utility_exists("gzip --version")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "gzip -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "gzip"; + } + } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS) { + if (external_utility_exists("bzip2 --help")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS) { + if (external_utility_exists("unxz --version")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "unxz -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS) { + $extract_cmd = "tar xf '$archive_file'"; + } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) { + if (external_utility_exists("cpio --version")) { + if (external_utility_exists("rpm2cpio")) { + $extract_cmd = "rpm2cpio '$archive_file' | cpio -i"; + } else { + $missing = "rpm2cpio"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.(whl|zip)$/i and !$ON_WINDOWS) { + if (external_utility_exists("unzip")) { + $extract_cmd = "unzip -qq -d . '$archive_file'"; + } else { + $missing = "unzip"; + } + } elsif ($archive_file =~ /\.deb$/i and !$ON_WINDOWS) { + # only useful if the .deb contains source code--most + # .deb files just have compiled executables + if (external_utility_exists("dpkg-deb")) { + $extract_cmd = "dpkg-deb -x '$archive_file' ."; + } else { + $missing = "dpkg-deb"; + } + } elsif ($ON_WINDOWS and $archive_file =~ /\.(whl|zip)$/i) { + # use unzip on Windows (comes with git-for-Windows) + if (external_utility_exists("unzip")) { + $extract_cmd = "unzip -qq -d . \"$archive_file\" "; + } else { + $missing = "unzip"; + } + } + print "<- uncompress_archive_cmd\n" if $opt_v > 2; + if ($missing) { + die "Unable to expand $archive_file because external\n", + "utility '$missing' is not available.\n", + "Another possibility is to use the --extract-with option.\n"; + } else { + return $extract_cmd; + } +} +# 1}}} +sub read_list_file { # {{{1 + my ($file, ) = @_; + # reads filenames from a STDIN pipe if $file == "-" + + print "-> read_list_file($file)\n" if $opt_v > 2; + my @entry = (); + + if ($file eq "-") { + # read from a STDIN pipe + my $IN; + open($IN, $file); + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + return (); + } + while (<$IN>) { + next if /^\s*$/ or /^\s*#/; # skip empty or commented lines + s/\cM$//; # DOS to Unix + chomp; + push @entry, $_; + } + $IN->close; + } else { + # read from an actual file + foreach my $line (read_file($file)) { + next if $line =~ /^\s*$/ or $line =~ /^\s*#/; + $line =~ s/\cM$//; # DOS to Unix + chomp $line; + push @entry, $line; + } + } + + print "<- read_list_file\n" if $opt_v > 2; + return @entry; +} +# 1}}} +sub external_utility_exists { # {{{1 + my $exe = shift @_; + + my $success = 0; + if ($ON_WINDOWS) { + $success = 1 unless system $exe . 
' > nul'; + } else { + $success = 1 unless system $exe . ' >/dev/null 2>&1'; + if (!$success) { + $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1'; + } + } + + return $success; +} # 1}}} +sub write_xsl_file { # {{{1 + print "-> write_xsl_file\n" if $opt_v > 2; + my $XSL = # {{{2 +' + + + + + + + CLOC Results + + + +

+'; +# 2}}} + + if ($opt_by_file) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
FileBlankCommentCodeLanguage3rd Generation EquivalentScale
Total
+
+'; +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
LanguageFilesBlankCommentCodeScale3rd Generation Equivalent
Total
+'; +# 2}}} + } + + $XSL.= <<'EO_XSL'; # {{{2 + + +
+
+ +EO_XSL +# 2}}} + + my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + CLOC Results + + + +

+EO_DIFF_XSL +# 2}}} + + if ($opt_by_file) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + +
Same
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Modified
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Added
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Removed
FileBlankCommentCode
+EO_DIFF_XSL +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + + + +
Same
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Modified
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Added
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Removed
LanguageFilesBlankCommentCode
+EO_DIFF_XSL +# 2}}} + + } + + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + +
+
+EO_DIFF_XSL +# 2}}} + if ($opt_diff) { + write_file($CLOC_XSL, {}, ( $XSL_DIFF ) ); + } else { + write_file($CLOC_XSL, {}, ( $XSL ) ); + } + print "<- write_xsl_file\n" if $opt_v > 2; +} # 1}}} +sub normalize_file_names { # {{{1 + print "-> normalize_file_names\n" if $opt_v > 2; + my (@files, ) = @_; + + # Returns a hash of file names reduced to a canonical form + # (fully qualified file names, all path separators changed to /, + # Windows file names lowercased). Hash values are the original + # file name. + + my %normalized = (); + foreach my $F (@files) { + my $F_norm = $F; + if ($ON_WINDOWS) { + $F_norm = lc $F_norm; # for case insensitive file name comparisons + $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix + $F_norm =~ s{^\./}{}g; # remove leading ./ + if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) { + # looks like a relative path; prefix with cwd + $F_norm = lc "$cwd/$F_norm"; + } + } else { + $F_norm =~ s{^\./}{}g; # remove leading ./ + if ($F_norm !~ m{^/}) { + # looks like a relative path; prefix with cwd + $F_norm = "$cwd/$F_norm"; + } + } + # Remove trailing / so it does not interfere with further regex code + # that does not expect it + $F_norm =~ s{/+$}{}; + $normalized{ $F_norm } = $F; + } + print "<- normalize_file_names\n" if $opt_v > 2; + return %normalized; +} # 1}}} +sub combine_diffs { # {{{1 + # subroutine by Andy (awalshe@sf.net) + # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625 + my ($ra_files) = @_; + print "-> combine_diffs\n" if $opt_v > 2; + + my $res = "$URL v $VERSION\n"; + my $dl = '-'; + my $width = 79; + # columns are in this order + my @cols = ('files', 'blank', 'comment', 'code'); + my %HoH = (); + + foreach my $file (@{$ra_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + + my $sec; + while (<$IN>) { + chomp; + s/\cM$//; + next if /^(http|Language|-----)/; + if (/^[A-Za-z0-9]+/) { # section title + $sec = $_; + chomp($sec); + $HoH{$sec} = () if ! exists $HoH{$sec}; + next; + } + + if (/^\s(same|modified|added|removed)/) { # calculated totals row + my @ar = grep { $_ ne '' } split(/ /, $_); + chomp(@ar); + my $ttl = shift @ar; + my $i = 0; + foreach(@ar) { + my $t = "${ttl}${dl}${cols[$i]}"; + $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t}; + $HoH{$sec}{$t} += $_; + $i++; + } + } + } + $IN->close; + } + + # rows are in this order + my @rows = ('same', 'modified', 'added', 'removed'); + + $res .= sprintf("%s\n", "-" x $width); + $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language', + $cols[0], $cols[1], $cols[2], $cols[3]); + $res .= sprintf("%s\n", "-" x $width); + + # no inputs? 
%HoH will be empty + return $res unless %HoH; + + for my $sec ( keys %HoH ) { + next if $sec =~ /SUM:/; + next unless defined $HoH{$sec}; # eg, the header line + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"${_}${dl}${cols[0]}"}, + $HoH{$sec}{"${_}${dl}${cols[1]}"}, + $HoH{$sec}{"${_}${dl}${cols[2]}"}, + $HoH{$sec}{"${_}${dl}${cols[3]}"}); + } + } + $res .= sprintf("%s\n", "-" x $width); + my $sec = 'SUM:'; + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"${_}${dl}${cols[0]}"}, + $HoH{$sec}{"${_}${dl}${cols[1]}"}, + $HoH{$sec}{"${_}${dl}${cols[2]}"}, + $HoH{$sec}{"${_}${dl}${cols[3]}"}); + } + $res .= sprintf("%s\n", "-" x $width); + + print "<- combine_diffs\n" if $opt_v > 2; + return $res; +} # 1}}} +sub combine_csv_diffs { # {{{1 + my ($delimiter, $ra_files) = @_; + print "-> combine_csv_diffs\n" if $opt_v > 2; + + my %sum = (); # sum{ language } = array of 17 values + foreach my $file (@{$ra_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + + my $sec; + while (<$IN>) { + next if /^Language${delimiter}\s==\sfiles${delimiter}/; + chomp; + my @words = split(/$delimiter/); + my $n_col = scalar(@words); + if ($n_col != 18) { + warn "combine_csv_diffs(): Parse failure line $. of $file\n"; + warn "Expected 18 columns, got $n_col\n"; + die; + } + my $Lang = $words[0]; + my @count = map { int($_) } @words[1..16]; + if (defined $sum{$Lang}) { + for (my $i = 0; $i < 16; $i++) { + $sum{$Lang}[$i] += $count[$i]; + } + } else { + @{$sum{$Lang}} = @count; + } + } + $IN->close; + } + + my @header = ("Language", "== files", "!= files", "+ files", "- files", + "== blank", "!= blank", "+ blank", "- blank", "== comment", + "!= comment", "+ comment", "- comment", "== code", + "!= code", "+ code", "- code", "$URL v $VERSION" ); + + my $res = join("$delimiter ", @header) . "$delimiter\n"; + foreach my $Lang (sort keys %sum) { + $res .= $Lang . "$delimiter "; + for (my $i = 0; $i < 16; $i++) { + $res .= $sum{$Lang}[$i] . "$delimiter "; + } + $res .= "\n"; + } + + print "<- combine_csv_diffs\n" if $opt_v > 2; + return $res; +} # 1}}} +sub get_time { # {{{1 + if ($HAVE_Time_HiRes) { + return Time::HiRes::time(); + } else { + return time(); + } +} # 1}}} +sub really_is_D { # {{{1 + # Ref bug 131, files ending with .d could be init.d scripts + # instead of D language source files. + my ($file , # in + $rh_Err , # in hash of error codes + $raa_errors , # out + ) = @_; + print "-> really_is_D($file)\n" if $opt_v > 2; + my ($possible_script, $L) = peek_at_first_line($file, $rh_Err, $raa_errors); + + print "<- really_is_D($file)\n" if $opt_v > 2; + return $possible_script; # null string if D, otherwise a language +} # 1}}} +sub no_autogen_files { # {{{1 + # ref https://github.com/AlDanial/cloc/issues/151 + my ($print,) = @_; + print "-> no_autogen($print)\n" if $opt_v > 2; + + # These sometimes created manually? 
+ # acinclude.m4 + # configure.ac + # Makefile.am + + my @files = qw ( + aclocal.m4 + announce-gen + autogen.sh + bootstrap + compile + config.guess + config.h.in + config.rpath + config.status + config.sub + configure + configure.in + depcomp + gendocs.sh + gitlog-to-changelog + git-version-gen + gnupload + gnu-web-doc-update + install-sh + libtool + libtool.m4 + link-warning.h + ltmain.sh + lt~obsolete.m4 + ltoptions.m4 + ltsugar.m4 + ltversion.in + ltversion.m4 + Makefile.in + mdate-sh + missing + mkinstalldirs + test-driver + texinfo.tex + update-copyright + useless-if-before-free + vc-list-files + ylwrap + ); + + if ($print) { + printf "cloc will ignore these %d files with --no-autogen:\n", scalar @files; + foreach my $F (@files) { + print " $F\n"; + } + print "Additionally, Go files with '// Code generated by .* DO NOT EDIT.'\n"; + print "on the first line are ignored.\n"; + } + print "<- no_autogen()\n" if $opt_v > 2; + return @files; +} # 1}}} +sub load_from_config_file { # {{{1 + # Supports all options except --config itself which would + # be pointless. + my ($config_file, + $rs_by_file , + $rs_by_file_by_lang , + $rs_categorized , + $rs_counted , + $rs_include_ext , + $rs_include_lang , + $rs_exclude_content , + $rs_exclude_lang , + $rs_exclude_dir , + $rs_exclude_list_file , + $rs_explain , + $rs_extract_with , + $rs_found , + $rs_count_diff , + $rs_diff , + $rs_diff_alignment , + $rs_diff_timeout , + $rs_timeout , + $rs_html , + $rs_ignored , + $rs_quiet , + $rs_force_lang_def , + $rs_read_lang_def , + $rs_show_ext , + $rs_show_lang , + $rs_progress_rate , + $rs_print_filter_stages , + $rs_report_file , + $ra_script_lang , + $rs_sdir , + $rs_skip_uniqueness , + $rs_strip_comments , + $rs_original_dir , + $rs_sum_reports , + $rs_hide_rate , + $rs_processes , + $rs_unicode , + $rs_3 , + $rs_v , + $rs_vcs , + $rs_version , + $rs_write_lang_def , + $rs_write_lang_def_incl_dup, + $rs_xml , + $rs_xsl , + $ra_force_lang , + $rs_lang_no_ext , + $rs_yaml , + $rs_csv , + $rs_csv_delimiter , + $rs_json , + $rs_md , + $rs_fullpath , + $rs_match_f , + $rs_not_match_f , + $rs_match_d , + $rs_not_match_d , + $rs_list_file , + $rs_help , + $rs_skip_win_hidden , + $rs_read_binary_files , + $rs_sql , + $rs_sql_project , + $rs_sql_append , + $rs_sql_style , + $rs_inline , + $rs_exclude_ext , + $rs_ignore_whitespace , + $rs_ignore_case , + $rs_ignore_case_ext , + $rs_follow_links , + $rs_autoconf , + $rs_sum_one , + $rs_by_percent , + $rs_stdin_name , + $rs_force_on_windows , + $rs_force_on_unix , + $rs_show_os , + $rs_skip_archive , + $rs_max_file_size , + $rs_use_sloccount , + $rs_no_autogen , + $rs_force_git , + $rs_strip_str_comments , + $rs_file_encoding , + $rs_docstring_as_code , + $rs_stat , + ) = @_; + # look for runtime configuration file in + # $ENV{'HOME'}/.config/cloc/options.txt -> POSIX + # $ENV{'APPDATA'} . 
'cloc' + + print "-> load_from_config_file($config_file)\n" if $opt_v and $opt_v > 2; + if (!-f $config_file) { + print "<- load_from_config_file() (no such file: $config_file)\n" if $opt_v and $opt_v > 2; + return; + } elsif (!-r $config_file) { + print "<- load_from_config_file() (unable to read $config_file)\n" if $opt_v and $opt_v > 2; + return; + } + print "Reading options from $config_file.\n" if defined $opt_v; + + my $has_force_lang = @{$ra_force_lang}; + my $has_script_lang = @{$ra_script_lang}; + my @lines = read_file($config_file); + foreach (@lines) { + next if /^\s*$/ or /^\s*#/; + s/\s*--//; + s/^\s+//; + if (!defined ${$rs_by_file} and /^(by_file|by-file)/) { ${$rs_by_file} = 1; + } elsif (!defined ${$rs_by_file_by_lang} and /^(by_file_by_lang|by-file-by-lang)/) { ${$rs_by_file_by_lang} = 1; + } elsif (!defined ${$rs_categorized} and /^categorized(=|\s+)(.*?)$/) { ${$rs_categorized} = $2; + } elsif (!defined ${$rs_counted} and /^counted(=|\s+)(.*?)$/) { ${$rs_counted} = $2; + } elsif (!defined ${$rs_include_ext} and /^(?:include_ext|include-ext)(=|\s+)(.*?)$/) { ${$rs_include_ext} = $2; + } elsif (!defined ${$rs_include_lang} and /^(?:include_lang|include-lang)(=|\s+)(.*?)$/) { ${$rs_include_lang} = $2; + } elsif (!defined ${$rs_exclude_content} and /^(?:exclude_content|exclude-content)(=|\s+)(.*?)$/) { ${$rs_exclude_content} = $2; + } elsif (!defined ${$rs_exclude_lang} and /^(?:exclude_lang|exclude-lang)(=|\s+)(.*?)$/) { ${$rs_exclude_lang} = $2; + } elsif (!defined ${$rs_exclude_dir} and /^(?:exclude_dir|exclude-dir)(=|\s+)(.*?)$/) { ${$rs_exclude_dir} = $2; + } elsif (!defined ${$rs_explain} and /^explain(=|\s+)(.*?)$/) { ${$rs_explain} = $2; + } elsif (!defined ${$rs_extract_with} and /^(?:extract_with|extract-with)(=|\s+)(.*?)$/) { ${$rs_extract_with} = $2; + } elsif (!defined ${$rs_found} and /^found(=|\s+)(.*?)$/) { ${$rs_found} = $2; + } elsif (!defined ${$rs_count_diff} and /^(count_and_diff|count-and-diff)/) { ${$rs_count_diff} = 1; + } elsif (!defined ${$rs_diff} and /^diff/) { ${$rs_diff} = 1; + } elsif (!defined ${$rs_diff_alignment} and /^(?:diff-alignment|diff_alignment)(=|\s+)(.*?)$/) { ${$rs_diff_alignment} = $2; + } elsif (!defined ${$rs_diff_timeout} and /^(?:diff-timeout|diff_timeout)(=|\s+)i/) { ${$rs_diff_timeout} = $1; + } elsif (!defined ${$rs_timeout} and /^timeout(=|\s+)i/) { ${$rs_timeout} = $1; + } elsif (!defined ${$rs_html} and /^html/) { ${$rs_html} = 1; + } elsif (!defined ${$rs_ignored} and /^ignored(=|\s+)(.*?)$/) { ${$rs_ignored} = $2; + } elsif (!defined ${$rs_quiet} and /^quiet/) { ${$rs_quiet} = 1; + } elsif (!defined ${$rs_force_lang_def} and /^(?:force_lang_def|force-lang-def)(=|\s+)(.*?)$/) { ${$rs_force_lang_def} = $2; + } elsif (!defined ${$rs_read_lang_def} and /^(?:read_lang_def|read-lang-def)(=|\s+)(.*?)$/) { ${$rs_read_lang_def} = $2; + } elsif (!defined ${$rs_progress_rate} and /^(?:progress_rate|progress-rate)(=|\s+)(\d+)/) { ${$rs_progress_rate} = $2; + } elsif (!defined ${$rs_print_filter_stages} and /^(print_filter_stages|print-filter-stages)/) { ${$rs_print_filter_stages}= 1; + } elsif (!defined ${$rs_report_file} and /^(?:report_file|report-file)(=|\s+)(.*?)$/) { ${$rs_report_file} = $2; + } elsif (!defined ${$rs_report_file} and /^out(=|\s+)(.*?)$/) { ${$rs_report_file} = $2; + } elsif (!defined ${$rs_sdir} and /^sdir(=|\s+)(.*?)$/) { ${$rs_sdir} = $2; + } elsif (!defined ${$rs_skip_uniqueness} and /^(skip_uniqueness|skip-uniqueness)/) { ${$rs_skip_uniqueness} = 1; + } elsif (!defined ${$rs_strip_comments} and 
/^(?:strip_comments|strip-comments)(=|\s+)(.*?)$/) { ${$rs_strip_comments} = $2; + } elsif (!defined ${$rs_original_dir} and /^(original_dir|original-dir)/) { ${$rs_original_dir} = 1; + } elsif (!defined ${$rs_sum_reports} and /^(sum_reports|sum-reports)/) { ${$rs_sum_reports} = 1; + } elsif (!defined ${$rs_hide_rate} and /^(hid_rate|hide-rate)/) { ${$rs_hide_rate} = 1; + } elsif (!defined ${$rs_processes} and /^processes(=|\s+)(\d+)/) { ${$rs_processes} = $2; + } elsif (!defined ${$rs_unicode} and /^unicode/) { ${$rs_unicode} = 1; + } elsif (!defined ${$rs_3} and /^3/) { ${$rs_3} = 1; + } elsif (!defined ${$rs_vcs} and /^vcs(=|\s+)(\S+)/) { ${$rs_vcs} = $2; + } elsif (!defined ${$rs_version} and /^version/) { ${$rs_version} = 1; + } elsif (!defined ${$rs_write_lang_def} and /^(?:write_lang_def|write-lang-def)(=|\s+)(.*?)$/) { ${$rs_write_lang_def} = $2; + } elsif (!defined ${$rs_write_lang_def_incl_dup} and /^(?:write_lang_def_incl_dup|write-lang-def-incl-dup)(=|\s+)(.*?)$/) { ${$rs_write_lang_def_incl_dup} = $2; + } elsif (!defined ${$rs_xml} and /^xml/) { ${$rs_xml} = 1; + } elsif (!defined ${$rs_xsl} and /^xsl(=|\s+)(.*?)$/) { ${$rs_xsl} = $2; + } elsif (!defined ${$rs_lang_no_ext} and /^(?:lang_no_ext|lang-no-ext)(=|\s+)(.*?)$/) { ${$rs_lang_no_ext} = $2; + } elsif (!defined ${$rs_yaml} and /^yaml/) { ${$rs_yaml} = 1; + } elsif (!defined ${$rs_csv} and /^csv/) { ${$rs_csv} = 1; + } elsif (!defined ${$rs_csv_delimiter} and /^(?:csv_delimiter|csv-delimiter)(=|\s+)(.*?)$/) { ${$rs_csv_delimiter} = $2; + } elsif (!defined ${$rs_json} and /^json/) { ${$rs_json} = 1; + } elsif (!defined ${$rs_md} and /^md/) { ${$rs_md} = 1; + } elsif (!defined ${$rs_fullpath} and /^fullpath/) { ${$rs_fullpath} = 1; + } elsif (!defined ${$rs_match_f} and /^(?:match_f|match-f)(=|\s+)(.*?)$/) { ${$rs_match_f} = $2; + } elsif (!defined ${$rs_not_match_f} and /^(?:not_match_f|not-match-f)(=|\s+)(.*?)$/) { ${$rs_not_match_f} = $2; + } elsif (!defined ${$rs_match_d} and /^(?:match_d|match-d)(=|\s+)(.*?)$/) { ${$rs_match_d} = $2; + } elsif (!defined ${$rs_not_match_d} and /^(?:not_match_d|not-match-d)(=|\s+)(.*?)$/) { ${$rs_not_match_d} = $2; + } elsif (!defined ${$rs_list_file} and /^(?:list_file|list-file)(=|\s+)(.*?)$/) { ${$rs_list_file} = $2; + } elsif (!defined ${$rs_help} and /^help/) { ${$rs_help} = 1; + } elsif (!defined ${$rs_skip_win_hidden} and /^(skip_win_hidden|skip-win-hidden)/) { ${$rs_skip_win_hidden} = 1; + } elsif (!defined ${$rs_read_binary_files} and /^(read_binary_files|read-binary-files)/) { ${$rs_read_binary_files} = 1; + } elsif (!defined ${$rs_sql} and /^sql(=|\s+)(.*?)$/) { ${$rs_sql} = $2; + } elsif (!defined ${$rs_sql_project} and /^(?:sql_project|sql-project)(=|\s+)(.*?)$/) { ${$rs_sql_project} = $2; + } elsif (!defined ${$rs_sql_append} and /^(sql_append|sql-append)/) { ${$rs_sql_append} = 1; + } elsif (!defined ${$rs_sql_style} and /^(?:sql_style|sql-style)(=|\s+)(.*?)$/) { ${$rs_sql_style} = $2; + } elsif (!defined ${$rs_inline} and /^inline/) { ${$rs_inline} = 1; + } elsif (!defined ${$rs_exclude_ext} and /^(?:exclude_ext|exclude-ext)(=|\s+)(.*?)$/) { ${$rs_exclude_ext} = $2; + } elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1; + } elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1; + } elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1; + } elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) 
{ ${$rs_follow_links} = 1; + } elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1; + } elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1; + } elsif (!defined ${$rs_by_percent} and /^(?:by_percent|by-percent)(=|\s+)(.*?)$/) { ${$rs_by_percent} = $2; + } elsif (!defined ${$rs_stdin_name} and /^(?:stdin_name|stdin-name)(=|\s+)(.*?)$/) { ${$rs_stdin_name} = $2; + } elsif (!defined ${$rs_force_on_windows} and /^windows/) { ${$rs_force_on_windows} = 1; + } elsif (!defined ${$rs_force_on_unix} and /^unix/) { ${$rs_force_on_unix} = 1; + } elsif (!defined ${$rs_show_os} and /^(show_os|show-os)/) { ${$rs_show_os} = 1; + } elsif (!defined ${$rs_skip_archive} and /^(?:skip_archive|skip-archive)(=|\s+)(.*?)$/) { ${$rs_skip_archive} = $2; + } elsif (!defined ${$rs_max_file_size} and /^(?:max_file_size|max-file-size)(=|\s+)(\d+)/) { ${$rs_max_file_size} = $2; + } elsif (!defined ${$rs_use_sloccount} and /^(use_sloccount|use-sloccount)/) { ${$rs_use_sloccount} = 1; + } elsif (!defined ${$rs_no_autogen} and /^(no_autogen|no-autogen)/) { ${$rs_no_autogen} = 1; + } elsif (!defined ${$rs_force_git} and /^git/) { ${$rs_force_git} = 1; + } elsif (!defined ${$rs_exclude_list_file} and /^(?:exclude_list_file|exclude-list-file)(=|\s+)(.*?)$/) + { ${$rs_exclude_list_file} = $2; + } elsif (!defined ${$rs_v} and /(verbose|v)((=|\s+)(\d+))?/) { + if (!defined $4) { ${$rs_v} = 0; } + else { ${$rs_v} = $4; } + } elsif (!$has_script_lang and /^(?:script_lang|script-lang)(=|\s+)(.*?)$/) { + push @{$ra_script_lang} , $2; + } elsif (!$has_force_lang and /^(?:force_lang|force-lang)(=|\s+)(.*?)$/) { + push @{$ra_force_lang} , $2; + } elsif (!defined ${$rs_show_ext} and /^(show_ext|show-ext)((=|\s+)(.*))?$/) { + if (!defined $4) { ${$rs_show_ext} = 0; } + else { ${$rs_show_ext} = $4; } + } elsif (!defined ${$rs_show_lang} and /^(show_lang|show-lang)((=|\s+)(.*))?s/){ + if (!defined $4) { ${$rs_show_lang} = 0; } + else { ${$rs_show_lang} = $4; } + } elsif (!defined ${$rs_strip_str_comments} and /^(strip_str_comments|strip-str-comments)/) { ${$rs_strip_str_comments} = 1; + } elsif (!defined ${$rs_file_encoding} and /^(?:file_encoding|file-encoding)(=|\s+)(\S+)/) { ${$rs_file_encoding} = $2; + } elsif (!defined ${$rs_docstring_as_code} and /^(docstring_as_code|docstring-as-code)/) { ${$rs_docstring_as_code} = 1; + } elsif (!defined ${$rs_stat} and /stat/) { ${$rs_stat} = 1; + } + + } +} # 1}}} +sub trick_pp_packer_encode { # {{{1 + use Encode; + # PAR::Packer gives 'Unknown PerlIO layer "encoding"' unless it is + # forced into using this module. 
+ my ($OUT, $JunkFile) = tempfile(UNLINK => 1); # delete on exit + open($OUT, "> :encoding(utf8)", $JunkFile); + close($OUT); +} +# 1}}} +sub really_is_smarty { # {{{1 + # Given filename, returns TRUE if its contents look like Smarty template + my ($filename, ) = @_; + + print "-> really_is_smarty($filename)\n" if $opt_v > 2; + + my @lines = read_file($filename); + + my $points = 0; + foreach my $L (@lines) { + if (($L =~ /\{(if|include)\s/) or + ($L =~ /\{\/if\}/) or + ($L =~ /(\{\*|\*\})/) or + ($L =~ /\{\$\w/)) { + ++$points; + } + last if $points >= 2; + } + print "<- really_is_smarty(points=$points)\n" if $opt_v > 2; + return $points >= 2; +} # 1}}} +sub check_alternate_config_files { # {{{1 + my ($list_file, $exclude_list_file, $read_lang_def, + $force_lang_def, $diff_list_file, ) = @_; + my $found_it = ""; + foreach my $file ($list_file, + $exclude_list_file, + $read_lang_def, + $force_lang_def, + $diff_list_file ) { + next unless defined $file; + my $dir = dirname $file; + next unless -r $dir and -d $dir; + my $bn = basename $config_file; + if (-r "$dir/$bn") { + $found_it = "$dir/$bn"; + print "Using configuration file $found_it\n" if $opt_v; + last; + } + } + return $found_it; +} +# 1}}} +# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount +my %php_files = (); # really_is_php() +sub really_is_pascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. + +# This isn't as obvious as it seems. +# Many ".p" files are Perl files +# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p), +# others are C extractions +# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p +# and some files in linuxconf). +# However, test files in "p2c" really are Pascal, for example. + +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following +# (ignoring {...} and (*...*) comments): +# 1. "^..program NAME" or "^..unit NAME", +# 2. "procedure", "function", "^..interface", or "^..implementation", +# 3. a "begin", and +# 4. it ends with "end.", +# +# Or it has all of the following: +# 1. "^..module NAME" and +# 2. it ends with "end.". +# +# Or it has all of the following: +# 1. "^..program NAME", +# 2. a "begin", and +# 3. it ends with "end.". +# +# The "end." requirements in particular filter out non-Pascal. +# +# Note (jgb): this does not detect Pascal main files in fpc, like +# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in +# it + + my $is_pascal = 0; # Value to determine. 
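    # For illustration only (not from the original SLOCCount text): the
    # smallest input accepted by the "program + begin + end." clause
    # described above would be
    #
    #     program tiny;
    #     begin
    #     end.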
+ + my $has_program = 0; + my $has_unit = 0; + my $has_module = 0; + my $has_procedure_or_function = 0; + my $found_begin = 0; + my $found_terminating_end = 0; + my $has_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;} + if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;} + if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; } + if (m/\bfunction\b/i) { $has_procedure_or_function = 1; } + if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; } + if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; } + if (m/\bbegin\b/i) { $has_begin = 1; } + # Originally I said: + # "This heuristic fails if there are multi-line comments after + # "end."; I haven't seen that in real Pascal programs:" + # But jgb found there are a good quantity of them in Debian, specially in + # fpc (at the end of a lot of files there is a multiline comment + # with the changelog for the file). + # Therefore, assume Pascal if "end." appears anywhere in the file. + if (m/end\.\s*$/i) {$found_terminating_end = 1;} +# elsif (m/\S/) {$found_terminating_end = 0;} + } + close(PASCAL_FILE); + + # Okay, we've examined the entire file looking for clues; + # let's use those clues to determine if it's really Pascal: + + if ( ( ($has_unit || $has_program) && $has_procedure_or_function && + $has_begin && $found_terminating_end ) || + ( $has_module && $found_terminating_end ) || + ( $has_program && $has_begin && $found_terminating_end ) ) + {$is_pascal = 1;} + + return $is_pascal; +} # 1}}} +sub really_is_incpascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. +# For .inc files (mainly seen in fpc) + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it is Pascal if any of the following: +# 1. really_is_pascal returns true +# 2. Any usual reserved word is found (program, unit, const, begin...) + + # If the general routine for Pascal files works, we have it + if (really_is_pascal($filename)) { + return 1; + } + + my $is_pascal = 0; # Value to determine. + my $found_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bprocedure\b/i) {$is_pascal = 1; } + if (m/\bfunction\b/i) {$is_pascal = 1; } + if (m/^\s*interface\s+/i) {$is_pascal = 1; } + if (m/^\s*implementation\s+/i) {$is_pascal = 1; } + if (m/\bconstant\s+/i) {$is_pascal=1;} + if (m/\bbegin\b/i) { $found_begin = 1; } + if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;} + if ($is_pascal) { + last; + } + } + + close(PASCAL_FILE); + return $is_pascal; +} # 1}}} +sub really_is_php { # {{{1 +# Given filename, returns TRUE if its contents really is php. + + my $filename = shift; + chomp($filename); + + my $is_php = 0; # Value to determine. + # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # + my $script_surround = 0; # ; bit 0 =