diff --git a/codepot/README.md b/codepot/README.md index 630ea35b..fafe3830 100644 --- a/codepot/README.md +++ b/codepot/README.md @@ -336,7 +336,7 @@ jQuery JavaScript Library v1.11.2 MIT See http://jquery.org/license jQuery UI 1.9.2 MIT See http://jquery.org/license X-editable 1.5.1 MIT PHPGraphLib MIT -CLOC 1.62 GPL +CLOC 1.91 GPL Flot https://github.com/flot/flot/blob/master/LICENSE.txt Font Awesome 4.3.0 MIT & SIL OFL 1.1 D3.js 3.5.5 BSD diff --git a/codepot/etc/cloc.pl b/codepot/etc/cloc.pl index d3b9faeb..adca5f4d 100755 --- a/codepot/etc/cloc.pl +++ b/codepot/etc/cloc.pl @@ -1,14 +1,14 @@ #!/usr/bin/env perl # cloc -- Count Lines of Code {{{1 -# Copyright (C) 2006-2017 Al Danial +# Copyright (C) 2006-2021 Al Danial # First release August 2006 # # Includes code from: # - SLOCCount v2.26 # http://www.dwheeler.com/sloccount/ # by David Wheeler. -# - Regexp::Common v2013031301 -# http://search.cpan.org/~abigail/Regexp-Common-2013031301/lib/Regexp/Common.pm +# - Regexp::Common v2017060201 +# http://search.cpan.org/~abigail/Regexp-Common-2017060201/lib/Regexp/Common.pm # by Damian Conway and Abigail. # - Win32::Autoglob # http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm @@ -29,7 +29,7 @@ # . # # 1}}} -my $VERSION = "1.75"; # odd number == beta; even number == stable +my $VERSION = "1.91"; # odd number == beta; even number == stable my $URL = "github.com/AlDanial/cloc"; # 'https://' pushes header too wide require 5.006; # use modules {{{1 @@ -46,7 +46,6 @@ use IO::File; use List::Util qw( min max ); use Cwd; use POSIX qw { strftime ceil}; - # Parallel::ForkManager isn't in the standard distribution. # Use it only if installed, and only if --processes=N is given. # The module load happens in get_max_processes(). @@ -69,12 +68,11 @@ $HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION; my $HAVE_Rexexp_Common; # Regexp::Common isn't in the standard distribution. It will # be installed in a temp directory if necessary. -BEGIN { - if (eval "use Regexp::Common;") { - $HAVE_Rexexp_Common = 1; - } else { - $HAVE_Rexexp_Common = 0; - } +eval "use Regexp::Common qw ( comment ) "; +if (defined $Regexp::Common::VERSION) { + $HAVE_Rexexp_Common = 1; +} else { + $HAVE_Rexexp_Common = 0; } my $HAVE_Algorith_Diff = 0; @@ -136,12 +134,26 @@ if ($ON_WINDOWS and $ENV{'SHELL'}) { $ON_WINDOWS = 1; # MKS defines $SHELL but still acts like Windows } } +my $config_file = ''; +if ( $ENV{'HOME'} ) { + $config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt'); +} elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) { + $config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc'); +} +# $config_file may be updated by check_alternate_config_files() my $NN = chr(27) . "[0m"; # normal - $NN = "" if $ON_WINDOWS or !(-t STDERR); # -t STDERR: is it a terminal? + $NN = "" if $ON_WINDOWS or !(-t STDOUT); # -t STDOUT: is it a terminal? my $BB = chr(27) . "[1m"; # bold - $BB = "" if $ON_WINDOWS or !(-t STDERR); + $BB = "" if $ON_WINDOWS or !(-t STDOUT); my $script = basename $0; + +# Intended for v1.88: +# --git-diff-simindex Git diff strategy #3: use git's similarity index +# (git diff -M --name-status) to identify file pairs +# to compare. This is especially useful to compare +# files that were renamed between the commits. + my $brief_usage = " cloc -- Count Lines of Code @@ -159,14 +171,15 @@ Usage: Example: cloc --diff Python-3.5.tar.xz python-3.6/ $script --help shows full documentation on the options. -http://$URL has numerous examples and more information. 
+https://$URL has numerous examples and more information. "; my $usage = " Usage: $script [options] | | Count, or compute differences of, physical lines of source code in the - given files (may be archives such as compressed tarballs or zip files) - and/or recursively below the given directories. + given files (may be archives such as compressed tarballs or zip files, + or git commit hashes or branch names) and/or recursively below the + given directories. ${BB}Input Options${NN} --extract-with= This option is only needed if cloc is unable @@ -189,8 +202,16 @@ Usage: $script [options] | | , which has one file/directory name per line. Only exact matches are counted; relative path names will be resolved starting from - the directory where cloc is invoked. - See also --exclude-list-file. + the directory where cloc is invoked. Set + to - to read file names from a STDIN pipe. + See also --exclude-list-file, --config. + --diff-list-file= Take the pairs of file names to be diff'ed from + , whose format matches the output of + --diff-alignment. (Run with that option to + see a sample.) The language identifier at the + end of each line is ignored. This enables --diff + mode and bypasses file pair alignment logic. + See also --config. --vcs= Invoke a system call to to obtain a list of files to work on. If is 'git', then will invoke 'git ls-files' to get a file list and @@ -211,6 +232,10 @@ Usage: $script [options] | | to 'auto' selects between 'git' + and 'svn' (or neither) depending on the presence + of a .git or .svn subdirectory below the directory + where cloc is invoked. --unicode Check binary files to see if they contain Unicode expanded ASCII text. This causes performance to drop noticeably. @@ -221,6 +246,18 @@ Usage: $script [options] | | Read command line switches from instead of + the default location of $config_file. + The file should contain one switch, along with + arguments (if any), per line. Blank lines and lines + beginning with '#' are skipped. Options given on + the command line take priority over entries read from + the file. + If a directory is also given with any of these + switches: --list-file, --exclude-list-file, + --read-lang-def, --force-lang-def, --diff-list-file + and a config file exists in that directory, it will + take priority over $config_file. --count-and-diff First perform direct code counts of source file(s) of and separately, then perform a diff @@ -235,14 +272,24 @@ Usage: $script [options] | | Ignore files which take more than seconds - to process. Default is 10 seconds. - (Large files with many repeated lines can cause - Algorithm::Diff::sdiff() to take hours.) + to process. Default is 10 seconds. Setting + to 0 allows unlimited time. (Large files with many + repeated lines can cause Algorithm::Diff::sdiff() + to take hours.) See also --timeout. + --docstring-as-code cloc considers docstrings to be comments, but this is + not always correct as docstrings represent regular + strings when they appear on the right hand side of an + assignment or as function arguments. This switch + forces docstrings to be counted as code. --follow-links [Unix only] Follow symbolic links to directories (sym links to files are always followed). + See also --stat. --force-lang=[,] Process all files that have a extension with the counter for language . For @@ -260,13 +307,14 @@ Usage: $script [options] | | | | Count files without extensions using the counter. 
This option overrides internal logic for files without extensions (where such files @@ -314,7 +374,8 @@ Usage: $script [options] | | defines a language cloc already knows about, cloc's definition will take precedence. Use --force-lang-def to over-ride cloc's - definitions (see also --write-lang-def ). + definitions (see also --write-lang-def, + --write-lang-def-incl-dup, --config). --script-lang=, Process all files that invoke as a #! scripting language with the counter for language . For example, files that begin with @@ -330,10 +391,25 @@ Usage: $script [options] | | Skip the first lines of each file. If a + comma separated list of extensions is also given, + only skip lines from those file types. Example: + --skip-leading=10,cpp,h + will skip the first ten lines of *.cpp and *.h + files. This is useful for ignoring boilerplate + text. --skip-uniqueness Skip the file uniqueness check. This will give a performance boost at the expense of counting files with identical contents multiple times (if such duplicates exist). + --stat Some file systems (AFS, CD-ROM, FAT, HPFS, SMB) + do not have directory 'nlink' counts that match + the number of its subdirectories. Consequently + cloc may undercount or completely skip the + contents of such file systems. This switch forces + File::Find to stat directories to obtain the + correct count. File search speed will decrease. + See also --follow-links. --stdin-name= Give a file name to use to determine the language for standard input. (Use - as the input name to receive source code via STDIN.) @@ -344,21 +420,33 @@ Usage: $script [options] | | appended to it. It is written to the current directory unless --original-dir is on. + --strip-str-comments Replace comment markers embedded in strings with + 'xx'. This attempts to work around a limitation + in Regexp::Common::Comment where comment markers + embedded in strings are seen as actual comment + markers and not strings, often resulting in a + 'Complex regular subexpression recursion limit' + warning and incorrect counts. There are two + disadvantages to using this switch: 1/code count + performance drops, and 2/code generated with + --strip-comments will contain different strings + where ever embedded comments are found. --sum-reports Input arguments are report files previously - created with the --report-file option. Makes - a cumulative set of results containing the + created with the --report-file option in plain + format (eg. not JSON, YAML, XML, or SQL). + Makes a cumulative set of results containing the sum of data from the individual report files. - --processes=NUM Sets the maximum number of processes that cloc - uses. If this parameter is set to 0, multi- - processing will be disabled. On Linux and MacOS - systems, cloc creates up to one process per core - by default if a recent version of the - Parallel::ForkManager module is available. - Multiprocessing is disabled by default if cloc - is unable to determine the number of CPU cores. - Multiprocessing is not supported on Windows systems - and on systems which don't have a recent version - of the Parallel::ForkManager module. + --timeout Ignore files which take more than seconds + to process at any of the language's filter stages. + The default maximum number of seconds spent on a + filter stage is the number of lines in the file + divided by one thousand. Setting to 0 allows + unlimited time. See also --diff-timeout. + --processes=NUM [Available only on systems with a recent version + of the Parallel::ForkManager module. 
Not + available on Windows.] Sets the maximum number of + cores that cloc uses. The default value of 0 + disables multiprocessing. --unix Override the operating system autodetection logic and run in UNIX mode. See also --windows, --show-os. @@ -376,6 +464,8 @@ Usage: $script [options] | | Exclude files containing text that matches the given + regular expression. --exclude-dir=[,D2,] Exclude the given comma separated directories D1, D2, D3, et cetera, from being scanned. For example --exclude-dir=.cache,test will skip @@ -390,14 +480,15 @@ Usage: $script [options] | | [,[...]] Do not count files having the given file name extensions. - --exclude-lang=[,L2,] Exclude the given comma separated languages + --exclude-lang=[,L2[...]] + Exclude the given comma separated languages L1, L2, L3, et cetera, from being counted. --exclude-list-file= Ignore files and/or directories whose names appear in . should have one file name per line. Only exact matches are ignored; relative path names will be resolved starting from the directory where cloc is invoked. - See also --list-file. + See also --list-file, --config. --fullpath Modifies the behavior of --match-f, --not-match-f, and --not-match-d to include the file's path in the regex, not just the file's basename. @@ -406,8 +497,14 @@ Usage: $script [options] | | [,L2,] Count only the given comma separated languages - L1, L2, L3, et cetera. + --include-ext=[,ext2[...]] + Count only languages having the given comma + separated file extensions. Use --show-ext to + see the recognized extensions. + --include-lang=[,L2[...]] + Count only the given comma separated languages + L1, L2, L3, et cetera. Use --show-lang to see + the list of recognized languages. --match-d= Only count files in directories matching the Perl regex. For example --match-d='/(src|include)/' @@ -472,8 +569,14 @@ Usage: $script [options] | | Writes to the language processing filters then exits. Useful as a first step to creating - custom language definitions (see also - --force-lang-def, --read-lang-def). + custom language definitions. Note: languages which + map to the same file extension will be excluded. + (See also --force-lang-def, --read-lang-def). + --write-lang-def-incl-dup= + Same as --write-lang-def, but includes duplicated + extensions. This generates a problematic language + definition file because cloc will refuse to use + it until duplicates are removed. ${BB}Output Options${NN} --3 Print third-generation language output. @@ -482,11 +585,11 @@ Usage: $script [options] | | # lines of code - X = 'cm' -> # lines of code + comments - X = 'cb' -> # lines of code + blanks - X = 'cmb' -> # lines of code + comments + blanks + of X in the denominator, where X is + c meaning lines of code + cm meaning lines of code + comments + cb meaning lines of code + blanks + cmb meaning lines of code + comments + blanks For example, if using method 'c' and your code has twice as many lines of comments as lines of code, the value in the comment column will @@ -494,6 +597,13 @@ Usage: $script [options] | | Use the character as the delimiter for comma separated files instead of ,. This switch forces + --file-encoding= Write output files using the encoding instead of + the default ASCII ( = 'UTF-7'). Examples: 'UTF-16', + 'euc-kr', 'iso-8859-16'. Known encodings can be + printed with + perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"' + --hide-rate Do not show line and file processing rates in the + output header. This makes output deterministic. 
--json Write the results as JavaScript Object Notation (JSON) formatted output. --md Write the results as Markdown-formatted text. @@ -505,6 +615,16 @@ Usage: $script [options] | | Write the results to instead of STDOUT. + --summary-cutoff=X:N Aggregate to 'Other' results having X lines + below N where X is one of + c meaning lines of code + f meaning files + m meaning lines of comments + cm meaning lines of code + comments + Appending a percent sign to N changes + the calculation from straight count to + percentage. + Ignored with --diff or --by-file. --sql= Write results as SQL create and insert statements which can be read by a database program such as SQLite. If is -, output is sent to STDOUT. @@ -514,8 +634,8 @@ Usage: $script [options] | | Use as the project identifier for the current run. Only valid with the --sql option. --sql-style= + + +

+';
+    print "<- html_header\n" if $opt_v > 2;
+} # 1}}}
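# A usage sketch, not taken from this patch: html_header() above and
# html_end() below presumably bracket the generated table markup when an
# HTML report is requested, e.g.
#
#     my $html_report = html_header() . $table_rows . html_end();
#
# where $table_rows is a hypothetical variable holding the per-language
# or per-file rows; the actual call sites are outside this excerpt.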
+sub html_end {                               # {{{1
+return
+'
+ + +'; +} # 1}}} +sub die_unknown_lang { # {{{1 + my ($lang, $option_name) = @_; + die "Unknown language '$lang' used with $option_name option. " . + "The command\n $script --show-lang\n" . + "will print all recognized languages. Language names are " . + "case sensitive.\n" ; +} # 1}}} +sub unicode_file { # {{{1 + my $file = shift @_; + + print "-> unicode_file($file)\n" if $opt_v > 2; + return 0 if (-s $file > 2_000_000); + # don't bother trying to test binary files bigger than 2 MB + + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + return 0; + } + my @lines = <$IN>; + $IN->close; + + if (unicode_to_ascii( join('', @lines) )) { + print "<- unicode_file()\n" if $opt_v > 2; + return 1; + } else { + print "<- unicode_file()\n" if $opt_v > 2; + return 0; + } + +} # 1}}} +sub unicode_to_ascii { # {{{1 + my $string = shift @_; + + # A trivial attempt to convert UTF-16 little or big endian + # files into ASCII. These files exhibit the following byte + # sequence: + # byte 1: 255 + # byte 2: 254 + # byte 3: ord of ASCII character + # byte 4: 0 + # byte 3+i: ord of ASCII character + # byte 4+i: 0 + # or + # byte 1: 255 + # byte 2: 254 + # byte 3: 0 + # byte 4: ord of ASCII character + # byte 3+i: 0 + # byte 4+i: ord of ASCII character + # + print "-> unicode_to_ascii()\n" if $opt_v > 2; + + my $length = length $string; +#print "length=$length\n"; + return '' if $length <= 3; + my @unicode = split(//, $string); + + # check the first 100 characters (= 200 bytes) for big or + # little endian UTF-16 encoding + my $max_peek = $length < 200 ? $length : 200; + my $max_for_pass = $length < 200 ? 0.9*$max_peek/2 : 90; + my @view_1 = (); + for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] } + my @view_2 = (); + for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] } + + my $points_1 = 0; + foreach my $C (@view_1) { + ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13 + or ord($C) == 10 + or ord($C) == 9; + } + + my $points_2 = 0; + foreach my $C (@view_2) { + ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13 + or ord($C) == 10 + or ord($C) == 9; + } +#print "points 1: $points_1\n"; +#print "points 2: $points_2\n"; +#print "max_peek : $max_peek\n"; +#print "max_for_pass: $max_for_pass\n"; + + my $offset = undef; + if ($points_1 > $max_for_pass) { $offset = 2; } + elsif ($points_2 > $max_for_pass) { $offset = 3; } + else { + print "<- unicode_to_ascii() a p1=$points_1 p2=$points_2\n" if $opt_v > 2; + return ''; + } # neither big or little endian UTF-16 + + my @ascii = (); + for (my $i = $offset; $i < $length; $i += 2) { + # some compound characters are made of HT (9), LF (10), or CR (13) + # True HT, LF, CR are followed by 00; only add those. + my $L = $unicode[$i]; + if (ord($L) == 9 or ord($L) == 10 or ord($L) == 13) { + my $companion; + if ($points_1) { + last if $i+1 >= $length; + $companion = $unicode[$i+1]; + } else { + $companion = $unicode[$i-1]; + } + if (ord($companion) == 0) { + push @ascii, $L; + } else { + push @ascii, " "; # no clue what this letter is + } + } else { + push @ascii, $L; + } + } + print "<- unicode_to_ascii() b p1=$points_1 p2=$points_2\n" if $opt_v > 2; + return join("", @ascii); +} # 1}}} +sub uncompress_archive_cmd { # {{{1 + my ($archive_file, ) = @_; + + # Wrap $archive_file in single or double quotes in the system + # commands below to avoid filename chicanery (including + # spaces in the names). 
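    # An illustrative aside (not part of this patch), assuming a
    # hypothetical archive name containing a space, to show why the
    # quoting in the commands below matters:
    #
    #     my $archive_file = "my project.tar.gz";
    #     system "tar xf $archive_file";     # shell splits this into two arguments
    #     system "tar xf '$archive_file'";   # shell sees a single argument
    #
    # A name that itself contains a single quote would still defeat the
    # single-quoted form.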
+ + print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2; + my $extract_cmd = ""; + my $missing = ""; + if ($opt_extract_with) { + ( $extract_cmd = $opt_extract_with ) =~ s/>FILE -"; + } elsif ($archive_file =~ /\.tar$/ and $ON_WINDOWS) { + $extract_cmd = "tar -xf \"$archive_file\""; + } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or + $archive_file =~ /\.tgz$/ ) and !$ON_WINDOWS) { + if (external_utility_exists("gzip --version")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "gzip -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "gzip"; + } + } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS) { + if (external_utility_exists("bzip2 --help")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS) { + if (external_utility_exists("unxz --version")) { + if (external_utility_exists("tar --version")) { + $extract_cmd = "unxz -dc '$archive_file' | tar xf -"; + } else { + $missing = "tar"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS) { + $extract_cmd = "tar xf '$archive_file'"; + } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) { + if (external_utility_exists("cpio --version")) { + if (external_utility_exists("rpm2cpio")) { + $extract_cmd = "rpm2cpio '$archive_file' | cpio -i"; + } else { + $missing = "rpm2cpio"; + } + } else { + $missing = "bzip2"; + } + } elsif ($archive_file =~ /\.(whl|zip)$/i and !$ON_WINDOWS) { + if (external_utility_exists("unzip")) { + $extract_cmd = "unzip -qq -d . '$archive_file'"; + } else { + $missing = "unzip"; + } + } elsif ($archive_file =~ /\.deb$/i and !$ON_WINDOWS) { + # only useful if the .deb contains source code--most + # .deb files just have compiled executables + if (external_utility_exists("dpkg-deb")) { + $extract_cmd = "dpkg-deb -x '$archive_file' ."; + } else { + $missing = "dpkg-deb"; + } + } elsif ($ON_WINDOWS and $archive_file =~ /\.(whl|zip)$/i) { + # use unzip on Windows (comes with git-for-Windows) + if (external_utility_exists("unzip")) { + $extract_cmd = "unzip -qq -d . \"$archive_file\" "; + } else { + $missing = "unzip"; + } + } + print "<- uncompress_archive_cmd\n" if $opt_v > 2; + if ($missing) { + die "Unable to expand $archive_file because external\n", + "utility '$missing' is not available.\n", + "Another possibility is to use the --extract-with option.\n"; + } else { + return $extract_cmd; + } +} +# 1}}} +sub read_list_file { # {{{1 + my ($file, ) = @_; + # reads filenames from a STDIN pipe if $file == "-" + + print "-> read_list_file($file)\n" if $opt_v > 2; + my @entry = (); + + if ($file eq "-") { + # read from a STDIN pipe + my $IN; + open($IN, $file); + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + return (); + } + while (<$IN>) { + next if /^\s*$/ or /^\s*#/; # skip empty or commented lines + s/\cM$//; # DOS to Unix + chomp; + push @entry, $_; + } + $IN->close; + } else { + # read from an actual file + foreach my $line (read_file($file)) { + next if $line =~ /^\s*$/ or $line =~ /^\s*#/; + $line =~ s/\cM$//; # DOS to Unix + chomp $line; + push @entry, $line; + } + } + + print "<- read_list_file\n" if $opt_v > 2; + return @entry; +} +# 1}}} +sub external_utility_exists { # {{{1 + my $exe = shift @_; + + my $success = 0; + if ($ON_WINDOWS) { + $success = 1 unless system $exe . 
' > nul'; + } else { + $success = 1 unless system $exe . ' >/dev/null 2>&1'; + if (!$success) { + $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1'; + } + } + + return $success; +} # 1}}} +sub write_xsl_file { # {{{1 + print "-> write_xsl_file\n" if $opt_v > 2; + my $XSL = # {{{2 +' + + + + + + + CLOC Results + + + +

+'; +# 2}}} + + if ($opt_by_file) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
FileBlankCommentCodeLanguage3rd Generation EquivalentScale
Total
+
+'; +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
LanguageFilesBlankCommentCodeScale3rd Generation Equivalent
Total
+'; +# 2}}} + } + + $XSL.= <<'EO_XSL'; # {{{2 + + +
+
+ +EO_XSL +# 2}}} + + my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + CLOC Results + + + +

+EO_DIFF_XSL +# 2}}} + + if ($opt_by_file) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + +
Same
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Modified
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Added
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Removed
FileBlankCommentCode
+EO_DIFF_XSL +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + + + +
Same
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Modified
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Added
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Removed
LanguageFilesBlankCommentCode
+EO_DIFF_XSL +# 2}}} + + } + + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + +
+
+EO_DIFF_XSL +# 2}}} + if ($opt_diff) { + write_file($CLOC_XSL, {}, ( $XSL_DIFF ) ); + } else { + write_file($CLOC_XSL, {}, ( $XSL ) ); + } + print "<- write_xsl_file\n" if $opt_v > 2; +} # 1}}} +sub normalize_file_names { # {{{1 + print "-> normalize_file_names\n" if $opt_v > 2; + my (@files, ) = @_; + + # Returns a hash of file names reduced to a canonical form + # (fully qualified file names, all path separators changed to /, + # Windows file names lowercased). Hash values are the original + # file name. + + my %normalized = (); + foreach my $F (@files) { + my $F_norm = $F; + if ($ON_WINDOWS) { + $F_norm = lc $F_norm; # for case insensitive file name comparisons + $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix + $F_norm =~ s{^\./}{}g; # remove leading ./ + if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) { + # looks like a relative path; prefix with cwd + $F_norm = lc "$cwd/$F_norm"; + } + } else { + $F_norm =~ s{^\./}{}g; # remove leading ./ + if ($F_norm !~ m{^/}) { + # looks like a relative path; prefix with cwd + $F_norm = "$cwd/$F_norm"; + } + } + # Remove trailing / so it does not interfere with further regex code + # that does not expect it + $F_norm =~ s{/+$}{}; + $normalized{ $F_norm } = $F; + } + print "<- normalize_file_names\n" if $opt_v > 2; + return %normalized; +} # 1}}} +sub combine_diffs { # {{{1 + # subroutine by Andy (awalshe@sf.net) + # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625 + my ($ra_files) = @_; + print "-> combine_diffs\n" if $opt_v > 2; + + my $res = "$URL v $VERSION\n"; + my $dl = '-'; + my $width = 79; + # columns are in this order + my @cols = ('files', 'blank', 'comment', 'code'); + my %HoH = (); + + foreach my $file (@{$ra_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + + my $sec; + while (<$IN>) { + chomp; + s/\cM$//; + next if /^(http|Language|-----)/; + if (/^[A-Za-z0-9]+/) { # section title + $sec = $_; + chomp($sec); + $HoH{$sec} = () if ! exists $HoH{$sec}; + next; + } + + if (/^\s(same|modified|added|removed)/) { # calculated totals row + my @ar = grep { $_ ne '' } split(/ /, $_); + chomp(@ar); + my $ttl = shift @ar; + my $i = 0; + foreach(@ar) { + my $t = "${ttl}${dl}${cols[$i]}"; + $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t}; + $HoH{$sec}{$t} += $_; + $i++; + } + } + } + $IN->close; + } + + # rows are in this order + my @rows = ('same', 'modified', 'added', 'removed'); + + $res .= sprintf("%s\n", "-" x $width); + $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language', + $cols[0], $cols[1], $cols[2], $cols[3]); + $res .= sprintf("%s\n", "-" x $width); + + # no inputs? 
%HoH will be empty + return $res unless %HoH; + + for my $sec ( keys %HoH ) { + next if $sec =~ /SUM:/; + next unless defined $HoH{$sec}; # eg, the header line + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"${_}${dl}${cols[0]}"}, + $HoH{$sec}{"${_}${dl}${cols[1]}"}, + $HoH{$sec}{"${_}${dl}${cols[2]}"}, + $HoH{$sec}{"${_}${dl}${cols[3]}"}); + } + } + $res .= sprintf("%s\n", "-" x $width); + my $sec = 'SUM:'; + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"${_}${dl}${cols[0]}"}, + $HoH{$sec}{"${_}${dl}${cols[1]}"}, + $HoH{$sec}{"${_}${dl}${cols[2]}"}, + $HoH{$sec}{"${_}${dl}${cols[3]}"}); + } + $res .= sprintf("%s\n", "-" x $width); + + print "<- combine_diffs\n" if $opt_v > 2; + return $res; +} # 1}}} +sub combine_csv_diffs { # {{{1 + my ($delimiter, $ra_files) = @_; + print "-> combine_csv_diffs\n" if $opt_v > 2; + + my %sum = (); # sum{ language } = array of 17 values + foreach my $file (@{$ra_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + + my $sec; + while (<$IN>) { + next if /^Language${delimiter}\s==\sfiles${delimiter}/; + chomp; + my @words = split(/$delimiter/); + my $n_col = scalar(@words); + if ($n_col != 18) { + warn "combine_csv_diffs(): Parse failure line $. of $file\n"; + warn "Expected 18 columns, got $n_col\n"; + die; + } + my $Lang = $words[0]; + my @count = map { int($_) } @words[1..16]; + if (defined $sum{$Lang}) { + for (my $i = 0; $i < 16; $i++) { + $sum{$Lang}[$i] += $count[$i]; + } + } else { + @{$sum{$Lang}} = @count; + } + } + $IN->close; + } + + my @header = ("Language", "== files", "!= files", "+ files", "- files", + "== blank", "!= blank", "+ blank", "- blank", "== comment", + "!= comment", "+ comment", "- comment", "== code", + "!= code", "+ code", "- code", "$URL v $VERSION" ); + + my $res = join("$delimiter ", @header) . "$delimiter\n"; + foreach my $Lang (sort keys %sum) { + $res .= $Lang . "$delimiter "; + for (my $i = 0; $i < 16; $i++) { + $res .= $sum{$Lang}[$i] . "$delimiter "; + } + $res .= "\n"; + } + + print "<- combine_csv_diffs\n" if $opt_v > 2; + return $res; +} # 1}}} +sub get_time { # {{{1 + if ($HAVE_Time_HiRes) { + return Time::HiRes::time(); + } else { + return time(); + } +} # 1}}} +sub really_is_D { # {{{1 + # Ref bug 131, files ending with .d could be init.d scripts + # instead of D language source files. + my ($file , # in + $rh_Err , # in hash of error codes + $raa_errors , # out + ) = @_; + print "-> really_is_D($file)\n" if $opt_v > 2; + my ($possible_script, $L) = peek_at_first_line($file, $rh_Err, $raa_errors); + + print "<- really_is_D($file)\n" if $opt_v > 2; + return $possible_script; # null string if D, otherwise a language +} # 1}}} +sub no_autogen_files { # {{{1 + # ref https://github.com/AlDanial/cloc/issues/151 + my ($print,) = @_; + print "-> no_autogen($print)\n" if $opt_v > 2; + + # These sometimes created manually? 
+ # acinclude.m4 + # configure.ac + # Makefile.am + + my @files = qw ( + aclocal.m4 + announce-gen + autogen.sh + bootstrap + compile + config.guess + config.h.in + config.rpath + config.status + config.sub + configure + configure.in + depcomp + gendocs.sh + gitlog-to-changelog + git-version-gen + gnupload + gnu-web-doc-update + install-sh + libtool + libtool.m4 + link-warning.h + ltmain.sh + lt~obsolete.m4 + ltoptions.m4 + ltsugar.m4 + ltversion.in + ltversion.m4 + Makefile.in + mdate-sh + missing + mkinstalldirs + test-driver + texinfo.tex + update-copyright + useless-if-before-free + vc-list-files + ylwrap + ); + + if ($print) { + printf "cloc will ignore these %d files with --no-autogen:\n", scalar @files; + foreach my $F (@files) { + print " $F\n"; + } + print "Additionally, Go files with '// Code generated by .* DO NOT EDIT.'\n"; + print "on the first line are ignored.\n"; + } + print "<- no_autogen()\n" if $opt_v > 2; + return @files; +} # 1}}} +sub load_from_config_file { # {{{1 + # Supports all options except --config itself which would + # be pointless. + my ($config_file, + $rs_by_file , + $rs_by_file_by_lang , + $rs_categorized , + $rs_counted , + $rs_include_ext , + $rs_include_lang , + $rs_exclude_content , + $rs_exclude_lang , + $rs_exclude_dir , + $rs_exclude_list_file , + $rs_explain , + $rs_extract_with , + $rs_found , + $rs_count_diff , + $rs_diff , + $rs_diff_alignment , + $rs_diff_timeout , + $rs_timeout , + $rs_html , + $rs_ignored , + $rs_quiet , + $rs_force_lang_def , + $rs_read_lang_def , + $rs_show_ext , + $rs_show_lang , + $rs_progress_rate , + $rs_print_filter_stages , + $rs_report_file , + $ra_script_lang , + $rs_sdir , + $rs_skip_uniqueness , + $rs_strip_comments , + $rs_original_dir , + $rs_sum_reports , + $rs_hide_rate , + $rs_processes , + $rs_unicode , + $rs_3 , + $rs_v , + $rs_vcs , + $rs_version , + $rs_write_lang_def , + $rs_write_lang_def_incl_dup, + $rs_xml , + $rs_xsl , + $ra_force_lang , + $rs_lang_no_ext , + $rs_yaml , + $rs_csv , + $rs_csv_delimiter , + $rs_json , + $rs_md , + $rs_fullpath , + $rs_match_f , + $rs_not_match_f , + $rs_match_d , + $rs_not_match_d , + $rs_list_file , + $rs_help , + $rs_skip_win_hidden , + $rs_read_binary_files , + $rs_sql , + $rs_sql_project , + $rs_sql_append , + $rs_sql_style , + $rs_inline , + $rs_exclude_ext , + $rs_ignore_whitespace , + $rs_ignore_case , + $rs_ignore_case_ext , + $rs_follow_links , + $rs_autoconf , + $rs_sum_one , + $rs_by_percent , + $rs_stdin_name , + $rs_force_on_windows , + $rs_force_on_unix , + $rs_show_os , + $rs_skip_archive , + $rs_max_file_size , + $rs_use_sloccount , + $rs_no_autogen , + $rs_force_git , + $rs_strip_str_comments , + $rs_file_encoding , + $rs_docstring_as_code , + $rs_stat , + ) = @_; + # look for runtime configuration file in + # $ENV{'HOME'}/.config/cloc/options.txt -> POSIX + # $ENV{'APPDATA'} . 
'cloc' + + print "-> load_from_config_file($config_file)\n" if $opt_v and $opt_v > 2; + if (!-f $config_file) { + print "<- load_from_config_file() (no such file: $config_file)\n" if $opt_v and $opt_v > 2; + return; + } elsif (!-r $config_file) { + print "<- load_from_config_file() (unable to read $config_file)\n" if $opt_v and $opt_v > 2; + return; + } + print "Reading options from $config_file.\n" if defined $opt_v; + + my $has_force_lang = @{$ra_force_lang}; + my $has_script_lang = @{$ra_script_lang}; + my @lines = read_file($config_file); + foreach (@lines) { + next if /^\s*$/ or /^\s*#/; + s/\s*--//; + s/^\s+//; + if (!defined ${$rs_by_file} and /^(by_file|by-file)/) { ${$rs_by_file} = 1; + } elsif (!defined ${$rs_by_file_by_lang} and /^(by_file_by_lang|by-file-by-lang)/) { ${$rs_by_file_by_lang} = 1; + } elsif (!defined ${$rs_categorized} and /^categorized(=|\s+)(.*?)$/) { ${$rs_categorized} = $2; + } elsif (!defined ${$rs_counted} and /^counted(=|\s+)(.*?)$/) { ${$rs_counted} = $2; + } elsif (!defined ${$rs_include_ext} and /^(?:include_ext|include-ext)(=|\s+)(.*?)$/) { ${$rs_include_ext} = $2; + } elsif (!defined ${$rs_include_lang} and /^(?:include_lang|include-lang)(=|\s+)(.*?)$/) { ${$rs_include_lang} = $2; + } elsif (!defined ${$rs_exclude_content} and /^(?:exclude_content|exclude-content)(=|\s+)(.*?)$/) { ${$rs_exclude_content} = $2; + } elsif (!defined ${$rs_exclude_lang} and /^(?:exclude_lang|exclude-lang)(=|\s+)(.*?)$/) { ${$rs_exclude_lang} = $2; + } elsif (!defined ${$rs_exclude_dir} and /^(?:exclude_dir|exclude-dir)(=|\s+)(.*?)$/) { ${$rs_exclude_dir} = $2; + } elsif (!defined ${$rs_explain} and /^explain(=|\s+)(.*?)$/) { ${$rs_explain} = $2; + } elsif (!defined ${$rs_extract_with} and /^(?:extract_with|extract-with)(=|\s+)(.*?)$/) { ${$rs_extract_with} = $2; + } elsif (!defined ${$rs_found} and /^found(=|\s+)(.*?)$/) { ${$rs_found} = $2; + } elsif (!defined ${$rs_count_diff} and /^(count_and_diff|count-and-diff)/) { ${$rs_count_diff} = 1; + } elsif (!defined ${$rs_diff} and /^diff/) { ${$rs_diff} = 1; + } elsif (!defined ${$rs_diff_alignment} and /^(?:diff-alignment|diff_alignment)(=|\s+)(.*?)$/) { ${$rs_diff_alignment} = $2; + } elsif (!defined ${$rs_diff_timeout} and /^(?:diff-timeout|diff_timeout)(=|\s+)i/) { ${$rs_diff_timeout} = $1; + } elsif (!defined ${$rs_timeout} and /^timeout(=|\s+)i/) { ${$rs_timeout} = $1; + } elsif (!defined ${$rs_html} and /^html/) { ${$rs_html} = 1; + } elsif (!defined ${$rs_ignored} and /^ignored(=|\s+)(.*?)$/) { ${$rs_ignored} = $2; + } elsif (!defined ${$rs_quiet} and /^quiet/) { ${$rs_quiet} = 1; + } elsif (!defined ${$rs_force_lang_def} and /^(?:force_lang_def|force-lang-def)(=|\s+)(.*?)$/) { ${$rs_force_lang_def} = $2; + } elsif (!defined ${$rs_read_lang_def} and /^(?:read_lang_def|read-lang-def)(=|\s+)(.*?)$/) { ${$rs_read_lang_def} = $2; + } elsif (!defined ${$rs_progress_rate} and /^(?:progress_rate|progress-rate)(=|\s+)(\d+)/) { ${$rs_progress_rate} = $2; + } elsif (!defined ${$rs_print_filter_stages} and /^(print_filter_stages|print-filter-stages)/) { ${$rs_print_filter_stages}= 1; + } elsif (!defined ${$rs_report_file} and /^(?:report_file|report-file)(=|\s+)(.*?)$/) { ${$rs_report_file} = $2; + } elsif (!defined ${$rs_report_file} and /^out(=|\s+)(.*?)$/) { ${$rs_report_file} = $2; + } elsif (!defined ${$rs_sdir} and /^sdir(=|\s+)(.*?)$/) { ${$rs_sdir} = $2; + } elsif (!defined ${$rs_skip_uniqueness} and /^(skip_uniqueness|skip-uniqueness)/) { ${$rs_skip_uniqueness} = 1; + } elsif (!defined ${$rs_strip_comments} and 
/^(?:strip_comments|strip-comments)(=|\s+)(.*?)$/) { ${$rs_strip_comments} = $2; + } elsif (!defined ${$rs_original_dir} and /^(original_dir|original-dir)/) { ${$rs_original_dir} = 1; + } elsif (!defined ${$rs_sum_reports} and /^(sum_reports|sum-reports)/) { ${$rs_sum_reports} = 1; + } elsif (!defined ${$rs_hide_rate} and /^(hid_rate|hide-rate)/) { ${$rs_hide_rate} = 1; + } elsif (!defined ${$rs_processes} and /^processes(=|\s+)(\d+)/) { ${$rs_processes} = $2; + } elsif (!defined ${$rs_unicode} and /^unicode/) { ${$rs_unicode} = 1; + } elsif (!defined ${$rs_3} and /^3/) { ${$rs_3} = 1; + } elsif (!defined ${$rs_vcs} and /^vcs(=|\s+)(\S+)/) { ${$rs_vcs} = $2; + } elsif (!defined ${$rs_version} and /^version/) { ${$rs_version} = 1; + } elsif (!defined ${$rs_write_lang_def} and /^(?:write_lang_def|write-lang-def)(=|\s+)(.*?)$/) { ${$rs_write_lang_def} = $2; + } elsif (!defined ${$rs_write_lang_def_incl_dup} and /^(?:write_lang_def_incl_dup|write-lang-def-incl-dup)(=|\s+)(.*?)$/) { ${$rs_write_lang_def_incl_dup} = $2; + } elsif (!defined ${$rs_xml} and /^xml/) { ${$rs_xml} = 1; + } elsif (!defined ${$rs_xsl} and /^xsl(=|\s+)(.*?)$/) { ${$rs_xsl} = $2; + } elsif (!defined ${$rs_lang_no_ext} and /^(?:lang_no_ext|lang-no-ext)(=|\s+)(.*?)$/) { ${$rs_lang_no_ext} = $2; + } elsif (!defined ${$rs_yaml} and /^yaml/) { ${$rs_yaml} = 1; + } elsif (!defined ${$rs_csv} and /^csv/) { ${$rs_csv} = 1; + } elsif (!defined ${$rs_csv_delimiter} and /^(?:csv_delimiter|csv-delimiter)(=|\s+)(.*?)$/) { ${$rs_csv_delimiter} = $2; + } elsif (!defined ${$rs_json} and /^json/) { ${$rs_json} = 1; + } elsif (!defined ${$rs_md} and /^md/) { ${$rs_md} = 1; + } elsif (!defined ${$rs_fullpath} and /^fullpath/) { ${$rs_fullpath} = 1; + } elsif (!defined ${$rs_match_f} and /^(?:match_f|match-f)(=|\s+)(.*?)$/) { ${$rs_match_f} = $2; + } elsif (!defined ${$rs_not_match_f} and /^(?:not_match_f|not-match-f)(=|\s+)(.*?)$/) { ${$rs_not_match_f} = $2; + } elsif (!defined ${$rs_match_d} and /^(?:match_d|match-d)(=|\s+)(.*?)$/) { ${$rs_match_d} = $2; + } elsif (!defined ${$rs_not_match_d} and /^(?:not_match_d|not-match-d)(=|\s+)(.*?)$/) { ${$rs_not_match_d} = $2; + } elsif (!defined ${$rs_list_file} and /^(?:list_file|list-file)(=|\s+)(.*?)$/) { ${$rs_list_file} = $2; + } elsif (!defined ${$rs_help} and /^help/) { ${$rs_help} = 1; + } elsif (!defined ${$rs_skip_win_hidden} and /^(skip_win_hidden|skip-win-hidden)/) { ${$rs_skip_win_hidden} = 1; + } elsif (!defined ${$rs_read_binary_files} and /^(read_binary_files|read-binary-files)/) { ${$rs_read_binary_files} = 1; + } elsif (!defined ${$rs_sql} and /^sql(=|\s+)(.*?)$/) { ${$rs_sql} = $2; + } elsif (!defined ${$rs_sql_project} and /^(?:sql_project|sql-project)(=|\s+)(.*?)$/) { ${$rs_sql_project} = $2; + } elsif (!defined ${$rs_sql_append} and /^(sql_append|sql-append)/) { ${$rs_sql_append} = 1; + } elsif (!defined ${$rs_sql_style} and /^(?:sql_style|sql-style)(=|\s+)(.*?)$/) { ${$rs_sql_style} = $2; + } elsif (!defined ${$rs_inline} and /^inline/) { ${$rs_inline} = 1; + } elsif (!defined ${$rs_exclude_ext} and /^(?:exclude_ext|exclude-ext)(=|\s+)(.*?)$/) { ${$rs_exclude_ext} = $2; + } elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1; + } elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1; + } elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1; + } elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) 
{ ${$rs_follow_links} = 1; + } elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1; + } elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1; + } elsif (!defined ${$rs_by_percent} and /^(?:by_percent|by-percent)(=|\s+)(.*?)$/) { ${$rs_by_percent} = $2; + } elsif (!defined ${$rs_stdin_name} and /^(?:stdin_name|stdin-name)(=|\s+)(.*?)$/) { ${$rs_stdin_name} = $2; + } elsif (!defined ${$rs_force_on_windows} and /^windows/) { ${$rs_force_on_windows} = 1; + } elsif (!defined ${$rs_force_on_unix} and /^unix/) { ${$rs_force_on_unix} = 1; + } elsif (!defined ${$rs_show_os} and /^(show_os|show-os)/) { ${$rs_show_os} = 1; + } elsif (!defined ${$rs_skip_archive} and /^(?:skip_archive|skip-archive)(=|\s+)(.*?)$/) { ${$rs_skip_archive} = $2; + } elsif (!defined ${$rs_max_file_size} and /^(?:max_file_size|max-file-size)(=|\s+)(\d+)/) { ${$rs_max_file_size} = $2; + } elsif (!defined ${$rs_use_sloccount} and /^(use_sloccount|use-sloccount)/) { ${$rs_use_sloccount} = 1; + } elsif (!defined ${$rs_no_autogen} and /^(no_autogen|no-autogen)/) { ${$rs_no_autogen} = 1; + } elsif (!defined ${$rs_force_git} and /^git/) { ${$rs_force_git} = 1; + } elsif (!defined ${$rs_exclude_list_file} and /^(?:exclude_list_file|exclude-list-file)(=|\s+)(.*?)$/) + { ${$rs_exclude_list_file} = $2; + } elsif (!defined ${$rs_v} and /(verbose|v)((=|\s+)(\d+))?/) { + if (!defined $4) { ${$rs_v} = 0; } + else { ${$rs_v} = $4; } + } elsif (!$has_script_lang and /^(?:script_lang|script-lang)(=|\s+)(.*?)$/) { + push @{$ra_script_lang} , $2; + } elsif (!$has_force_lang and /^(?:force_lang|force-lang)(=|\s+)(.*?)$/) { + push @{$ra_force_lang} , $2; + } elsif (!defined ${$rs_show_ext} and /^(show_ext|show-ext)((=|\s+)(.*))?$/) { + if (!defined $4) { ${$rs_show_ext} = 0; } + else { ${$rs_show_ext} = $4; } + } elsif (!defined ${$rs_show_lang} and /^(show_lang|show-lang)((=|\s+)(.*))?s/){ + if (!defined $4) { ${$rs_show_lang} = 0; } + else { ${$rs_show_lang} = $4; } + } elsif (!defined ${$rs_strip_str_comments} and /^(strip_str_comments|strip-str-comments)/) { ${$rs_strip_str_comments} = 1; + } elsif (!defined ${$rs_file_encoding} and /^(?:file_encoding|file-encoding)(=|\s+)(\S+)/) { ${$rs_file_encoding} = $2; + } elsif (!defined ${$rs_docstring_as_code} and /^(docstring_as_code|docstring-as-code)/) { ${$rs_docstring_as_code} = 1; + } elsif (!defined ${$rs_stat} and /stat/) { ${$rs_stat} = 1; + } + + } +} # 1}}} +sub trick_pp_packer_encode { # {{{1 + use Encode; + # PAR::Packer gives 'Unknown PerlIO layer "encoding"' unless it is + # forced into using this module. 
+ my ($OUT, $JunkFile) = tempfile(UNLINK => 1); # delete on exit + open($OUT, "> :encoding(utf8)", $JunkFile); + close($OUT); +} +# 1}}} +sub really_is_smarty { # {{{1 + # Given filename, returns TRUE if its contents look like Smarty template + my ($filename, ) = @_; + + print "-> really_is_smarty($filename)\n" if $opt_v > 2; + + my @lines = read_file($filename); + + my $points = 0; + foreach my $L (@lines) { + if (($L =~ /\{(if|include)\s/) or + ($L =~ /\{\/if\}/) or + ($L =~ /(\{\*|\*\})/) or + ($L =~ /\{\$\w/)) { + ++$points; + } + last if $points >= 2; + } + print "<- really_is_smarty(points=$points)\n" if $opt_v > 2; + return $points >= 2; +} # 1}}} +sub check_alternate_config_files { # {{{1 + my ($list_file, $exclude_list_file, $read_lang_def, + $force_lang_def, $diff_list_file, ) = @_; + my $found_it = ""; + foreach my $file ($list_file, + $exclude_list_file, + $read_lang_def, + $force_lang_def, + $diff_list_file ) { + next unless defined $file; + my $dir = dirname $file; + next unless -r $dir and -d $dir; + my $bn = basename $config_file; + if (-r "$dir/$bn") { + $found_it = "$dir/$bn"; + print "Using configuration file $found_it\n" if $opt_v; + last; + } + } + return $found_it; +} +# 1}}} +# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount +my %php_files = (); # really_is_php() +sub really_is_pascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. + +# This isn't as obvious as it seems. +# Many ".p" files are Perl files +# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p), +# others are C extractions +# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p +# and some files in linuxconf). +# However, test files in "p2c" really are Pascal, for example. + +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following +# (ignoring {...} and (*...*) comments): +# 1. "^..program NAME" or "^..unit NAME", +# 2. "procedure", "function", "^..interface", or "^..implementation", +# 3. a "begin", and +# 4. it ends with "end.", +# +# Or it has all of the following: +# 1. "^..module NAME" and +# 2. it ends with "end.". +# +# Or it has all of the following: +# 1. "^..program NAME", +# 2. a "begin", and +# 3. it ends with "end.". +# +# The "end." requirements in particular filter out non-Pascal. +# +# Note (jgb): this does not detect Pascal main files in fpc, like +# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in +# it + + my $is_pascal = 0; # Value to determine. 
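    # For illustration only (not from the original SLOCCount text): the
    # smallest input accepted by the "program + begin + end." clause
    # described above would be
    #
    #     program tiny;
    #     begin
    #     end.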
+ + my $has_program = 0; + my $has_unit = 0; + my $has_module = 0; + my $has_procedure_or_function = 0; + my $found_begin = 0; + my $found_terminating_end = 0; + my $has_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;} + if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;} + if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; } + if (m/\bfunction\b/i) { $has_procedure_or_function = 1; } + if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; } + if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; } + if (m/\bbegin\b/i) { $has_begin = 1; } + # Originally I said: + # "This heuristic fails if there are multi-line comments after + # "end."; I haven't seen that in real Pascal programs:" + # But jgb found there are a good quantity of them in Debian, specially in + # fpc (at the end of a lot of files there is a multiline comment + # with the changelog for the file). + # Therefore, assume Pascal if "end." appears anywhere in the file. + if (m/end\.\s*$/i) {$found_terminating_end = 1;} +# elsif (m/\S/) {$found_terminating_end = 0;} + } + close(PASCAL_FILE); + + # Okay, we've examined the entire file looking for clues; + # let's use those clues to determine if it's really Pascal: + + if ( ( ($has_unit || $has_program) && $has_procedure_or_function && + $has_begin && $found_terminating_end ) || + ( $has_module && $found_terminating_end ) || + ( $has_program && $has_begin && $found_terminating_end ) ) + {$is_pascal = 1;} + + return $is_pascal; +} # 1}}} +sub really_is_incpascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. +# For .inc files (mainly seen in fpc) + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it is Pascal if any of the following: +# 1. really_is_pascal returns true +# 2. Any usual reserved word is found (program, unit, const, begin...) + + # If the general routine for Pascal files works, we have it + if (really_is_pascal($filename)) { + return 1; + } + + my $is_pascal = 0; # Value to determine. + my $found_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bprocedure\b/i) {$is_pascal = 1; } + if (m/\bfunction\b/i) {$is_pascal = 1; } + if (m/^\s*interface\s+/i) {$is_pascal = 1; } + if (m/^\s*implementation\s+/i) {$is_pascal = 1; } + if (m/\bconstant\s+/i) {$is_pascal=1;} + if (m/\bbegin\b/i) { $found_begin = 1; } + if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;} + if ($is_pascal) { + last; + } + } + + close(PASCAL_FILE); + return $is_pascal; +} # 1}}} +sub really_is_php { # {{{1 +# Given filename, returns TRUE if its contents really is php. + + my $filename = shift; + chomp($filename); + + my $is_php = 0; # Value to determine. + # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # + my $script_surround = 0; # ; bit 0 =