diff options
Diffstat (limited to 'third_party/git/contrib/stats')
-rwxr-xr-x | third_party/git/contrib/stats/git-common-hash | 26 | ||||
-rwxr-xr-x | third_party/git/contrib/stats/mailmap.pl | 70 | ||||
-rwxr-xr-x | third_party/git/contrib/stats/packinfo.pl | 212 |
3 files changed, 0 insertions, 308 deletions
diff --git a/third_party/git/contrib/stats/git-common-hash b/third_party/git/contrib/stats/git-common-hash deleted file mode 100755 index e27fd088be1b..000000000000 --- a/third_party/git/contrib/stats/git-common-hash +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -# This script displays the distribution of longest common hash prefixes. -# This can be used to determine the minimum prefix length to use -# for object names to be unique. - -git rev-list --objects --all | sort | perl -lne ' - substr($_, 40) = ""; - # uncomment next line for a distribution of bits instead of hex chars - # $_ = unpack("B*",pack("H*",$_)); - if (defined $p) { - ($p ^ $_) =~ /^(\0*)/; - $common = length $1; - if (defined $pcommon) { - $count[$pcommon > $common ? $pcommon : $common]++; - } else { - $count[$common]++; # first item - } - } - $p = $_; - $pcommon = $common; - END { - $count[$common]++; # last item - print "$_: $count[$_]" for 0..$#count; - } -' diff --git a/third_party/git/contrib/stats/mailmap.pl b/third_party/git/contrib/stats/mailmap.pl deleted file mode 100755 index 9513f5e35b44..000000000000 --- a/third_party/git/contrib/stats/mailmap.pl +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/perl - -use warnings 'all'; -use strict; -use Getopt::Long; - -my $match_emails; -my $match_names; -my $order_by = 'count'; -Getopt::Long::Configure(qw(bundling)); -GetOptions( - 'emails|e!' => \$match_emails, - 'names|n!' => \$match_names, - 'count|c' => sub { $order_by = 'count' }, - 'time|t' => sub { $order_by = 'stamp' }, -) or exit 1; -$match_emails = 1 unless $match_names; - -my $email = {}; -my $name = {}; - -open(my $fh, '-|', "git log --format='%at <%aE> %aN'"); -while(<$fh>) { - my ($t, $e, $n) = /(\S+) <(\S+)> (.*)/; - mark($email, $e, $n, $t); - mark($name, $n, $e, $t); -} -close($fh); - -if ($match_emails) { - foreach my $e (dups($email)) { - foreach my $n (vals($email->{$e})) { - show($n, $e, $email->{$e}->{$n}); - } - print "\n"; - } -} -if ($match_names) { - foreach my $n (dups($name)) { - foreach my $e (vals($name->{$n})) { - show($n, $e, $name->{$n}->{$e}); - } - print "\n"; - } -} -exit 0; - -sub mark { - my ($h, $k, $v, $t) = @_; - my $e = $h->{$k}->{$v} ||= { count => 0, stamp => 0 }; - $e->{count}++; - $e->{stamp} = $t unless $t < $e->{stamp}; -} - -sub dups { - my $h = shift; - return grep { keys($h->{$_}) > 1 } keys($h); -} - -sub vals { - my $h = shift; - return sort { - $h->{$b}->{$order_by} <=> $h->{$a}->{$order_by} - } keys($h); -} - -sub show { - my ($n, $e, $h) = @_; - print "$n <$e> ($h->{$order_by})\n"; -} diff --git a/third_party/git/contrib/stats/packinfo.pl b/third_party/git/contrib/stats/packinfo.pl deleted file mode 100755 index be188c0f11db..000000000000 --- a/third_party/git/contrib/stats/packinfo.pl +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/perl -# -# This tool will print vaguely pretty information about a pack. It -# expects the output of "git verify-pack -v" as input on stdin. -# -# $ git verify-pack -v | packinfo.pl -# -# This prints some full-pack statistics; currently "all sizes", "all -# path sizes", "tree sizes", "tree path sizes", and "depths". -# -# * "all sizes" stats are across every object size in the file; -# full sizes for base objects, and delta size for deltas. -# * "all path sizes" stats are across all object's "path sizes". -# A path size is the sum of the size of the delta chain, including the -# base object. In other words, it's how many bytes need be read to -# reassemble the file from deltas. -# * "tree sizes" are object sizes grouped into delta trees. -# * "tree path sizes" are path sizes grouped into delta trees. -# * "depths" should be obvious. -# -# When run as: -# -# $ git verify-pack -v | packinfo.pl -tree -# -# the trees of objects are output along with the stats. This looks -# like: -# -# 0 commit 031321c6... 803 803 -# -# 0 blob 03156f21... 1767 1767 -# 1 blob f52a9d7f... 10 1777 -# 2 blob a8cc5739... 51 1828 -# 3 blob 660e90b1... 15 1843 -# 4 blob 0cb8e3bb... 33 1876 -# 2 blob e48607f0... 311 2088 -# size: count 6 total 2187 min 10 max 1767 mean 364.50 median 51 std_dev 635.85 -# path size: count 6 total 11179 min 1767 max 2088 mean 1863.17 median 1843 std_dev 107.26 -# -# The first number after the sha1 is the object size, the second -# number is the path size. The statistics are across all objects in -# the previous delta tree. Obviously they are omitted for trees of -# one object. -# -# When run as: -# -# $ git verify-pack -v | packinfo.pl -tree -filenames -# -# it adds filenames to the tree. Getting this information is slow: -# -# 0 blob 03156f21... 1767 1767 Documentation/git-lost-found.txt @ tags/v1.2.0~142 -# 1 blob f52a9d7f... 10 1777 Documentation/git-lost-found.txt @ tags/v1.5.0-rc1~74 -# 2 blob a8cc5739... 51 1828 Documentation/git-lost+found.txt @ tags/v0.99.9h^0 -# 3 blob 660e90b1... 15 1843 Documentation/git-lost+found.txt @ master~3222^2~2 -# 4 blob 0cb8e3bb... 33 1876 Documentation/git-lost+found.txt @ master~3222^2~3 -# 2 blob e48607f0... 311 2088 Documentation/git-lost-found.txt @ tags/v1.5.2-rc3~4 -# size: count 6 total 2187 min 10 max 1767 mean 364.50 median 51 std_dev 635.85 -# path size: count 6 total 11179 min 1767 max 2088 mean 1863.17 median 1843 std_dev 107.26 -# -# When run as: -# -# $ git verify-pack -v | packinfo.pl -dump -# -# it prints out "sha1 size pathsize depth" for each sha1 in lexical -# order. -# -# 000079a2eaef17b7eae70e1f0f635557ea67b644 30 472 7 -# 00013cafe6980411aa6fdd940784917b5ff50f0a 44 1542 4 -# 000182eacf99cde27d5916aa415921924b82972c 499 499 0 -# ... -# -# This is handy for comparing two packs. Adding "-filenames" will add -# filenames, as per "-tree -filenames" above. - -use strict; -use Getopt::Long; - -my $filenames = 0; -my $tree = 0; -my $dump = 0; -GetOptions("tree" => \$tree, - "filenames" => \$filenames, - "dump" => \$dump); - -my %parents; -my %children; -my %sizes; -my @roots; -my %paths; -my %types; -my @commits; -my %names; -my %depths; -my @depths; - -while (<STDIN>) { - my ($sha1, $type, $size, $space, $offset, $depth, $parent) = split(/\s+/, $_); - next unless ($sha1 =~ /^[0-9a-f]{40}$/); - $depths{$sha1} = $depth || 0; - push(@depths, $depth || 0); - push(@commits, $sha1) if ($type eq 'commit'); - push(@roots, $sha1) unless $parent; - $parents{$sha1} = $parent; - $types{$sha1} = $type; - push(@{$children{$parent}}, $sha1); - $sizes{$sha1} = $size; -} - -if ($filenames && ($tree || $dump)) { - open(NAMES, "git name-rev --all|"); - while (<NAMES>) { - if (/^(\S+)\s+(.*)$/) { - my ($sha1, $name) = ($1, $2); - $names{$sha1} = $name; - } - } - close NAMES; - - for my $commit (@commits) { - my $name = $names{$commit}; - open(TREE, "git ls-tree -t -r $commit|"); - print STDERR "Plumbing tree $name\n"; - while (<TREE>) { - if (/^(\S+)\s+(\S+)\s+(\S+)\s+(.*)$/) { - my ($mode, $type, $sha1, $path) = ($1, $2, $3, $4); - $paths{$sha1} = "$path @ $name"; - } - } - close TREE; - } -} - -sub stats { - my @data = sort {$a <=> $b} @_; - my $min = $data[0]; - my $max = $data[$#data]; - my $total = 0; - my $count = scalar @data; - for my $datum (@data) { - $total += $datum; - } - my $mean = $total / $count; - my $median = $data[int(@data / 2)]; - my $diff_sum = 0; - for my $datum (@data) { - $diff_sum += ($datum - $mean)**2; - } - my $std_dev = sqrt($diff_sum / $count); - return ($count, $total, $min, $max, $mean, $median, $std_dev); -} - -sub print_stats { - my $name = shift; - my ($count, $total, $min, $max, $mean, $median, $std_dev) = stats(@_); - printf("%s: count %s total %s min %s max %s mean %.2f median %s std_dev %.2f\n", - $name, $count, $total, $min, $max, $mean, $median, $std_dev); -} - -my @sizes; -my @path_sizes; -my @all_sizes; -my @all_path_sizes; -my %path_sizes; - -sub dig { - my ($sha1, $depth, $path_size) = @_; - $path_size += $sizes{$sha1}; - push(@sizes, $sizes{$sha1}); - push(@all_sizes, $sizes{$sha1}); - push(@path_sizes, $path_size); - push(@all_path_sizes, $path_size); - $path_sizes{$sha1} = $path_size; - if ($tree) { - printf("%3d%s %6s %s %8d %8d %s\n", - $depth, (" " x $depth), $types{$sha1}, - $sha1, $sizes{$sha1}, $path_size, $paths{$sha1}); - } - for my $child (@{$children{$sha1}}) { - dig($child, $depth + 1, $path_size); - } -} - -my @tree_sizes; -my @tree_path_sizes; - -for my $root (@roots) { - undef @sizes; - undef @path_sizes; - dig($root, 0, 0); - my ($aa, $sz_total) = stats(@sizes); - my ($bb, $psz_total) = stats(@path_sizes); - push(@tree_sizes, $sz_total); - push(@tree_path_sizes, $psz_total); - if ($tree) { - if (@sizes > 1) { - print_stats(" size", @sizes); - print_stats("path size", @path_sizes); - } - print "\n"; - } -} - -if ($dump) { - for my $sha1 (sort keys %sizes) { - print "$sha1 $sizes{$sha1} $path_sizes{$sha1} $depths{$sha1} $paths{$sha1}\n"; - } -} else { - print_stats(" all sizes", @all_sizes); - print_stats(" all path sizes", @all_path_sizes); - print_stats(" tree sizes", @tree_sizes); - print_stats("tree path sizes", @tree_path_sizes); - print_stats(" depths", @depths); -} |