about summary refs log tree commit diff
path: root/scripts/GeneratePatches.pm.in
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/GeneratePatches.pm.in')
-rwxr-xr-xscripts/GeneratePatches.pm.in334
1 files changed, 334 insertions, 0 deletions
diff --git a/scripts/GeneratePatches.pm.in b/scripts/GeneratePatches.pm.in
new file mode 100755
index 000000000000..2d2653255e54
--- /dev/null
+++ b/scripts/GeneratePatches.pm.in
@@ -0,0 +1,334 @@
+#! @perl@ -w -I@libexecdir@/nix
+
+use strict;
+use File::Temp qw(tempdir);
+
+
+# Some patch generations options.
+
+# Max size of NAR archives to generate patches for.
+my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"};
+$maxNarSize = 160 * 1024 * 1024 if !defined $maxNarSize;
+
+# If patch is bigger than this fraction of full archive, reject.
+my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"};
+$maxPatchFraction = 0.60 if !defined $maxPatchFraction;
+
+my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"};
+$timeLimit = 180 if !defined $timeLimit;
+
+my $hashAlgo = "sha256";
+
+
+sub findOutputPaths {
+    my $narFiles = shift;
+
+    my %outPaths;
+    
+    foreach my $p (keys %{$narFiles}) {
+
+        # Ignore derivations.
+        next if ($p =~ /\.drv$/);
+        
+        # Ignore builders (too much ambiguity -- they're all called
+        # `builder.sh').
+        next if ($p =~ /\.sh$/);
+        next if ($p =~ /\.patch$/);
+        
+        # Don't bother including tar files etc.
+        next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/);
+
+        $outPaths{$p} = 1;
+    }
+
+    return %outPaths;
+}
+
+
+sub getNameVersion {
+    my $p = shift;
+    $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;
+    my $name = $1;
+    my $version = $2;
+    return undef unless defined $name && defined $version;
+    $name =~ s/^-//;
+    $version =~ s/^-//;
+    return ($name, $version);
+}
+
+
+# A quick hack to get a measure of the `distance' between two
+# versions: it's just the position of the first character that differs
+# (or 999 if they are the same).
+sub versionDiff {
+    my $s = shift;
+    my $t = shift;
+    my $i;
+    return 999 if $s eq $t;
+    for ($i = 0; $i < length $s; $i++) {
+        return $i if $i >= length $t or
+            substr($s, $i, 1) ne substr($t, $i, 1);
+    }
+    return $i;
+}
+
+
+sub getNarBz2 {
+    my $narPath = shift;
+    my $narFiles = shift;
+    my $storePath = shift;
+    
+    my $narFileList = $$narFiles{$storePath};
+    die "missing path $storePath" unless defined $narFileList;
+
+    my $narFile = @{$narFileList}[0];
+    die unless defined $narFile;
+
+    $narFile->{url} =~ /\/([^\/]+)$/;
+    die unless defined $1;
+    return "$narPath/$1";
+}
+
+
+sub containsPatch {
+    my $patches = shift;
+    my $storePath = shift;
+    my $basePath = shift;
+    my $patchList = $$patches{$storePath};
+    return 0 if !defined $patchList;
+    my $found = 0;
+    foreach my $patch (@{$patchList}) {
+        # !!! baseHash might differ
+        return 1 if $patch->{basePath} eq $basePath;
+    }
+    return 0;
+}
+
+
+sub generatePatches {
+    my ($srcNarFiles, $dstNarFiles, $srcPatches, $dstPatches, $narPath, $patchesPath, $patchesURL, $tmpDir) = @_;
+
+    my %srcOutPaths = findOutputPaths $srcNarFiles;
+    my %dstOutPaths = findOutputPaths $dstNarFiles;
+
+    # For each output path in the destination, see if we need to / can
+    # create a patch.
+
+    print STDERR "creating patches...\n";
+
+    foreach my $p (keys %dstOutPaths) {
+
+        # If exactly the same path already exists in the source, skip it.
+        next if defined $srcOutPaths{$p};
+    
+        print "  $p\n";
+
+        # If not, then we should find the paths in the source that are
+        # `most' likely to be present on a system that wants to
+        # install this path.
+
+        (my $name, my $version) = getNameVersion $p;
+        next unless defined $name && defined $version;
+
+        my @closest = ();
+        my $closestVersion;
+        my $minDist = -1; # actually, larger means closer
+
+        # Find all source paths with the same name.
+
+        foreach my $q (keys %srcOutPaths) {
+            (my $name2, my $version2) = getNameVersion $q;
+            next unless defined $name2 && defined $version2;
+
+            if ($name eq $name2) {
+
+                my $srcSystem = @{$$dstNarFiles{$p}}[0]->{system};
+                my $dstSystem = @{$$srcNarFiles{$q}}[0]->{system};
+                if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) {
+                    print "    SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n";
+                    next;
+                }
+
+                # If the sizes differ too much, then skip.  This
+                # disambiguates between, e.g., a real component and a
+                # wrapper component (cf. Firefox in Nixpkgs).
+                my $srcSize = @{$$srcNarFiles{$q}}[0]->{size};
+                my $dstSize = @{$$dstNarFiles{$p}}[0]->{size};
+                my $ratio = $srcSize / $dstSize;
+                $ratio = 1 / $ratio if $ratio < 1;
+                # print "  SIZE $srcSize $dstSize $ratio $q\n";
+
+                if ($ratio >= 3) {
+                    print "    SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n";
+                    next;
+                }
+
+                # If there are multiple matching names, include the
+                # ones with the closest version numbers.
+                my $dist = versionDiff $version, $version2;
+                if ($dist > $minDist) {
+                    $minDist = $dist;
+                    @closest = ($q);
+                    $closestVersion = $version2;
+                } elsif ($dist == $minDist) {
+                    push @closest, $q;
+                }
+            }
+        }
+
+        if (scalar(@closest) == 0) {
+            print "    NO BASE: $p\n";
+            next;
+        }
+
+        foreach my $closest (@closest) {
+
+            # Generate a patch between $closest and $p.
+            print STDERR "  $p <- $closest\n";
+
+            # If the patch already exists, skip it.
+            if (containsPatch($srcPatches, $p, $closest) ||
+                containsPatch($dstPatches, $p, $closest))
+            {
+                print "    skipping, already exists\n";
+                next;
+            }
+
+            my $srcNarBz2 = getNarBz2 $narPath, $srcNarFiles, $closest;
+            my $dstNarBz2 = getNarBz2 $narPath, $dstNarFiles, $p;
+
+            if (! -f $srcNarBz2) {
+                warn "patch source archive $srcNarBz2 is missing\n";
+                next;
+            }
+
+            system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
+                or die "cannot unpack $srcNarBz2";
+
+            if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
+                print "    skipping, source is too large\n";
+                next;
+            }
+        
+            system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
+                or die "cannot unpack $dstNarBz2";
+
+            if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
+                print "    skipping, destination is too large\n";
+                next;
+            }
+        
+            my $time1 = time();
+            my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF");
+            my $time2 = time();
+            if ($res) {
+                warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n";
+                next;
+            }
+
+            my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die;
+            chomp $baseHash;
+
+            my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die;
+            chomp $narHash;
+
+            my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die;
+            chomp $narDiffHash;
+
+            my $narDiffSize = (stat "$tmpDir/DIFF")[7];
+            my $dstNarBz2Size = (stat $dstNarBz2)[7];
+
+            print "    size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n";
+        
+            if ($narDiffSize >= $dstNarBz2Size) {
+                print "    rejecting; patch bigger than full archive\n";
+                next;
+            }
+    
+            if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
+                print "    rejecting; patch too large relative to full archive\n";
+                next;
+            }
+    
+            my $finalName = "$narDiffHash.nar-bsdiff";
+
+            if (-e "$patchesPath/$finalName") {
+                print "    not copying, already exists\n";
+            }
+
+            else {
+                system("cp '$tmpDir/DIFF' '$patchesPath/$finalName.tmp'") == 0
+                    or die "cannot copy diff";
+                rename("$patchesPath/$finalName.tmp", "$patchesPath/$finalName")
+                    or die "cannot rename $patchesPath/$finalName.tmp";
+            }
+        
+            # Add the patch to the manifest.
+            addPatch $dstPatches, $p,
+                { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
+                , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
+                , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
+                };
+        }
+    }
+}
+
+
+# Propagate useful patches from $srcPatches to $dstPatches.  A patch
+# is useful if it produces either paths in the $dstNarFiles or paths
+# that can be used as the base for other useful patches.
+sub propagatePatches {
+    my ($srcPatches, $dstNarFiles, $dstPatches) = @_;
+
+    print STDERR "propagating patches...\n";
+
+    my $changed;
+    do {
+        # !!! we repeat this to reach the transitive closure; inefficient
+        $changed = 0;
+
+        print STDERR "loop\n";
+
+        my %dstBasePaths;
+        foreach my $q (keys %{$dstPatches}) {
+            foreach my $patch (@{$$dstPatches{$q}}) {
+                $dstBasePaths{$patch->{basePath}} = 1;
+            }
+        }
+
+        foreach my $p (keys %{$srcPatches}) {
+            my $patchList = $$srcPatches{$p};
+
+            my $include = 0;
+
+            # Is path $p included in the destination?  If so, include
+            # patches that produce it.
+            $include = 1 if defined $$dstNarFiles{$p};
+
+            # Is path $p a path that serves as a base for paths in the
+            # destination?  If so, include patches that produce it.
+            # !!! check baseHash
+            $include = 1 if defined $dstBasePaths{$p};
+
+            if ($include) {
+                foreach my $patch (@{$patchList}) {
+                    $changed = 1 if addPatch $dstPatches, $p, $patch;
+                }
+            }
+        
+        }
+    
+    } while $changed;
+}
+
+
+# Add all new patches in $srcPatches to $dstPatches.
+sub copyPatches {
+    my ($srcPatches, $dstPatches) = @_;
+    foreach my $p (keys %{$srcPatches}) {
+        addPatch $dstPatches, $p, $_ foreach @{$$srcPatches{$p}};
+    }
+}
+
+
+return 1;