diff options
Diffstat (limited to 'scripts/GeneratePatches.pm.in')
-rwxr-xr-x | scripts/GeneratePatches.pm.in | 334 |
1 files changed, 334 insertions, 0 deletions
#! @perl@ -w -I@libexecdir@/nix

use strict;
use warnings;
use File::Temp qw(tempdir);

# Helpers for generating binary (bsdiff) patches between NAR archives
# and for maintaining the corresponding patch manifests.
#
# NOTE(review): this file calls addPatch(), which is not defined here;
# it is presumably provided by the manifest-handling module loaded by
# the calling script -- confirm.


# Some patch generation options.

# Max size of NAR archives to generate patches for.
my $maxNarSize = $ENV{"NIX_MAX_NAR_SIZE"};
$maxNarSize = 160 * 1024 * 1024 if !defined $maxNarSize;

# If patch is bigger than this fraction of full archive, reject.
my $maxPatchFraction = $ENV{"NIX_PATCH_FRACTION"};
$maxPatchFraction = 0.60 if !defined $maxPatchFraction;

# CPU time limit (in seconds, passed to `ulimit -t') for one bsdiff run.
my $timeLimit = $ENV{"NIX_BSDIFF_TIME_LIMIT"};
$timeLimit = 180 if !defined $timeLimit;

# Hash algorithm used for all hashes recorded in the patch manifest.
my $hashAlgo = "sha256";


# Return a hash (path => 1) of those keys of the hash reference
# $narFiles that are worth generating patches for: derivations,
# shell scripts, patch files and source archives are filtered out.
sub findOutputPaths {
    my $narFiles = shift;

    my %outPaths;

    foreach my $p (keys %{$narFiles}) {

        # Ignore derivations.
        next if ($p =~ /\.drv$/);

        # Ignore builders (too much ambiguity -- they're all called
        # `builder.sh').
        next if ($p =~ /\.sh$/);
        next if ($p =~ /\.patch$/);

        # Don't bother including tar files etc.
        next if ($p =~ /\.tar$/ || $p =~ /\.tar\.(gz|bz2|Z|lzma|xz)$/ || $p =~ /\.zip$/ || $p =~ /\.bin$/ || $p =~ /\.tgz$/ || $p =~ /\.rpm$/ || $p =~ /cvs-export$/ || $p =~ /fetchhg$/);

        $outPaths{$p} = 1;
    }

    return %outPaths;
}


# Split the last component of path $p, which must look like
# `<hash>-<name>[-<version>]', into ($name, $version).  For example,
# `/nix/store/abc123-firefox-3.6.1' yields ("firefox", "3.6.1").  The
# version is the empty string if the path has no version part.
# Returns undef if the path does not have the expected form.
sub getNameVersion {
    my $p = shift;

    # Only look at the captures if the match succeeded: after a failed
    # match $1 and $2 still hold the values of an earlier successful
    # match, which would yield a bogus name and version.
    return undef
        unless $p =~ /\/[0-9a-z]+((?:-[a-zA-Z][^\/-]*)+)([^\/]*)$/;

    my ($name, $version) = ($1, $2);
    $name =~ s/^-//;
    $version =~ s/^-//;
    return ($name, $version);
}


# A quick hack to get a measure of the `distance' between two
# versions: it's just the position of the first character that differs
# (or 999 if they are the same).  Larger values mean closer versions.
sub versionDiff {
    my $s = shift;
    my $t = shift;
    my $i;
    return 999 if $s eq $t;
    for ($i = 0; $i < length $s; $i++) {
        return $i if $i >= length $t or
            substr($s, $i, 1) ne substr($t, $i, 1);
    }
    # $s is a proper prefix of $t.
    return $i;
}


# Return the location of the compressed NAR archive of $storePath:
# the last component of the `url' field of the first entry for
# $storePath in the hash reference $narFiles, placed below the
# directory $narPath.  Dies if the path has no entry or the URL has no
# usable file name.
sub getNarBz2 {
    my $narPath = shift;
    my $narFiles = shift;
    my $storePath = shift;

    my $narFileList = $$narFiles{$storePath};
    die "missing path $storePath" unless defined $narFileList;

    my $narFile = @{$narFileList}[0];
    die "no archive listed for $storePath" unless defined $narFile;

    # Test the match itself rather than `defined $1': a failed match
    # leaves $1 set to whatever an earlier successful match captured.
    my $url = $narFile->{url};
    die "cannot determine archive file name for $storePath"
        unless defined $url && $url =~ /\/([^\/]+)$/;
    return "$narPath/$1";
}


# Return 1 if the hash reference $patches already lists a patch that
# produces $storePath from the base path $basePath, and 0 otherwise.
sub containsPatch {
    my $patches = shift;
    my $storePath = shift;
    my $basePath = shift;
    my $patchList = $$patches{$storePath};
    return 0 if !defined $patchList;
    foreach my $patch (@{$patchList}) {
        # !!! baseHash might differ
        return 1 if $patch->{basePath} eq $basePath;
    }
    return 0;
}


# For every output path in $dstNarFiles that is not also present in
# $srcNarFiles, look for the source paths with the same package name
# and the closest version, generate a bsdiff patch from each of them,
# store it in $patchesPath and register it in $dstPatches with a URL
# below $patchesURL.
#
#   $srcNarFiles, $dstNarFiles  hash refs: path => list of archive
#                               entries (`url', `size' and `system'
#                               fields of the first entry are used)
#   $srcPatches, $dstPatches    hash refs: path => list of patches
#   $narPath                    directory holding the .nar.bz2 files
#   $patchesPath                directory to store the patches in
#   $patchesURL                 URL prefix of $patchesPath
#   $tmpDir                     scratch directory (files A, B, DIFF)
sub generatePatches {
    my ($srcNarFiles, $dstNarFiles, $srcPatches, $dstPatches, $narPath, $patchesPath, $patchesURL, $tmpDir) = @_;

    my %srcOutPaths = findOutputPaths($srcNarFiles);
    my %dstOutPaths = findOutputPaths($dstNarFiles);

    # For each output path in the destination, see if we need to / can
    # create a patch.

    print STDERR "creating patches...\n";

    foreach my $p (keys %dstOutPaths) {

        # If exactly the same path already exists in the source, skip it.
        next if defined $srcOutPaths{$p};

        print " $p\n";

        # If not, then we should find the paths in the source that are
        # `most' likely to be present on a system that wants to
        # install this path.

        (my $name, my $version) = getNameVersion($p);
        next unless defined $name && defined $version;

        my @closest = ();
        my $closestVersion;
        my $minDist = -1; # actually, larger means closer

        # Find all source paths with the same name.

        foreach my $q (keys %srcOutPaths) {
            (my $name2, my $version2) = getNameVersion($q);
            next unless defined $name2 && defined $version2;

            if ($name eq $name2) {

                # $q is the candidate base (source), $p the path we
                # want to produce (destination).
                my $srcSystem = @{$$srcNarFiles{$q}}[0]->{system};
                my $dstSystem = @{$$dstNarFiles{$p}}[0]->{system};
                if (defined $srcSystem && defined $dstSystem && $srcSystem ne $dstSystem) {
                    print " SKIPPING $q due to different systems ($srcSystem vs. $dstSystem)\n";
                    next;
                }

                # If the sizes differ too much, then skip.  This
                # disambiguates between, e.g., a real component and a
                # wrapper component (cf. Firefox in Nixpkgs).
                my $srcSize = @{$$srcNarFiles{$q}}[0]->{size};
                my $dstSize = @{$$dstNarFiles{$p}}[0]->{size};

                # A missing or zero size would make the ratio
                # computation below die with a division by zero.
                unless ($srcSize && $dstSize) {
                    print " SKIPPING $q due to missing size information\n";
                    next;
                }

                my $ratio = $srcSize / $dstSize;
                $ratio = 1 / $ratio if $ratio < 1;
                # print " SIZE $srcSize $dstSize $ratio $q\n";

                if ($ratio >= 3) {
                    print " SKIPPING $q due to size ratio $ratio ($srcSize vs. $dstSize)\n";
                    next;
                }

                # If there are multiple matching names, include the
                # ones with the closest version numbers.
                my $dist = versionDiff($version, $version2);
                if ($dist > $minDist) {
                    $minDist = $dist;
                    @closest = ($q);
                    $closestVersion = $version2;
                } elsif ($dist == $minDist) {
                    push @closest, $q;
                }
            }
        }

        if (scalar(@closest) == 0) {
            print " NO BASE: $p\n";
            next;
        }

        foreach my $closest (@closest) {

            # Generate a patch between $closest and $p.
            print STDERR " $p <- $closest\n";

            # If the patch already exists, skip it.
            if (containsPatch($srcPatches, $p, $closest) ||
                containsPatch($dstPatches, $p, $closest))
            {
                print " skipping, already exists\n";
                next;
            }

            my $srcNarBz2 = getNarBz2($narPath, $srcNarFiles, $closest);
            my $dstNarBz2 = getNarBz2($narPath, $dstNarFiles, $p);

            if (! -f $srcNarBz2) {
                warn "patch source archive $srcNarBz2 is missing\n";
                next;
            }

            # Unpack both archives; bsdiff works on the uncompressed NARs.
            system("@bunzip2@ < $srcNarBz2 > $tmpDir/A") == 0
                or die "cannot unpack $srcNarBz2";

            if ((stat "$tmpDir/A")[7] >= $maxNarSize) {
                print " skipping, source is too large\n";
                next;
            }

            system("@bunzip2@ < $dstNarBz2 > $tmpDir/B") == 0
                or die "cannot unpack $dstNarBz2";

            if ((stat "$tmpDir/B")[7] >= $maxNarSize) {
                print " skipping, destination is too large\n";
                next;
            }

            # Compute the binary diff, bounded by the CPU time limit.
            my $time1 = time();
            my $res = system("ulimit -t $timeLimit; @libexecdir@/bsdiff $tmpDir/A $tmpDir/B $tmpDir/DIFF");
            my $time2 = time();
            if ($res) {
                warn "binary diff computation aborted after ", $time2 - $time1, " seconds\n";
                next;
            }

            my $baseHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/A` or die;
            chomp $baseHash;

            my $narHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/B` or die;
            chomp $narHash;

            my $narDiffHash = `@bindir@/nix-hash --flat --type $hashAlgo --base32 $tmpDir/DIFF` or die;
            chomp $narDiffHash;

            my $narDiffSize = (stat "$tmpDir/DIFF")[7];
            my $dstNarBz2Size = (stat $dstNarBz2)[7];

            print " size $narDiffSize; full size $dstNarBz2Size; ", $time2 - $time1, " seconds\n";

            # The patch is only useful if it is sufficiently smaller
            # than the compressed full archive.  (The first test also
            # guarantees a non-zero divisor for the second.)
            if ($narDiffSize >= $dstNarBz2Size) {
                print " rejecting; patch bigger than full archive\n";
                next;
            }

            if ($narDiffSize / $dstNarBz2Size >= $maxPatchFraction) {
                print " rejecting; patch too large relative to full archive\n";
                next;
            }

            my $finalName = "$narDiffHash.nar-bsdiff";

            if (-e "$patchesPath/$finalName") {
                print " not copying, already exists\n";
            }

            else {
                # Copy to a temporary name first and rename afterwards,
                # so that a partially copied patch never appears under
                # its final name.
                system("cp '$tmpDir/DIFF' '$patchesPath/$finalName.tmp'") == 0
                    or die "cannot copy diff";
                rename("$patchesPath/$finalName.tmp", "$patchesPath/$finalName")
                    or die "cannot rename $patchesPath/$finalName.tmp";
            }

            # Add the patch to the manifest.
            addPatch($dstPatches, $p,
                { url => "$patchesURL/$finalName", hash => "$hashAlgo:$narDiffHash"
                , size => $narDiffSize, basePath => $closest, baseHash => "$hashAlgo:$baseHash"
                , narHash => "$hashAlgo:$narHash", patchType => "nar-bsdiff"
                });
        }
    }
}


# Propagate useful patches from $srcPatches to $dstPatches.  A patch
# is useful if it produces either paths in the $dstNarFiles or paths
# that can be used as the base for other useful patches.
sub propagatePatches {
    my ($srcPatches, $dstNarFiles, $dstPatches) = @_;

    print STDERR "propagating patches...\n";

    my $changed;
    do {
        # !!! we repeat this to reach the transitive closure; inefficient
        $changed = 0;

        print STDERR "loop\n";

        # All paths currently used as the base of a destination patch.
        my %dstBasePaths;
        foreach my $q (keys %{$dstPatches}) {
            foreach my $patch (@{$$dstPatches{$q}}) {
                $dstBasePaths{$patch->{basePath}} = 1;
            }
        }

        foreach my $p (keys %{$srcPatches}) {
            my $patchList = $$srcPatches{$p};

            my $include = 0;

            # Is path $p included in the destination?  If so, include
            # patches that produce it.
            $include = 1 if defined $$dstNarFiles{$p};

            # Is path $p a path that serves as a base for paths in the
            # destination?  If so, include patches that produce it.
            # !!! check baseHash
            $include = 1 if defined $dstBasePaths{$p};

            if ($include) {
                foreach my $patch (@{$patchList}) {
                    # Another round is needed whenever addPatch
                    # returns a true value.
                    $changed = 1 if addPatch($dstPatches, $p, $patch);
                }
            }

        }

    } while $changed;
}


# Add all new patches in $srcPatches to $dstPatches.
sub copyPatches {
    my ($srcPatches, $dstPatches) = @_;
    foreach my $p (keys %{$srcPatches}) {
        addPatch($dstPatches, $p, $_) foreach @{$$srcPatches{$p}};
    }
}


return 1;