diff options
Diffstat (limited to 'scripts/download-using-manifests.pl.in')
-rwxr-xr-x | scripts/download-using-manifests.pl.in | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/scripts/download-using-manifests.pl.in b/scripts/download-using-manifests.pl.in new file mode 100755 index 000000000000..b670163038e1 --- /dev/null +++ b/scripts/download-using-manifests.pl.in @@ -0,0 +1,377 @@ +#! @perl@ -w @perlFlags@ + +use strict; +use Nix::Config; +use Nix::Manifest; +use Nix::Store; +use Nix::Utils; +use POSIX qw(strftime); +use File::Temp qw(tempdir); + +STDOUT->autoflush(1); + +my $logFile = "$Nix::Config::logDir/downloads"; + +# For queries, skip expensive calls to nix-hash etc. We're just +# estimating the expected download size. +my $fast = 1; + +# ‘--insecure’ is fine because Nix verifies the hash of the result. +my $curl = "$Nix::Config::curl --fail --location --insecure"; + + +# Open the manifest cache and update it if necessary. +my $dbh = updateManifestDB(); +exit 0 unless defined $dbh; # exit if there are no manifests +print "\n"; + + +# $hashCache->{$algo}->{$path} yields the $algo-hash of $path. +my $hashCache; + + +sub parseHash { + my $hash = shift; + if ($hash =~ /^(.+):(.+)$/) { + return ($1, $2); + } else { + return ("md5", $hash); + } +} + + +# Compute the most efficient sequence of downloads to produce the +# given path. +sub computeSmallestDownload { + my $targetPath = shift; + + # Build a graph of all store paths that might contribute to the + # construction of $targetPath, and the special node "start". The + # edges are either patch operations, or downloads of full NAR + # files. The latter edges only occur between "start" and a store + # path. + my %graph; + + $graph{"start"} = {d => 0, pred => undef, edges => []}; + + my @queue = (); + my $queueFront = 0; + my %done; + + sub addNode { + my $graph = shift; + my $u = shift; + $$graph{$u} = {d => 999999999999, pred => undef, edges => []} + unless defined $$graph{$u}; + } + + sub addEdge { + my $graph = shift; + my $u = shift; + my $v = shift; + my $w = shift; + my $type = shift; + my $info = shift; + addNode $graph, $u; + push @{$$graph{$u}->{edges}}, + {weight => $w, start => $u, end => $v, type => $type, info => $info}; + my $n = scalar @{$$graph{$u}->{edges}}; + } + + push @queue, $targetPath; + + while ($queueFront < scalar @queue) { + my $u = $queue[$queueFront++]; + next if defined $done{$u}; + $done{$u} = 1; + + addNode \%graph, $u; + + # If the path already exists, it has distance 0 from the + # "start" node. + if (isValidPath($u)) { + addEdge \%graph, "start", $u, 0, "present", undef; + } + + else { + + # Add patch edges. + my $patchList = $dbh->selectall_arrayref( + "select * from Patches where storePath = ?", + { Slice => {} }, $u); + + foreach my $patch (@{$patchList}) { + if (isValidPath($patch->{basePath})) { + my ($baseHashAlgo, $baseHash) = parseHash $patch->{baseHash}; + + my $hash = $hashCache->{$baseHashAlgo}->{$patch->{basePath}}; + if (!defined $hash) { + $hash = $fast && $baseHashAlgo eq "sha256" + ? queryPathHash($patch->{basePath}) + : hashPath($baseHashAlgo, $baseHashAlgo ne "md5", $patch->{basePath}); + $hash =~ s/.*://; + $hashCache->{$baseHashAlgo}->{$patch->{basePath}} = $hash; + } + + next if $hash ne $baseHash; + } + push @queue, $patch->{basePath}; + addEdge \%graph, $patch->{basePath}, $u, $patch->{size}, "patch", $patch; + } + + # Add NAR file edges to the start node. + my $narFileList = $dbh->selectall_arrayref( + "select * from NARs where storePath = ?", + { Slice => {} }, $u); + + foreach my $narFile (@{$narFileList}) { + # !!! how to handle files whose size is not known in advance? + # For now, assume some arbitrary size (1 GB). + # This has the side-effect of preferring non-Hydra downloads. + addEdge \%graph, "start", $u, ($narFile->{size} || 1000000000), "narfile", $narFile; + } + } + } + + + # Run Dijkstra's shortest path algorithm to determine the shortest + # sequence of download and/or patch actions that will produce + # $targetPath. + + my @todo = keys %graph; + + while (scalar @todo > 0) { + + # Remove the closest element from the todo list. + # !!! inefficient, use a priority queue + @todo = sort { -($graph{$a}->{d} <=> $graph{$b}->{d}) } @todo; + my $u = pop @todo; + + my $u_ = $graph{$u}; + + foreach my $edge (@{$u_->{edges}}) { + my $v_ = $graph{$edge->{end}}; + if ($v_->{d} > $u_->{d} + $edge->{weight}) { + $v_->{d} = $u_->{d} + $edge->{weight}; + # Store the edge; to edge->start is actually the + # predecessor. + $v_->{pred} = $edge; + } + } + } + + + # Retrieve the shortest path from "start" to $targetPath. + my @path = (); + my $cur = $targetPath; + return () unless defined $graph{$targetPath}->{pred}; + while ($cur ne "start") { + push @path, $graph{$cur}->{pred}; + $cur = $graph{$cur}->{pred}->{start}; + } + + return @path; +} + + +# Parse the arguments. + +if ($ARGV[0] eq "--query") { + + while (<STDIN>) { + chomp; + my ($cmd, @args) = split " ", $_; + + if ($cmd eq "have") { + foreach my $storePath (@args) { + print "$storePath\n" if scalar @{$dbh->selectcol_arrayref("select 1 from NARs where storePath = ?", {}, $storePath)} > 0; + } + print "\n"; + } + + elsif ($cmd eq "info") { + foreach my $storePath (@args) { + + my $infos = $dbh->selectall_arrayref( + "select * from NARs where storePath = ?", + { Slice => {} }, $storePath); + + next unless scalar @{$infos} > 0; + my $info = @{$infos}[0]; + + print "$storePath\n"; + print "$info->{deriver}\n"; + my @references = split " ", $info->{refs}; + print scalar @references, "\n"; + print "$_\n" foreach @references; + + my @path = computeSmallestDownload $storePath; + + my $downloadSize = 0; + while (scalar @path > 0) { + my $edge = pop @path; + my $u = $edge->{start}; + my $v = $edge->{end}; + if ($edge->{type} eq "patch") { + $downloadSize += $edge->{info}->{size} || 0; + } + elsif ($edge->{type} eq "narfile") { + $downloadSize += $edge->{info}->{size} || 0; + } + } + + print "$downloadSize\n"; + + my $narSize = $info->{narSize} || 0; + print "$narSize\n"; + } + + print "\n"; + } + + else { die "unknown command `$cmd'"; } + } + + exit 0; +} + +elsif ($ARGV[0] ne "--substitute") { + die; +} + + +die unless scalar @ARGV == 3; +my $targetPath = $ARGV[1]; +my $destPath = $ARGV[2]; +$fast = 0; + + +# Create a temporary directory. +my $tmpDir = tempdir("nix-download.XXXXXX", CLEANUP => 1, TMPDIR => 1) + or die "cannot create a temporary directory"; + +my $tmpNar = "$tmpDir/nar"; +my $tmpNar2 = "$tmpDir/nar2"; + + +open LOGFILE, ">>$logFile" or die "cannot open log file $logFile"; + +my $date = strftime ("%F %H:%M:%S UTC", gmtime (time)); +print LOGFILE "$$ get $targetPath $date\n"; + +print STDERR "\n*** Trying to download/patch `$targetPath'\n"; + + +# Compute the shortest path. +my @path = computeSmallestDownload $targetPath; +die "don't know how to produce $targetPath\n" if scalar @path == 0; + + +# We don't need the manifest anymore, so close it as an optimisation: +# if we still have SQLite locks blocking other processes (we +# shouldn't), this gets rid of them. +$dbh->disconnect; + + +# Traverse the shortest path, perform the actions described by the +# edges. +my $curStep = 1; +my $maxStep = scalar @path; + +my $finalNarHash; + +while (scalar @path > 0) { + my $edge = pop @path; + my $u = $edge->{start}; + my $v = $edge->{end}; + + print STDERR "\n*** Step $curStep/$maxStep: "; + + if ($edge->{type} eq "present") { + print STDERR "using already present path `$v'\n"; + print LOGFILE "$$ present $v\n"; + + if ($curStep < $maxStep) { + # Since this is not the last step, the path will be used + # as a base to one or more patches. So turn the base path + # into a NAR archive, to which we can apply the patch. + print STDERR " packing base path...\n"; + system("$Nix::Config::binDir/nix-store --dump $v > $tmpNar") == 0 + or die "cannot dump `$v'"; + } + } + + elsif ($edge->{type} eq "patch") { + my $patch = $edge->{info}; + print STDERR "applying patch `$patch->{url}' to `$u' to create `$v'\n"; + + print LOGFILE "$$ patch $patch->{url} $patch->{size} $patch->{baseHash} $u $v\n"; + + # Download the patch. + print STDERR " downloading patch...\n"; + my $patchPath = "$tmpDir/patch"; + checkURL $patch->{url}; + system("$curl '$patch->{url}' -o $patchPath") == 0 + or die "cannot download patch `$patch->{url}'\n"; + + # Apply the patch to the NAR archive produced in step 1 (for + # the already present path) or a later step (for patch sequences). + print STDERR " applying patch...\n"; + system("$Nix::Config::libexecDir/nix/bspatch $tmpNar $tmpNar2 $patchPath") == 0 + or die "cannot apply patch `$patchPath' to $tmpNar\n"; + + if ($curStep < $maxStep) { + # The archive will be used as the base of the next patch. + rename "$tmpNar2", "$tmpNar" or die "cannot rename NAR archive: $!"; + } else { + # This was the last patch. Unpack the final NAR archive + # into the target path. + print STDERR " unpacking patched archive...\n"; + system("$Nix::Config::binDir/nix-store --restore $destPath < $tmpNar2") == 0 + or die "cannot unpack $tmpNar2 to `$v'\n"; + } + + $finalNarHash = $patch->{narHash}; + } + + elsif ($edge->{type} eq "narfile") { + my $narFile = $edge->{info}; + print STDERR "downloading `$narFile->{url}' to `$v'\n"; + + my $size = $narFile->{size} || -1; + print LOGFILE "$$ narfile $narFile->{url} $size $v\n"; + + checkURL $narFile->{url}; + + my $decompressor = + $narFile->{compressionType} eq "bzip2" ? "| $Nix::Config::bzip2 -d" : + $narFile->{compressionType} eq "xz" ? "| $Nix::Config::xz -d" : + $narFile->{compressionType} eq "none" ? "" : + die "unknown compression type `$narFile->{compressionType}'"; + + if ($curStep < $maxStep) { + # The archive will be used a base to a patch. + system("$curl '$narFile->{url}' $decompressor > $tmpNar") == 0 + or die "cannot download and unpack `$narFile->{url}' to `$v'\n"; + } else { + # Unpack the archive to the target path. + system("$curl '$narFile->{url}' $decompressor | $Nix::Config::binDir/nix-store --restore '$destPath'") == 0 + or die "cannot download and unpack `$narFile->{url}' to `$v'\n"; + } + + $finalNarHash = $narFile->{narHash}; + } + + $curStep++; +} + + +# Tell Nix about the expected hash so it can verify it. +die "cannot check integrity of the downloaded path since its hash is not known\n" + unless defined $finalNarHash; +print "$finalNarHash\n"; + + +print STDERR "\n"; +print LOGFILE "$$ success\n"; +close LOGFILE; |