about summary refs log tree commit diff
path: root/scripts/download-using-manifests.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/download-using-manifests.pl.in')
-rwxr-xr-xscripts/download-using-manifests.pl.in376
1 files changed, 376 insertions, 0 deletions
diff --git a/scripts/download-using-manifests.pl.in b/scripts/download-using-manifests.pl.in
new file mode 100755
index 000000000000..e849a930e71e
--- /dev/null
+++ b/scripts/download-using-manifests.pl.in
@@ -0,0 +1,376 @@
+#! @perl@ -w @perlFlags@
+
+use utf8;
+use strict;
+use Nix::Config;
+use Nix::Manifest;
+use Nix::Store;
+use Nix::Utils;
+use POSIX qw(strftime);
+
+STDOUT->autoflush(1);
+
+my $logFile = "$Nix::Config::logDir/downloads";
+
+# For queries, skip expensive calls to nix-hash etc.  We're just
+# estimating the expected download size.
+my $fast = 1;
+
+# ‘--insecure’ is fine because Nix verifies the hash of the result.
+my $curl = "$Nix::Config::curl --fail --location --insecure";
+
+
+# Open the manifest cache and update it if necessary.
+my $dbh = updateManifestDB();
+exit 0 unless defined $dbh; # exit if there are no manifests
+print "\n";
+
+
+# $hashCache->{$algo}->{$path} yields the $algo-hash of $path.
+my $hashCache;
+
+
+sub parseHash {
+    my $hash = shift;
+    if ($hash =~ /^(.+):(.+)$/) {
+        return ($1, $2);
+    } else {
+        return ("md5", $hash);
+    }
+}
+
+
+# Compute the most efficient sequence of downloads to produce the
+# given path.
+sub computeSmallestDownload {
+    my $targetPath = shift;
+
+    # Build a graph of all store paths that might contribute to the
+    # construction of $targetPath, and the special node "start".  The
+    # edges are either patch operations, or downloads of full NAR
+    # files.  The latter edges only occur between "start" and a store
+    # path.
+    my %graph;
+
+    $graph{"start"} = {d => 0, pred => undef, edges => []};
+
+    my @queue = ();
+    my $queueFront = 0;
+    my %done;
+
+    sub addNode {
+        my $graph = shift;
+        my $u = shift;
+        $$graph{$u} = {d => 999999999999, pred => undef, edges => []}
+            unless defined $$graph{$u};
+    }
+
+    sub addEdge {
+        my $graph = shift;
+        my $u = shift;
+        my $v = shift;
+        my $w = shift;
+        my $type = shift;
+        my $info = shift;
+        addNode $graph, $u;
+        push @{$$graph{$u}->{edges}},
+            {weight => $w, start => $u, end => $v, type => $type, info => $info};
+        my $n = scalar @{$$graph{$u}->{edges}};
+    }
+
+    push @queue, $targetPath;
+
+    while ($queueFront < scalar @queue) {
+        my $u = $queue[$queueFront++];
+        next if defined $done{$u};
+        $done{$u} = 1;
+
+        addNode \%graph, $u;
+
+        # If the path already exists, it has distance 0 from the
+        # "start" node.
+        if (isValidPath($u)) {
+            addEdge \%graph, "start", $u, 0, "present", undef;
+        }
+
+        else {
+
+            # Add patch edges.
+            my $patchList = $dbh->selectall_arrayref(
+                "select * from Patches where storePath = ?",
+                { Slice => {} }, $u);
+
+            foreach my $patch (@{$patchList}) {
+                if (isValidPath($patch->{basePath})) {
+                    my ($baseHashAlgo, $baseHash) = parseHash $patch->{baseHash};
+
+                    my $hash = $hashCache->{$baseHashAlgo}->{$patch->{basePath}};
+                    if (!defined $hash) {
+                        $hash = $fast && $baseHashAlgo eq "sha256"
+                            ? queryPathHash($patch->{basePath})
+                            : hashPath($baseHashAlgo, $baseHashAlgo ne "md5", $patch->{basePath});
+                        $hash =~ s/.*://;
+                        $hashCache->{$baseHashAlgo}->{$patch->{basePath}} = $hash;
+                    }
+
+                    next if $hash ne $baseHash;
+                }
+                push @queue, $patch->{basePath};
+                addEdge \%graph, $patch->{basePath}, $u, $patch->{size}, "patch", $patch;
+            }
+
+            # Add NAR file edges to the start node.
+            my $narFileList = $dbh->selectall_arrayref(
+                "select * from NARs where storePath = ?",
+                { Slice => {} }, $u);
+
+            foreach my $narFile (@{$narFileList}) {
+                # !!! how to handle files whose size is not known in advance?
+                # For now, assume some arbitrary size (1 GB).
+                # This has the side-effect of preferring non-Hydra downloads.
+                addEdge \%graph, "start", $u, ($narFile->{size} || 1000000000), "narfile", $narFile;
+            }
+        }
+    }
+
+
+    # Run Dijkstra's shortest path algorithm to determine the shortest
+    # sequence of download and/or patch actions that will produce
+    # $targetPath.
+
+    my @todo = keys %graph;
+
+    while (scalar @todo > 0) {
+
+        # Remove the closest element from the todo list.
+        # !!! inefficient, use a priority queue
+        @todo = sort { -($graph{$a}->{d} <=> $graph{$b}->{d}) } @todo;
+        my $u = pop @todo;
+
+        my $u_ = $graph{$u};
+
+        foreach my $edge (@{$u_->{edges}}) {
+            my $v_ = $graph{$edge->{end}};
+            if ($v_->{d} > $u_->{d} + $edge->{weight}) {
+                $v_->{d} = $u_->{d} + $edge->{weight};
+                # Store the edge; to edge->start is actually the
+                # predecessor.
+                $v_->{pred} = $edge;
+            }
+        }
+    }
+
+
+    # Retrieve the shortest path from "start" to $targetPath.
+    my @path = ();
+    my $cur = $targetPath;
+    return () unless defined $graph{$targetPath}->{pred};
+    while ($cur ne "start") {
+        push @path, $graph{$cur}->{pred};
+        $cur = $graph{$cur}->{pred}->{start};
+    }
+
+    return @path;
+}
+
+
+# Parse the arguments.
+
+if ($ARGV[0] eq "--query") {
+
+    while (<STDIN>) {
+        chomp;
+        my ($cmd, @args) = split " ", $_;
+
+        if ($cmd eq "have") {
+            foreach my $storePath (@args) {
+                print "$storePath\n" if scalar @{$dbh->selectcol_arrayref("select 1 from NARs where storePath = ?", {}, $storePath)} > 0;
+            }
+            print "\n";
+        }
+
+        elsif ($cmd eq "info") {
+            foreach my $storePath (@args) {
+
+                my $infos = $dbh->selectall_arrayref(
+                    "select * from NARs where storePath = ?",
+                    { Slice => {} }, $storePath);
+
+                next unless scalar @{$infos} > 0;
+                my $info = @{$infos}[0];
+
+                print "$storePath\n";
+                print "$info->{deriver}\n";
+                my @references = split " ", $info->{refs};
+                print scalar @references, "\n";
+                print "$_\n" foreach @references;
+
+                my @path = computeSmallestDownload $storePath;
+
+                my $downloadSize = 0;
+                while (scalar @path > 0) {
+                    my $edge = pop @path;
+                    my $u = $edge->{start};
+                    my $v = $edge->{end};
+                    if ($edge->{type} eq "patch") {
+                        $downloadSize += $edge->{info}->{size} || 0;
+                    }
+                    elsif ($edge->{type} eq "narfile") {
+                        $downloadSize += $edge->{info}->{size} || 0;
+                    }
+                }
+
+                print "$downloadSize\n";
+
+                my $narSize = $info->{narSize} || 0;
+                print "$narSize\n";
+            }
+
+            print "\n";
+        }
+
+        else { die "unknown command ‘$cmd’"; }
+    }
+
+    exit 0;
+}
+
+elsif ($ARGV[0] ne "--substitute") {
+    die;
+}
+
+
+die unless scalar @ARGV == 3;
+my $targetPath = $ARGV[1];
+my $destPath = $ARGV[2];
+$fast = 0;
+
+
+# Create a temporary directory.
+my $tmpDir = mkTempDir("nix-download");
+
+my $tmpNar = "$tmpDir/nar";
+my $tmpNar2 = "$tmpDir/nar2";
+
+
+open LOGFILE, ">>$logFile" or die "cannot open log file $logFile";
+
+my $date = strftime ("%F %H:%M:%S UTC", gmtime (time));
+print LOGFILE "$$ get $targetPath $date\n";
+
+print STDERR "\n*** Trying to download/patch ‘$targetPath’\n";
+
+
+# Compute the shortest path.
+my @path = computeSmallestDownload $targetPath;
+die "don't know how to produce $targetPath\n" if scalar @path == 0;
+
+
+# We don't need the manifest anymore, so close it as an optimisation:
+# if we still have SQLite locks blocking other processes (we
+# shouldn't), this gets rid of them.
+$dbh->disconnect;
+
+
+# Traverse the shortest path, perform the actions described by the
+# edges.
+my $curStep = 1;
+my $maxStep = scalar @path;
+
+my $finalNarHash;
+
+while (scalar @path > 0) {
+    my $edge = pop @path;
+    my $u = $edge->{start};
+    my $v = $edge->{end};
+
+    print STDERR "\n*** Step $curStep/$maxStep: ";
+
+    if ($edge->{type} eq "present") {
+        print STDERR "using already present path ‘$v’\n";
+        print LOGFILE "$$ present $v\n";
+
+        if ($curStep < $maxStep) {
+            # Since this is not the last step, the path will be used
+            # as a base to one or more patches.  So turn the base path
+            # into a NAR archive, to which we can apply the patch.
+            print STDERR "  packing base path...\n";
+            system("$Nix::Config::binDir/nix-store --dump $v > $tmpNar") == 0
+                or die "cannot dump ‘$v’";
+        }
+    }
+
+    elsif ($edge->{type} eq "patch") {
+        my $patch = $edge->{info};
+        print STDERR "applying patch ‘$patch->{url}’ to ‘$u’ to create ‘$v’\n";
+
+        print LOGFILE "$$ patch $patch->{url} $patch->{size} $patch->{baseHash} $u $v\n";
+
+        # Download the patch.
+        print STDERR "  downloading patch...\n";
+        my $patchPath = "$tmpDir/patch";
+        checkURL $patch->{url};
+        system("$curl '$patch->{url}' -o $patchPath") == 0
+            or die "cannot download patch ‘$patch->{url}’\n";
+
+        # Apply the patch to the NAR archive produced in step 1 (for
+        # the already present path) or a later step (for patch sequences).
+        print STDERR "  applying patch...\n";
+        system("$Nix::Config::libexecDir/nix/bspatch $tmpNar $tmpNar2 $patchPath") == 0
+            or die "cannot apply patch ‘$patchPath’ to $tmpNar\n";
+
+        if ($curStep < $maxStep) {
+            # The archive will be used as the base of the next patch.
+            rename "$tmpNar2", "$tmpNar" or die "cannot rename NAR archive: $!";
+        } else {
+            # This was the last patch.  Unpack the final NAR archive
+            # into the target path.
+            print STDERR "  unpacking patched archive...\n";
+            system("$Nix::Config::binDir/nix-store --restore $destPath < $tmpNar2") == 0
+                or die "cannot unpack $tmpNar2 to ‘$v’\n";
+        }
+
+        $finalNarHash = $patch->{narHash};
+    }
+
+    elsif ($edge->{type} eq "narfile") {
+        my $narFile = $edge->{info};
+        print STDERR "downloading ‘$narFile->{url}’ to ‘$v’\n";
+
+        my $size = $narFile->{size} || -1;
+        print LOGFILE "$$ narfile $narFile->{url} $size $v\n";
+
+        checkURL $narFile->{url};
+
+        my $decompressor =
+            $narFile->{compressionType} eq "bzip2" ? "| $Nix::Config::bzip2 -d" :
+            $narFile->{compressionType} eq "xz" ? "| $Nix::Config::xz -d" :
+            $narFile->{compressionType} eq "none" ? "" :
+            die "unknown compression type ‘$narFile->{compressionType}’";
+
+        if ($curStep < $maxStep) {
+            # The archive will be used a base to a patch.
+            system("$curl '$narFile->{url}' $decompressor > $tmpNar") == 0
+                or die "cannot download and unpack ‘$narFile->{url}’ to ‘$v’\n";
+        } else {
+            # Unpack the archive to the target path.
+            system("$curl '$narFile->{url}' $decompressor | $Nix::Config::binDir/nix-store --restore '$destPath'") == 0
+                or die "cannot download and unpack ‘$narFile->{url}’ to ‘$v’\n";
+        }
+
+        $finalNarHash = $narFile->{narHash};
+    }
+
+    $curStep++;
+}
+
+
+# Tell Nix about the expected hash so it can verify it.
+die "cannot check integrity of the downloaded path since its hash is not known\n"
+    unless defined $finalNarHash;
+print "$finalNarHash\n";
+
+
+print STDERR "\n";
+print LOGFILE "$$ success\n";
+close LOGFILE;