about summary refs log tree commit diff
path: root/scripts/download-from-binary-cache.pl.in
diff options
context:
space:
mode:
authorEelco Dolstra <eelco.dolstra@logicblox.com>2012-07-11T21·53-0400
committerEelco Dolstra <eelco.dolstra@logicblox.com>2012-07-11T21·53-0400
commitb74d92755d1ca6a1538f292dcb5a906f66af7b51 (patch)
tree4369859a40dc8a13b58e2d507edace3c239078b3 /scripts/download-from-binary-cache.pl.in
parent09a6321aeb7393cdb4b5af62d2e4106d83124fdf (diff)
download-from-binary-cache: Use HEAD requests if possible
In "nix-env -qas", we don't need the substitute info, we just need to
know if it exists.  This can be done using a HTTP HEAD request, which
saves bandwidth.

Note however that curl currently has a bug that prevents it from
reusing HTTP connections if HEAD requests return a 404:

https://sourceforge.net/tracker/?func=detail&aid=3542731&group_id=976&atid=100976

Without the patch attached to the issue, using HEAD is actually quite
a bit slower than GET.
Diffstat (limited to 'scripts/download-from-binary-cache.pl.in')
-rw-r--r--scripts/download-from-binary-cache.pl.in91
1 files changed, 79 insertions, 12 deletions
diff --git a/scripts/download-from-binary-cache.pl.in b/scripts/download-from-binary-cache.pl.in
index 2a70e51933..93155e0b56 100644
--- a/scripts/download-from-binary-cache.pl.in
+++ b/scripts/download-from-binary-cache.pl.in
@@ -18,7 +18,7 @@ my @binaryCacheUrls = map { s/\/+$//; $_ } split(/ /,
 my $maxParallelRequests = int($Nix::Config::config{"binary-caches-parallel-connections"} // 150);
 $maxParallelRequests = 1 if $maxParallelRequests < 1;
 
-my ($dbh, $insertNAR, $queryNAR, $insertNegativeNAR, $queryNegativeNAR);
+my ($dbh, $insertNAR, $queryNAR, $insertNARExistence, $queryNARExistence);
 my %cacheIds;
 
 my $curlm = WWW::Curl::Multi->new;
@@ -30,7 +30,7 @@ my $caBundle = $ENV{"CURL_CA_BUNDLE"} // $ENV{"OPENSSL_X509_CERT_FILE"};
 
 
 sub addRequest {
-    my ($storePath, $url) = @_;
+    my ($storePath, $url, $head) = @_;
     
     my $curl = WWW::Curl::Easy->new;
     my $curlId = $curlIdCount++;
@@ -41,6 +41,7 @@ sub addRequest {
     $curl->setopt(CURLOPT_WRITEDATA, \$requests{$curlId}->{content});
     $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
     $curl->setopt(CURLOPT_CAINFO, $caBundle) if defined $caBundle;
+    $curl->setopt(CURLOPT_NOBODY, 1) if $head;
 
     if ($activeRequests >= $maxParallelRequests) {
         $scheduled{$curlId} = 1;
@@ -127,9 +128,10 @@ EOF
 EOF
 
     $dbh->do(<<EOF);
-        create table if not exists NegativeNARs (
+        create table if not exists NARExistence (
             cache            integer not null,
             storePath        text not null,
+            exist            integer not null,
             timestamp        integer not null,
             primary key (cache, storePath),
             foreign key (cache) references BinaryCaches(id) on delete cascade
@@ -142,17 +144,28 @@ EOF
 
     $queryNAR = $dbh->prepare("select * from NARs where cache = ? and storePath = ?") or die;
 
-    $insertNegativeNAR = $dbh->prepare(
-        "insert or replace into NegativeNARs(cache, storePath, timestamp) values (?, ?, ?)") or die;
+    $insertNARExistence = $dbh->prepare(
+        "insert or replace into NARExistence(cache, storePath, exist, timestamp) values (?, ?, ?, ?)") or die;
 
-    $queryNegativeNAR = $dbh->prepare("select 1 from NegativeNARs where cache = ? and storePath = ?") or die;
+    $queryNARExistence = $dbh->prepare("select exist from NARExistence where cache = ? and storePath = ?") or die;
 }
 
 
+
 sub negativeHit {
     my ($storePath, $binaryCacheUrl) = @_;
-    $queryNegativeNAR->execute(getCacheId($binaryCacheUrl), basename($storePath));
-    return @{$queryNegativeNAR->fetchall_arrayref()} != 0;
+    $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
+    my $res = $queryNARExistence->fetchrow_hashref();
+    return defined $res && $res->{exist} == 0;
+}
+
+
+sub positiveHit {
+    my ($storePath, $binaryCacheUrl) = @_;
+    return 1 if defined getCachedInfoFrom($storePath, $binaryCacheUrl);
+    $queryNARExistence->execute(getCacheId($binaryCacheUrl), basename($storePath));
+    my $res = $queryNARExistence->fetchrow_hashref();
+    return defined $res && $res->{exist} == 1;
 }
 
 
@@ -166,7 +179,7 @@ sub processNARInfo {
             print STDERR "could not download ‘$request->{url}’ (" .
                 ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
         } else {
-            $insertNegativeNAR->execute($cacheId, basename($storePath), time());
+            $insertNARExistence->execute($cacheId, basename($storePath), 0, time());
         }
         return undef;
     }
@@ -319,6 +332,61 @@ sub printInfoParallel {
 }
 
 
+sub printSubstitutablePaths {
+    my @paths = @_;
+
+    # First look for paths that have cached info.
+    my @left;
+    foreach my $storePath (@paths) {
+        my $found = 0;
+        foreach my $binaryCacheUrl (@binaryCacheUrls) {
+            if (positiveHit($storePath, $binaryCacheUrl)) {
+                print "$storePath\n";
+                $found = 1;
+                last;
+            }
+        }
+        push @left, $storePath if !$found;
+    }
+
+    return if scalar @left == 0;
+
+    # For remaining paths, do HEAD requests.
+    foreach my $binaryCacheUrl (@binaryCacheUrls) {
+        my $cacheId = getCacheId($binaryCacheUrl);
+
+        my @left2;
+        %requests = ();
+        foreach my $storePath (@left) {
+            if (negativeHit($storePath, $binaryCacheUrl)) {
+                push @left2, $storePath;
+                next;
+            }
+            addRequest($storePath, infoUrl($binaryCacheUrl, $storePath), 1);
+        }
+        
+        processRequests;
+
+        foreach my $request (values %requests) {
+            if ($request->{result} != 0 || $request->{httpStatus} != 200) {
+                if ($request->{httpStatus} != 404) {
+                    print STDERR "could not check ‘$request->{url}’ (" .
+                        ($request->{result} != 0 ? "Curl error $request->{result}" : "HTTP status $request->{httpStatus}") . ")\n";
+                } else {
+                    $insertNARExistence->execute($cacheId, basename($request->{storePath}), 0, time());
+                }
+                push @left2, $request->{storePath};
+            } else {
+                $insertNARExistence->execute($cacheId, basename($request->{storePath}), 1, time());
+                print "$request->{storePath}\n";
+            }
+        }
+
+        @left = @left2;
+    }
+}
+
+
 sub downloadBinary {
     my ($storePath) = @_;
     
@@ -371,9 +439,8 @@ if ($ARGV[0] eq "--query") {
         my ($cmd, @args) = split " ", $_;
         
         if ($cmd eq "have") {
-            my $storePath = <STDIN>; chomp $storePath;
-            # FIXME: want to give correct info here, but it's too slow.
-            print "0\n";
+            printSubstitutablePaths(@args);
+            print "\n";
         }
 
         elsif ($cmd eq "info") {