about summary refs log tree commit diff
path: root/scripts/build-remote.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/build-remote.pl.in')
-rwxr-xr-x scripts/build-remote.pl.in 325
1 files changed, 177 insertions, 148 deletions
diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in
index c440b6a0f142..e943b0d9e304 100755
--- a/scripts/build-remote.pl.in
+++ b/scripts/build-remote.pl.in
@@ -3,7 +3,8 @@
 use Fcntl ':flock';
 use English '-no_match_vars';
 use IO::Handle;
-use ssh qw/sshOpts openSSHConnection/;
+use SSH qw/sshOpts openSSHConnection/;
+no warnings('once');
 
 
 # General operation:
@@ -31,57 +32,22 @@ $ENV{"DISPLAY"} = "";
 $ENV{"SSH_ASKPASS"} = "";
 
 
-my $loadIncreased = 0;
-
-my ($amWilling, $localSystem, $neededSystem, $drvPath, $maxSilentTime) = @ARGV;
-$maxSilentTime = 0 unless defined $maxSilentTime;
-
 sub sendReply {
     my $reply = shift;
     print STDERR "# $reply\n";
 }
 
-sub decline {
-    sendReply "decline";
-    exit 0;
-}
-
-my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
-decline unless defined $currentLoad;
-mkdir $currentLoad, 0777 or die unless -d $currentLoad;
-
-my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
-decline if !defined $conf || ! -e $conf;
-
-my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
-
+sub all { $_ || return 0 for @_; 1 }
 
-# Read the list of machines.
-my @machines;
-open CONF, "< $conf" or die;
-
-while (<CONF>) {
-    chomp;
-    s/\#.*$//g;
-    next if /^\s*$/;
-    /^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)(\s+([0-9\.]+))?\s*$/ or die;
-    push @machines,
-        { hostName => $1
-        , systemTypes => [split(/,/, $2)]
-        , sshKeys => $3
-        , maxJobs => $4
-        , speedFactor => 1.0 * ($6 || 1)
-        , enabled => 1
-        };
-}
 
-close CONF;
+# Initialisation.
+my $loadIncreased = 0;
 
+my ($localSystem, $maxSilentTime, $printBuildTrace) = @ARGV;
+$maxSilentTime = 0 unless defined $maxSilentTime;
 
-# Acquire the exclusive lock on $currentLoad/main-lock.
-my $mainLock = "$currentLoad/main-lock";
-open MAINLOCK, ">>$mainLock" or die;
-flock(MAINLOCK, LOCK_EX) or die;
+my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
+my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
 
 
 sub openSlotLock {
@@ -91,150 +57,213 @@ sub openSlotLock {
     open $slotLock, ">>$slotLockFn" or die;
     return $slotLock;
 }
-    
 
-my $hostName;
-my $slotLock;
 
-while (1) {
+# Read the list of machines.
+my @machines;
+if (defined $conf && -e $conf) {
+    open CONF, "< $conf" or die;
+    while (<CONF>) {
+        chomp;
+        s/\#.*$//g;
+        next if /^\s*$/;
+        my @tokens = split /\s/, $_;
+        push @machines,
+            { hostName => $tokens[0]
+            , systemTypes => [ split(/,/, $tokens[1]) ]
+            , sshKeys => $tokens[2]
+            , maxJobs => int($tokens[3])
+            , speedFactor => 1.0 * (defined $tokens[4] ? int($tokens[4]) : 1)
+            , features => [ split(/,/, $tokens[5] || "") ]
+            , enabled => 1
+            };
+    }
+    close CONF;
+}
+
+
+
+# Wait for the calling process to ask us whether we can build some derivation.
+my ($drvPath, $hostName, $slotLock);
+
+REQ: while (1) {
+    $_ = <STDIN> || exit 0;
+    my ($amWilling, $neededSystem);
+    ($amWilling, $neededSystem, $drvPath, $requiredFeatures) = split;
+    my @requiredFeatures = split /,/, $requiredFeatures;
+
+    my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
+
+    if (!defined $currentLoad) {
+        sendReply "decline";
+        next;
+    }
+    
+    # Acquire the exclusive lock on $currentLoad/main-lock.
+    mkdir $currentLoad, 0777 or die unless -d $currentLoad;
+    my $mainLock = "$currentLoad/main-lock";
+    open MAINLOCK, ">>$mainLock" or die;
+    flock(MAINLOCK, LOCK_EX) or die;
     
-    # Find all machine that can execute this build, i.e., that support
-    # builds for the given platform and are not at their job limit.
-    my $rightType = 0;
-    my @available = ();
-    LOOP: foreach my $cur (@machines) {
-        if ($cur->{enabled} && grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
-            $rightType = 1;
-
-            # We have a machine of the right type.  Determine the load on
-            # the machine.
-            my $slot = 0;
-            my $load = 0;
-            my $free;
-            while ($slot < $cur->{maxJobs}) {
-                my $slotLock = openSlotLock($cur, $slot);
-                if (flock($slotLock, LOCK_EX | LOCK_NB)) {
-                    $free = $slot unless defined $free;
-                    flock($slotLock, LOCK_UN) or die;
-                } else {
-                    $load++;
+    
+    while (1) {
+        # Find all machines that can execute this build, i.e., that
+        # support builds for the given platform and features, and are
+        # not at their job limit.
+        my $rightType = 0;
+        my @available = ();
+        LOOP: foreach my $cur (@machines) {
+            if ($cur->{enabled}
+                && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}})
+                && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{features}} } @requiredFeatures))
+            {
+                $rightType = 1;
+
+                # We have a machine of the right type.  Determine the load on
+                # the machine.
+                my $slot = 0;
+                my $load = 0;
+                my $free;
+                while ($slot < $cur->{maxJobs}) {
+                    my $slotLock = openSlotLock($cur, $slot);
+                    if (flock($slotLock, LOCK_EX | LOCK_NB)) {
+                        $free = $slot unless defined $free;
+                        flock($slotLock, LOCK_UN) or die;
+                    } else {
+                        $load++;
+                    }
+                    close $slotLock;
+                    $slot++;
                 }
-                close $slotLock;
-                $slot++;
+                
+                push @available, { machine => $cur, load => $load, free => $free }
+                if $load < $cur->{maxJobs};
             }
-
-            push @available, { machine => $cur, load => $load, free => $free }
-            if $load < $cur->{maxJobs};
         }
-    }
 
-    if (defined $ENV{NIX_DEBUG_HOOK}) {
-        print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
-            foreach @available;
-    }
+        if (defined $ENV{NIX_DEBUG_HOOK}) {
+            print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
+                foreach @available;
+        }
 
 
-    # Didn't find any available machine?  Then decline or postpone.
-    if (scalar @available == 0) {
-        # Postpone if we have a machine of the right type, except if the
-        # local system can and wants to do the build.
-        if ($rightType && !$canBuildLocally) {
-            sendReply "postpone";
-            exit 0;
-        } else {
-            decline;
+        # Didn't find any available machine?  Then decline or postpone.
+        if (scalar @available == 0) {
+            # Postpone if we have a machine of the right type, except
+            # if the local system can and wants to do the build.
+            if ($rightType && !$canBuildLocally) {
+                sendReply "postpone";
+            } else {
+                sendReply "decline";                
+            }
+            close MAINLOCK;
+            next REQ;
         }
-    }
-
 
-    # Prioritise the available machines as follows:
-    # - First by load divided by speed factor, rounded to the nearest
-    #   integer.  This causes fast machines to be preferred over slow
-    #   machines with similar loads.
-    # - Then by speed factor.
-    # - Finally by load.
-    sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
-    @available = sort
-        { lf($a) <=> lf($b)
-              || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
-              || $a->{load} <=> $b->{load}
-        } @available;
 
+        # Prioritise the available machines as follows:
+        # - First by load divided by speed factor, rounded to the nearest
+        #   integer.  This causes fast machines to be preferred over slow
+        #   machines with similar loads.
+        # - Then by speed factor.
+        # - Finally by load.
+        sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
+        @available = sort
+            { lf($a) <=> lf($b)
+                  || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
+                  || $a->{load} <=> $b->{load}
+            } @available;
 
-    # Select the best available machine and lock a free slot.
-    my $selected = $available[0]; 
-    my $machine = $selected->{machine};
 
-    $slotLock = openSlotLock($machine, $selected->{free});
-    flock($slotLock, LOCK_EX | LOCK_NB) or die;
-    utime undef, undef, $slotLock;
+        # Select the best available machine and lock a free slot.
+        my $selected = $available[0]; 
+        my $machine = $selected->{machine};
+        
+        $slotLock = openSlotLock($machine, $selected->{free});
+        flock($slotLock, LOCK_EX | LOCK_NB) or die;
+        utime undef, undef, $slotLock;
 
-    close MAINLOCK;
+        close MAINLOCK;
 
 
-    # Connect to the selected machine.
-    @sshOpts = ("-i", $machine->{sshKeys}, "-x");
-    $hostName = $machine->{hostName};
-    last if openSSHConnection $hostName;
+        # Connect to the selected machine.
+        @sshOpts = ("-i", $machine->{sshKeys}, "-x");
+        $hostName = $machine->{hostName};
+        last REQ if openSSHConnection $hostName;
     
-    warn "unable to open SSH connection to $hostName, trying other available machines...\n";
-    $machine->{enabled} = 0;
+        warn "unable to open SSH connection to $hostName, trying other available machines...\n";
+        $machine->{enabled} = 0;
+    }
 }
 
 
 # Tell Nix we've accepted the build.
 sendReply "accept";
-my $x = <STDIN>;
-chomp $x;
-
-if ($x ne "okay") {
-    exit 0;
-}
+my @inputs = split /\s/, readline(STDIN);
+my @outputs = split /\s/, readline(STDIN);
 
 
-# Do the actual build.
 print STDERR "building `$drvPath' on `$hostName'\n";
+print STDERR "@ build-remote $drvPath $hostName\n" if $printBuildTrace;
 
-my $inputs = `cat inputs`; die if ($? != 0);
-$inputs =~ s/\n/ /g;
-
-my $outputs = `cat outputs`; die if ($? != 0);
-$outputs =~ s/\n/ /g;
-
-print "copying inputs...\n";
 
 my $maybeSign = "";
 $maybeSign = "--sign" if -e "/nix/etc/nix/signing-key.sec";
 
-system("NIX_SSHOPTS=\"@sshOpts\" @bindir@/nix-copy-closure $hostName $maybeSign $drvPath $inputs") == 0
+
+# Register the derivation as a temporary GC root.  Note that $PPID is
+# the PID of the remote SSH process, which, due to the use of a
+# persistent SSH connection, should be the same across all remote
+# command invocations for this session.
+my $rootsDir = "@localstatedir@/nix/gcroots/tmp";
+system("ssh $hostName @sshOpts 'mkdir -m 1777 -p $rootsDir; ln -sfn $drvPath $rootsDir/\$PPID.drv'");
+
+sub removeRoots {
+    system("ssh $hostName @sshOpts 'rm -f $rootsDir/\$PPID.drv $rootsDir/\$PPID.out'");
+}
+
+
+# Copy the derivation and its dependencies to the build machine.
+system("NIX_SSHOPTS=\"@sshOpts\" @bindir@/nix-copy-closure $hostName $maybeSign $drvPath @inputs") == 0
     or die "cannot copy inputs to $hostName: $?";
 
-print "building...\n";
-
-my $buildFlags = "--max-silent-time $maxSilentTime --fallback";
-
-# `-tt' forces allocation of a pseudo-terminal.  This is required to
-# make the remote nix-store process receive a signal when the
-# connection dies.  Without it, the remote process might continue to
-# run indefinitely (that is, until it next tries to write to
-# stdout/stderr).
-if (system("ssh $hostName @sshOpts -tt 'nix-store -r $drvPath $buildFlags > /dev/null'") != 0) {
-    # If we couldn't run ssh or there was an ssh problem (indicated by
-    # exit code 255), then we return exit code 1; otherwise we assume
-    # that the builder failed, which we indicate to Nix using exit
-    # code 100.  It's important to distinguish between the two because
-    # the first is a transient failure and the latter is permanent.
-    my $res = $? == -1 || ($? >> 8) == 255 ? 1 : 100;
-    print STDERR "build of `$drvPath' on `$hostName' failed with exit code $?\n";
+
+# Perform the build.
+my $buildFlags = "--max-silent-time $maxSilentTime --fallback --add-root $rootsDir/\$PPID.out --option verbosity 0";
+
+# We let the remote side kill its process group when the connection is
+# closed unexpectedly.  This is necessary to ensure that no processes
+# are left running on the remote system if the local Nix process is
+# killed.  (SSH itself doesn't kill child processes if the connection
+# is interrupted unless the `-tt' flag is used to force a pseudo-tty,
+# in which case every child receives SIGHUP; however, `-tt' doesn't
+# work on some platforms when connection sharing is used.)
+pipe STDIN, DUMMY; # make sure we have a readable STDIN
+if (system("ssh $hostName @sshOpts '(read; kill -INT -\$\$) <&0 & nix-store -r $drvPath $buildFlags > /dev/null' 2>&4") != 0) {
+    # Note that if we get exit code 100 from `nix-store -r', it
+    # denotes a permanent build failure (as opposed to an SSH problem
+    # or a temporary Nix problem).  We propagate this to the caller to
+    # allow it to distinguish between transient and permanent
+    # failures.
+    my $res = $? >> 8;
+    print STDERR "build of `$drvPath' on `$hostName' failed with exit code $res\n";
+    removeRoots;
     exit $res;
 }
 
-print "build of `$drvPath' on `$hostName' succeeded\n";
+#print "build of `$drvPath' on `$hostName' succeeded\n";
 
-foreach my $output (split '\n', $outputs) {
+
+# Copy the output from the build machine.
+foreach my $output (@outputs) {
     my $maybeSignRemote = "";
     $maybeSignRemote = "--sign" if $UID != 0;
     
-    system("ssh $hostName @sshOpts 'nix-store --export $maybeSignRemote $output' | @bindir@/nix-store --import > /dev/null") == 0
+    system("ssh $hostName @sshOpts 'nix-store --export $maybeSignRemote $output'" .
+           "| NIX_HELD_LOCKS=$output @bindir@/nix-store --import > /dev/null") == 0
 	or die "cannot copy $output from $hostName: $?";
 }
+
+
+# Get rid of the temporary GC roots.
+removeRoots;