about summary refs log tree commit diff
path: root/scripts/build-remote.pl.in
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/build-remote.pl.in')
-rwxr-xr-xscripts/build-remote.pl.in284
1 files changed, 284 insertions, 0 deletions
diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in
new file mode 100755
index 000000000000..dcf245d7d55f
--- /dev/null
+++ b/scripts/build-remote.pl.in
@@ -0,0 +1,284 @@
+#! @perl@ -w @perlFlags@
+
+use utf8;
+use Fcntl qw(:DEFAULT :flock);
+use English '-no_match_vars';
+use IO::Handle;
+use Nix::Config;
+use Nix::SSH;
+use Nix::CopyClosure;
+use Nix::Store;
+use Encode;
+no warnings('once');
+
+STDERR->autoflush(1);
+binmode STDERR, ":encoding(utf8)";
+
+my $debug = defined $ENV{NIX_DEBUG_HOOK};
+
+
+# General operation:
+#
+# Try to find a free machine of type $neededSystem.  We do this as
+# follows:
+# - We acquire an exclusive lock on $currentLoad/main-lock.
+# - For each machine $machine of type $neededSystem and for each $slot
+#   less than the maximum load for that machine, we try to get an
+#   exclusive lock on $currentLoad/$machine-$slot (without blocking).
+#   If we get such a lock, we send "accept" to the caller.  Otherwise,
+#   we send "postpone" and exit.
+# - We release the exclusive lock on $currentLoad/main-lock.
+# - We perform the build on $neededSystem.
+# - We release the exclusive lock on $currentLoad/$machine-$slot.
+#
+# The nice thing about this scheme is that if we die prematurely, the
+# locks are released automatically.
+
+
+# Make sure that we don't get any SSH passphrase or host key popups -
+# if there is any problem it should fail, not do something
+# interactive.
+$ENV{"DISPLAY"} = "";
+$ENV{"SSH_ASKPASS"} = "";
+
+
+sub sendReply {
+    my $reply = shift;
+    print STDERR "# $reply\n";
+}
+
+sub all { $_ || return 0 for @_; 1 }
+
+
+# Initialisation.
+my $loadIncreased = 0;
+
+my ($localSystem, $maxSilentTime, $printBuildTrace, $buildTimeout) = @ARGV;
+
+my $currentLoad = $ENV{"NIX_CURRENT_LOAD"} // "/run/nix/current-load";
+my $conf = $ENV{"NIX_REMOTE_SYSTEMS"} // "@sysconfdir@/nix/machines";
+
+
+sub openSlotLock {
+    my ($machine, $slot) = @_;
+    my $slotLockFn = "$currentLoad/" . (join '+', @{$machine->{systemTypes}}) . "-" . $machine->{hostName} . "-$slot";
+    my $slotLock = new IO::Handle;
+    sysopen $slotLock, "$slotLockFn", O_RDWR|O_CREAT, 0600 or die;
+    return $slotLock;
+}
+
+
+# Read the list of machines.
+my @machines;
+if (defined $conf && -e $conf) {
+    open CONF, "<$conf" or die;
+    while (<CONF>) {
+        chomp;
+        s/\#.*$//g;
+        next if /^\s*$/;
+        my @tokens = split /\s/, $_;
+        my @supportedFeatures = split(/,/, $tokens[5] || "");
+        my @mandatoryFeatures = split(/,/, $tokens[6] || "");
+        push @machines,
+            { hostName => $tokens[0]
+            , systemTypes => [ split(/,/, $tokens[1]) ]
+            , sshKeys => $tokens[2]
+            , maxJobs => int($tokens[3])
+            , speedFactor => 1.0 * (defined $tokens[4] ? int($tokens[4]) : 1)
+            , supportedFeatures => [ @supportedFeatures, @mandatoryFeatures ]
+            , mandatoryFeatures => [ @mandatoryFeatures ]
+            , enabled => 1
+            };
+    }
+    close CONF;
+}
+
+
+
+# Wait for the calling process to ask us whether we can build some derivation.
+my ($drvPath, $hostName, $slotLock);
+my ($from, $to);
+
+REQ: while (1) {
+    $_ = <STDIN> || exit 0;
+    (my $amWilling, my $neededSystem, $drvPath, my $requiredFeatures) = split;
+    my @requiredFeatures = split /,/, $requiredFeatures;
+
+    my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
+
+    if (!defined $currentLoad) {
+        sendReply "decline";
+        next;
+    }
+
+    # Acquire the exclusive lock on $currentLoad/main-lock.
+    mkdir $currentLoad, 0777 or die unless -d $currentLoad;
+    my $mainLock = "$currentLoad/main-lock";
+    sysopen MAINLOCK, "$mainLock", O_RDWR|O_CREAT, 0600 or die;
+    flock(MAINLOCK, LOCK_EX) or die;
+
+
+    while (1) {
+        # Find all machine that can execute this build, i.e., that
+        # support builds for the given platform and features, and are
+        # not at their job limit.
+        my $rightType = 0;
+        my @available = ();
+        LOOP: foreach my $cur (@machines) {
+            if ($cur->{enabled}
+                && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}})
+                && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{supportedFeatures}} } (@requiredFeatures, @mandatoryFeatures))
+                && all(map { my $f = $_; 0 != grep { $f eq $_ } @requiredFeatures } @{$cur->{mandatoryFeatures}})
+                )
+            {
+                $rightType = 1;
+
+                # We have a machine of the right type.  Determine the load on
+                # the machine.
+                my $slot = 0;
+                my $load = 0;
+                my $free;
+                while ($slot < $cur->{maxJobs}) {
+                    my $slotLock = openSlotLock($cur, $slot);
+                    if (flock($slotLock, LOCK_EX | LOCK_NB)) {
+                        $free = $slot unless defined $free;
+                        flock($slotLock, LOCK_UN) or die;
+                    } else {
+                        $load++;
+                    }
+                    close $slotLock;
+                    $slot++;
+                }
+
+                push @available, { machine => $cur, load => $load, free => $free }
+                if $load < $cur->{maxJobs};
+            }
+        }
+
+        if ($debug) {
+            print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n"
+                foreach @available;
+        }
+
+
+        # Didn't find any available machine?  Then decline or postpone.
+        if (scalar @available == 0) {
+            # Postpone if we have a machine of the right type, except
+            # if the local system can and wants to do the build.
+            if ($rightType && !$canBuildLocally) {
+                sendReply "postpone";
+            } else {
+                sendReply "decline";
+            }
+            close MAINLOCK;
+            next REQ;
+        }
+
+
+        # Prioritise the available machines as follows:
+        # - First by load divided by speed factor, rounded to the nearest
+        #   integer.  This causes fast machines to be preferred over slow
+        #   machines with similar loads.
+        # - Then by speed factor.
+        # - Finally by load.
+        sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); }
+        @available = sort
+            { lf($a) <=> lf($b)
+                  || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
+                  || $a->{load} <=> $b->{load}
+            } @available;
+
+
+        # Select the best available machine and lock a free slot.
+        my $selected = $available[0];
+        my $machine = $selected->{machine};
+
+        $slotLock = openSlotLock($machine, $selected->{free});
+        flock($slotLock, LOCK_EX | LOCK_NB) or die;
+        utime undef, undef, $slotLock;
+
+        close MAINLOCK;
+
+
+        # Connect to the selected machine.
+        my @sshOpts = ("-i", $machine->{sshKeys});
+        $hostName = $machine->{hostName};
+        eval {
+            ($from, $to) = connectToRemoteNix($hostName, \@sshOpts, "2>&4");
+            # FIXME: check if builds are inhibited.
+        };
+        last REQ unless $@;
+        print STDERR "$@";
+        warn "unable to open SSH connection to ‘$hostName’, trying other available machines...\n";
+        $from = undef;
+        $to = undef;
+        $machine->{enabled} = 0;
+    }
+}
+
+
+# Tell Nix we've accepted the build.
+sendReply "accept";
+my @inputs = split /\s/, readline(STDIN);
+my @outputs = split /\s/, readline(STDIN);
+
+
+print STDERR "@ build-remote $drvPath $hostName\n" if $printBuildTrace;
+
+
+my $maybeSign = "";
+$maybeSign = "--sign" if -e "$Nix::Config::confDir/signing-key.sec";
+
+
+# Copy the derivation and its dependencies to the build machine.  This
+# is guarded by an exclusive lock per machine to prevent multiple
+# build-remote instances from copying to a machine simultaneously.
+# That's undesirable because we may end up with N instances uploading
+# the same missing path simultaneously, causing the effective network
+# bandwidth and target disk speed to be divided by N.
+my $uploadLock = "$currentLoad/$hostName.upload-lock";
+sysopen UPLOADLOCK, "$uploadLock", O_RDWR|O_CREAT, 0600 or die;
+eval {
+    local $SIG{ALRM} = sub { die "alarm\n" };
+    # Don't wait forever, so that a process that gets stuck while
+    # holding the lock doesn't block everybody else indefinitely.
+    # It's safe to continue after a timeout, just (potentially)
+    # inefficient.
+    alarm 15 * 60;
+    flock(UPLOADLOCK, LOCK_EX);
+    alarm 0;
+};
+if ($@) {
+    die unless $@ eq "alarm\n";
+    print STDERR "somebody is hogging $uploadLock, continuing...\n";
+    unlink $uploadLock;
+}
+Nix::CopyClosure::copyToOpen($from, $to, $hostName, [ $drvPath, @inputs ], 0, 0, $maybeSign ne "");
+close UPLOADLOCK;
+
+
+# Perform the build.
+print STDERR "building ‘$drvPath’ on ‘$hostName’\n";
+print STDERR "@ build-remote-start $drvPath $hostName\n" if $printBuildTrace;
+writeInt(6, $to) or die; # == cmdBuildPaths
+writeStrings([$drvPath], $to);
+writeInt($maxSilentTime, $to);
+writeInt($buildTimeout, $to);
+my $res = readInt($from);
+print STDERR "@ build-remote-done $drvPath $hostName\n" if $printBuildTrace;
+if ($res != 0) {
+    my $msg = decode("utf-8", readString($from));
+    print STDERR "error: $msg on ‘$hostName’\n";
+    exit $res;
+}
+
+
+# Copy the output from the build machine.
+my @outputs2 = grep { !isValidPath($_) } @outputs;
+if (scalar @outputs2 > 0) {
+    writeInt(5, $to); # == cmdExportPaths
+    writeInt(0, $to); # don't sign
+    writeStrings(\@outputs2, $to);
+    $ENV{'NIX_HELD_LOCKS'} = "@outputs2"; # FIXME: ugly
+    importPaths(fileno($from));
+}