diff options
Diffstat (limited to 'scripts/build-remote.pl.in')
-rwxr-xr-x | scripts/build-remote.pl.in | 284 |
1 files changed, 284 insertions, 0 deletions
diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in new file mode 100755 index 000000000000..dcf245d7d55f --- /dev/null +++ b/scripts/build-remote.pl.in @@ -0,0 +1,284 @@ +#! @perl@ -w @perlFlags@ + +use utf8; +use Fcntl qw(:DEFAULT :flock); +use English '-no_match_vars'; +use IO::Handle; +use Nix::Config; +use Nix::SSH; +use Nix::CopyClosure; +use Nix::Store; +use Encode; +no warnings('once'); + +STDERR->autoflush(1); +binmode STDERR, ":encoding(utf8)"; + +my $debug = defined $ENV{NIX_DEBUG_HOOK}; + + +# General operation: +# +# Try to find a free machine of type $neededSystem. We do this as +# follows: +# - We acquire an exclusive lock on $currentLoad/main-lock. +# - For each machine $machine of type $neededSystem and for each $slot +# less than the maximum load for that machine, we try to get an +# exclusive lock on $currentLoad/$machine-$slot (without blocking). +# If we get such a lock, we send "accept" to the caller. Otherwise, +# we send "postpone" and exit. +# - We release the exclusive lock on $currentLoad/main-lock. +# - We perform the build on $neededSystem. +# - We release the exclusive lock on $currentLoad/$machine-$slot. +# +# The nice thing about this scheme is that if we die prematurely, the +# locks are released automatically. + + +# Make sure that we don't get any SSH passphrase or host key popups - +# if there is any problem it should fail, not do something +# interactive. +$ENV{"DISPLAY"} = ""; +$ENV{"SSH_ASKPASS"} = ""; + + +sub sendReply { + my $reply = shift; + print STDERR "# $reply\n"; +} + +sub all { $_ || return 0 for @_; 1 } + + +# Initialisation. +my $loadIncreased = 0; + +my ($localSystem, $maxSilentTime, $printBuildTrace, $buildTimeout) = @ARGV; + +my $currentLoad = $ENV{"NIX_CURRENT_LOAD"} // "/run/nix/current-load"; +my $conf = $ENV{"NIX_REMOTE_SYSTEMS"} // "@sysconfdir@/nix/machines"; + + +sub openSlotLock { + my ($machine, $slot) = @_; + my $slotLockFn = "$currentLoad/" . (join '+', @{$machine->{systemTypes}}) . "-" . $machine->{hostName} . "-$slot"; + my $slotLock = new IO::Handle; + sysopen $slotLock, "$slotLockFn", O_RDWR|O_CREAT, 0600 or die; + return $slotLock; +} + + +# Read the list of machines. +my @machines; +if (defined $conf && -e $conf) { + open CONF, "<$conf" or die; + while (<CONF>) { + chomp; + s/\#.*$//g; + next if /^\s*$/; + my @tokens = split /\s/, $_; + my @supportedFeatures = split(/,/, $tokens[5] || ""); + my @mandatoryFeatures = split(/,/, $tokens[6] || ""); + push @machines, + { hostName => $tokens[0] + , systemTypes => [ split(/,/, $tokens[1]) ] + , sshKeys => $tokens[2] + , maxJobs => int($tokens[3]) + , speedFactor => 1.0 * (defined $tokens[4] ? int($tokens[4]) : 1) + , supportedFeatures => [ @supportedFeatures, @mandatoryFeatures ] + , mandatoryFeatures => [ @mandatoryFeatures ] + , enabled => 1 + }; + } + close CONF; +} + + + +# Wait for the calling process to ask us whether we can build some derivation. +my ($drvPath, $hostName, $slotLock); +my ($from, $to); + +REQ: while (1) { + $_ = <STDIN> || exit 0; + (my $amWilling, my $neededSystem, $drvPath, my $requiredFeatures) = split; + my @requiredFeatures = split /,/, $requiredFeatures; + + my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem); + + if (!defined $currentLoad) { + sendReply "decline"; + next; + } + + # Acquire the exclusive lock on $currentLoad/main-lock. + mkdir $currentLoad, 0777 or die unless -d $currentLoad; + my $mainLock = "$currentLoad/main-lock"; + sysopen MAINLOCK, "$mainLock", O_RDWR|O_CREAT, 0600 or die; + flock(MAINLOCK, LOCK_EX) or die; + + + while (1) { + # Find all machine that can execute this build, i.e., that + # support builds for the given platform and features, and are + # not at their job limit. + my $rightType = 0; + my @available = (); + LOOP: foreach my $cur (@machines) { + if ($cur->{enabled} + && (grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) + && all(map { my $f = $_; 0 != grep { $f eq $_ } @{$cur->{supportedFeatures}} } (@requiredFeatures, @mandatoryFeatures)) + && all(map { my $f = $_; 0 != grep { $f eq $_ } @requiredFeatures } @{$cur->{mandatoryFeatures}}) + ) + { + $rightType = 1; + + # We have a machine of the right type. Determine the load on + # the machine. + my $slot = 0; + my $load = 0; + my $free; + while ($slot < $cur->{maxJobs}) { + my $slotLock = openSlotLock($cur, $slot); + if (flock($slotLock, LOCK_EX | LOCK_NB)) { + $free = $slot unless defined $free; + flock($slotLock, LOCK_UN) or die; + } else { + $load++; + } + close $slotLock; + $slot++; + } + + push @available, { machine => $cur, load => $load, free => $free } + if $load < $cur->{maxJobs}; + } + } + + if ($debug) { + print STDERR "load on " . $_->{machine}->{hostName} . " = " . $_->{load} . "\n" + foreach @available; + } + + + # Didn't find any available machine? Then decline or postpone. + if (scalar @available == 0) { + # Postpone if we have a machine of the right type, except + # if the local system can and wants to do the build. + if ($rightType && !$canBuildLocally) { + sendReply "postpone"; + } else { + sendReply "decline"; + } + close MAINLOCK; + next REQ; + } + + + # Prioritise the available machines as follows: + # - First by load divided by speed factor, rounded to the nearest + # integer. This causes fast machines to be preferred over slow + # machines with similar loads. + # - Then by speed factor. + # - Finally by load. + sub lf { my $x = shift; return int($x->{load} / $x->{machine}->{speedFactor} + 0.4999); } + @available = sort + { lf($a) <=> lf($b) + || $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor} + || $a->{load} <=> $b->{load} + } @available; + + + # Select the best available machine and lock a free slot. + my $selected = $available[0]; + my $machine = $selected->{machine}; + + $slotLock = openSlotLock($machine, $selected->{free}); + flock($slotLock, LOCK_EX | LOCK_NB) or die; + utime undef, undef, $slotLock; + + close MAINLOCK; + + + # Connect to the selected machine. + my @sshOpts = ("-i", $machine->{sshKeys}); + $hostName = $machine->{hostName}; + eval { + ($from, $to) = connectToRemoteNix($hostName, \@sshOpts, "2>&4"); + # FIXME: check if builds are inhibited. + }; + last REQ unless $@; + print STDERR "$@"; + warn "unable to open SSH connection to ‘$hostName’, trying other available machines...\n"; + $from = undef; + $to = undef; + $machine->{enabled} = 0; + } +} + + +# Tell Nix we've accepted the build. +sendReply "accept"; +my @inputs = split /\s/, readline(STDIN); +my @outputs = split /\s/, readline(STDIN); + + +print STDERR "@ build-remote $drvPath $hostName\n" if $printBuildTrace; + + +my $maybeSign = ""; +$maybeSign = "--sign" if -e "$Nix::Config::confDir/signing-key.sec"; + + +# Copy the derivation and its dependencies to the build machine. This +# is guarded by an exclusive lock per machine to prevent multiple +# build-remote instances from copying to a machine simultaneously. +# That's undesirable because we may end up with N instances uploading +# the same missing path simultaneously, causing the effective network +# bandwidth and target disk speed to be divided by N. +my $uploadLock = "$currentLoad/$hostName.upload-lock"; +sysopen UPLOADLOCK, "$uploadLock", O_RDWR|O_CREAT, 0600 or die; +eval { + local $SIG{ALRM} = sub { die "alarm\n" }; + # Don't wait forever, so that a process that gets stuck while + # holding the lock doesn't block everybody else indefinitely. + # It's safe to continue after a timeout, just (potentially) + # inefficient. + alarm 15 * 60; + flock(UPLOADLOCK, LOCK_EX); + alarm 0; +}; +if ($@) { + die unless $@ eq "alarm\n"; + print STDERR "somebody is hogging $uploadLock, continuing...\n"; + unlink $uploadLock; +} +Nix::CopyClosure::copyToOpen($from, $to, $hostName, [ $drvPath, @inputs ], 0, 0, $maybeSign ne ""); +close UPLOADLOCK; + + +# Perform the build. +print STDERR "building ‘$drvPath’ on ‘$hostName’\n"; +print STDERR "@ build-remote-start $drvPath $hostName\n" if $printBuildTrace; +writeInt(6, $to) or die; # == cmdBuildPaths +writeStrings([$drvPath], $to); +writeInt($maxSilentTime, $to); +writeInt($buildTimeout, $to); +my $res = readInt($from); +print STDERR "@ build-remote-done $drvPath $hostName\n" if $printBuildTrace; +if ($res != 0) { + my $msg = decode("utf-8", readString($from)); + print STDERR "error: $msg on ‘$hostName’\n"; + exit $res; +} + + +# Copy the output from the build machine. +my @outputs2 = grep { !isValidPath($_) } @outputs; +if (scalar @outputs2 > 0) { + writeInt(5, $to); # == cmdExportPaths + writeInt(0, $to); # don't sign + writeStrings(\@outputs2, $to); + $ENV{'NIX_HELD_LOCKS'} = "@outputs2"; # FIXME: ugly + importPaths(fileno($from)); +} |