about summary refs log blame commit diff
path: root/scripts/build-remote.pl.in
blob: e821b40718b94e92e28e8ca0121c37a8399f3317 (plain) (tree)























                                                                      
                                                                                         
                                     
                                                 


















                                                       
                                                                    

                                          
             









                                                   




                          












                                                       
                                              



                                                                    
                                                                            
                                                                                                    
                                                     
                                                                                              
                                
             







                           
                                             
                        

                                                                    



















                             
                                                 






                                                                     
                                                  
                                                                  
                                                              







                                                                                         
                                           




















                                                              
                                                                                              

                      
                                                    
 



                                                                    
                                                                                                          







                                                                      
 
                                                   



                                             
                                                                                                                                 
                                                        
 
#! @perl@ -w

use strict;
use Fcntl ':flock';
use English '-no_match_vars';

# General operation:
#
# Try to find a free machine of type $neededSystem.  We do this as
# follows:
# - We acquire an exclusive lock on $currentLoad/main-lock.
# - For each machine of type $neededSystem and for each $slot less
#   than the maximum load for that machine, we try to get an
#   exclusive lock on $currentLoad/$systemType-$hostName-$slot
#   (without blocking).  If we get such a lock, we send "accept" to
#   the caller.  Otherwise, we send "postpone" and exit (or "decline"
#   if no machine has the right type or the local system can and
#   wants to do the build itself).
# - We release the exclusive lock on $currentLoad/main-lock.
# - We perform the build on the machine whose slot we locked.
# - The slot lock is never released explicitly; it goes away when
#   this script exits.
#
# The nice thing about this scheme is that if we die prematurely, the
# locks are released automatically.

# NOTE(review): $loadIncreased is not referenced anywhere else in the
# visible part of this script.
my $loadIncreased = 0;

# Positional command-line arguments.  $amWilling, $localSystem and
# $neededSystem together decide whether the build could be done
# locally; $drvPath is the derivation to build.  The last two
# arguments are optional and fall back to 0 when they are not given.
my ($amWilling, $localSystem, $neededSystem, $drvPath, $mustRun, $maxSilentTime) = @ARGV;
if (!defined $mustRun) {
    $mustRun = 0;
}
if (!defined $maxSilentTime) {
    $maxSilentTime = 0;
}

# Send a one-line reply to the caller.
#
# Duplicates file descriptor 3 for writing, prints $reply followed by
# a newline on it, and closes the duplicate again.  Dies if the
# descriptor cannot be duplicated.
sub sendReply {
    my ($reply) = @_;
    open(my $out, ">&3") or die;
    print {$out} $reply, "\n";
    close($out);
}

# Reply "decline" to the caller and terminate the script with exit
# status 0.  This sub never returns.
sub decline {
    sendReply("decline");
    exit(0);
}

# Directory in which the lock files are kept.  Without it we cannot
# do any load bookkeeping, so we decline.
my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
decline unless defined $currentLoad;

# Create the directory on first use.  Several instances of this script
# may be started at the same time, so another instance can create the
# directory between our -d test and our mkdir; a failed mkdir is
# therefore only fatal if the directory still does not exist.
if (!-d $currentLoad && !mkdir($currentLoad, 0777)) {
    my $mkdirError = $!;
    die "cannot create directory $currentLoad: $mkdirError\n"
        unless -d $currentLoad;
}

# File listing the remote machines.  Decline if it is not configured
# or does not exist.
my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
decline if !defined $conf || ! -e $conf;

# The local machine is an option only if the caller is willing and the
# local system type is the one that is needed.
my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);


# The remote machines listed in the configuration file, in file order.
# Each element is a hash reference with the keys hostName, systemType,
# sshKeys and maxJobs.
my @machines;
# NOTE(review): %curJobs is not referenced anywhere else in the
# visible part of this script.
my %curJobs;


# Read the list of machines.  After stripping `#' comments, every
# non-blank line must consist of exactly four whitespace-separated
# fields: host name, system type, SSH key specification, and a
# numeric maximum number of jobs.  Anything else is a fatal error.
open my $confFile, "<", $conf or die "cannot open $conf: $!\n";

while (my $line = <$confFile>) {
    chomp $line;
    $line =~ s/\#.*$//;
    next if $line =~ /^\s*$/;
    $line =~ /^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/
        or die "malformed machine specification at line $. of $conf\n";
    push @machines,
        { hostName => $1
        , systemType => $2
        , sshKeys => $3
        , maxJobs => $4
        };
}

close $confFile;


# Take the exclusive lock on $currentLoad/main-lock (blocking until we
# get it) before looking at any of the slot lock files.  The file is
# opened in append mode, which creates it if it does not exist yet.
my $mainLock = "$currentLoad/main-lock";
open MAINLOCK, ">>$mainLock" or die;
flock(MAINLOCK, LOCK_EX) or die;


# Find a suitable system.
#
# $rightType records whether any configured machine has the needed
# system type.  $machine is set to the machine on which we managed to
# lock a slot and stays undefined if we did not get one.
#
# Once a slot has been locked, SLOTLOCK is deliberately left open: it
# is a package-level handle so that the lock is not dropped when the
# loop is left.
my $rightType = 0;
my $machine;
MACHINE: foreach my $cur (@machines) {
    next MACHINE if $neededSystem ne $cur->{systemType};
    $rightType = 1;

    # We have a machine of the right type.  Normally only the slots
    # below maxJobs are tried; if the build must run and cannot be
    # done locally, we keep going past maxJobs until a slot can be
    # locked.
    for (my $slot = 0; $slot < $cur->{maxJobs} || ($mustRun && !$canBuildLocally); $slot++) {
        my $slotLock = join("-", "$currentLoad/$cur->{systemType}", $cur->{hostName}, $slot);
        open SLOTLOCK, ">>$slotLock" or die;

        if (!flock(SLOTLOCK, LOCK_EX | LOCK_NB)) {
            # Somebody else holds this slot; try the next one.
            close SLOTLOCK;
            next;
        }

        print STDERR "warning: exceeding maximum load on " . $cur->{systemType} . "\n"
            if $slot >= $cur->{maxJobs};
        $machine = $cur;
        last MACHINE;
    }
}

# Closing the handle gives up the main lock.
close MAINLOCK;


# Didn't find one?  Then decline or postpone.
if (!defined $machine) {
    # Postpone if we have a machine of the right type, except if the
    # local system can and wants to do the build.
    if ($rightType && !$canBuildLocally) {
        sendReply "postpone";
        exit 0;
    } else {
        decline;
    }
}

# Yes we did, accept.  The caller then answers with one line on file
# descriptor 4; we only go ahead if that line is "okay".
sendReply "accept";
open my $in, "<&4" or die "cannot read the caller's answer from file descriptor 4: $!\n";
my $x = <$in>;
close $in;

# If the caller closed the descriptor without answering, the read
# yields undef.  Treat that like any other answer that is not "okay"
# instead of triggering "uninitialized value" warnings.
$x = "" unless defined $x;
chomp $x;

if ($x ne "okay") {
    exit 0;
}


# Do the actual job.
my $hostName = $machine->{hostName};
print "BUILDING REMOTE: $drvPath on $hostName\n";

# Make sure that we don't get any SSH passphrase or host key popups -
# if there is any problem it should fail, not do something
# interactive.  (The variable names must not contain a trailing `=',
# otherwise a differently named variable is set and the real one is
# left untouched.)
$ENV{"DISPLAY"} = "";
$ENV{"SSH_PASSWORD_FILE"} = "";
$ENV{"SSH_ASKPASS"} = "";

# Options only, without the program name: this string is inserted
# after `ssh' in the commands below and is also exported as
# NIX_SSHOPTS.
my $sshOpts = "-i " . $machine->{sshKeys} . " -x";

# Hack to support Cygwin: if we login without a password, we don't
# have exactly the same rights as when we do.  This causes the
# Microsoft C compiler to fail with certain flags:
#
#   http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99676
#
# So as a workaround, we pass a verbatim password.  ssh tries to make
# this very hard; the trick is to make it call SSH_ASKPASS to get the
# password.  (It only calls this command when there is no controlling
# terminal, but Nix ensures that this is the case.  When doing this
# manually, use setsid(1).)
#
# A machine selects this mode by giving `password:FILE' instead of a
# key file in its sshKeys field.
if ($machine->{sshKeys} =~ /^password:/) {
    my $passwordFile = $machine->{sshKeys};
    $passwordFile =~ s/^password://;
    # There is no key file in this mode, so drop the `-i' option.
    $sshOpts = "-x";
    $ENV{"SSH_PASSWORD_FILE"} = $passwordFile;
    $ENV{"SSH_ASKPASS"} = "/tmp/writepass";

    # NOTE(review): /tmp/writepass is a fixed, predictable path that
    # is shared by all instances of this script - consider a private
    # temporary file.
    open my $writePass, ">", "/tmp/writepass"
        or die "cannot create /tmp/writepass: $!\n";
    print {$writePass} "#! /bin/sh\ncat \"\$SSH_PASSWORD_FILE\"";
    close $writePass or die "cannot write /tmp/writepass: $!\n";
    chmod 0755, "/tmp/writepass" or die "cannot make /tmp/writepass executable: $!\n";
}

# Read the files `inputs' and `outputs' from the current directory
# and turn every newline into a space, so that the contents can be
# spliced into a shell command line.  Give up if either file cannot
# be read.
my $inputs = `cat inputs`;
die unless $? == 0;
$inputs =~ tr/\n/ /;

my $outputs = `cat outputs`;
die unless $? == 0;
$outputs =~ tr/\n/ /;

print "COPYING INPUTS...\n";

# Only pass --sign when the signing key file exists on this machine.
my $maybeSign = (-e "/nix/etc/nix/signing-key.sec") ? "--sign" : "";

# Copy the derivation and its inputs to the remote machine; the ssh
# options reach nix-copy-closure through NIX_SSHOPTS.
my $copyCommand = "NIX_SSHOPTS=\"$sshOpts\" nix-copy-closure $hostName $maybeSign $drvPath $inputs";
system($copyCommand) == 0
    or die "cannot copy inputs to $hostName: $?";

print "BUILDING...\n";

my $buildFlags = "--max-silent-time $maxSilentTime";

# `-tt' forces allocation of a pseudo-terminal.  This is required to
# make the remote nix-store process receive a signal when the
# connection dies.  Without it, the remote process might continue to
# run indefinitely (that is, until it next tries to write to
# stdout/stderr).
if (system("ssh -tt $sshOpts $hostName 'nix-store --realise -K $buildFlags $drvPath > /dev/null'") != 0) {
    # If we couldn't run ssh or there was an ssh problem (indicated by
    # exit code 255), then we return exit code 1; otherwise we assume
    # that the builder failed, which we indicate to Nix using exit
    # code 100.  It's important to distinguish between the two because
    # the first is a transient failure and the latter is permanent.
    my $res = $? == -1 || ($? >> 8) == 255 ? 1 : 100;
    # $? is the raw wait status as returned by system(), not just the
    # exit code.  End the line so the message does not run into
    # whatever is written to stderr next.
    print STDERR "remote build on $hostName failed: $?\n";
    exit $res;
}

print "REMOTE BUILD DONE: $drvPath on $hostName\n";

# Ask the remote side to sign the export whenever we are not running
# as user id 0.
my $maybeSignRemote = $UID != 0 ? "--sign" : "";

# Export the outputs on the remote machine and import them into the
# local store, one ssh invocation per element of the split.
#
# NOTE(review): $outputs is split on newlines here.  If its newlines
# have already been replaced by spaces by the time we get here, the
# loop runs once with all output paths in a single command - confirm
# that this is intended.
for my $output (split /\n/, $outputs) {
    my $fetchCommand = "ssh $sshOpts $hostName 'nix-store --export $maybeSignRemote $output' | @bindir@/nix-store --import > /dev/null";
    system($fetchCommand) == 0
        or die "cannot copy $output from $hostName: $?";
}