#! /usr/bin/perl -w

# build-remote.pl -- example Nix build hook that distributes build jobs
# over a set of remote machines listed in a configuration file.
#
# Origin: distributed/build-remote.pl (mode 100755), added by commit
# a8306cb98ff920b38db5e5bbfcbe71bb36859b3f (Eelco Dolstra,
# Thu, 13 May 2004 19:14:49 +0000).  Commit message:
#
#   * The build hooks used to implement distributed builds can now be
#     run in parallel.  Hooks are more efficient: locks on output paths
#     are only acquired when the hook says that it is willing to accept
#     a build job.  Hooks now work in two phases.  First, they tell Nix
#     whether they are willing to accept a job.  Nix guarantees that no
#     two hooks will ever be in the first phase at the same time (this
#     simplifies the implementation of hooks, since they don't have to
#     perform locking (?)).  Second, if they accept a job, they are then
#     responsible for building it (on the remote system), and copying
#     the result back.  These can be run in parallel with other hooks
#     and locally executed jobs.  The implementation is a bit messy
#     right now, though.
#
#   * The directory `distributed' shows a (hacky) example of a hook that
#     distributes build jobs over a set of machines listed in a
#     configuration file.
#
# Usage:
#   build-remote.pl AM-WILLING LOCAL-SYSTEM NEEDED-SYSTEM STORE-EXPR
#
# What this script does:
#   - Replies "decline", "postpone" or "accept" (one line) on file
#     descriptor 3.
#   - After "accept", reads one line from file descriptor 4 before
#     starting the build.
#   - Reads the whitespace-separated path lists `inputs', `outputs' and
#     `successors' from the current directory.
#   - Copies the store expression and inputs to the chosen machine with
#     rsync, registers them with nix-store, runs the build over ssh, and
#     copies every output path back into /nix/store.
#
# Configuration file: one machine per line,
#     NAME SYSTEM-TYPE SSH-KEY-FILE MAX-JOBS
# Load file: one machine per line,
#     NAME CURRENT-JOBS
# Blank lines are ignored in both; any other malformed line is fatal.

use strict;
use warnings;

# Locations of the machine list and the load status.
my $confFile = "/home/eelco/nix/distributed/remote-systems.conf";
my $loadFile = "/home/eelco/nix/distributed/current-load";

# Progress messages are interleaved with output of ssh/rsync children,
# so do not let them sit in the stdio buffer.
$| = 1;


# Send a one-line reply on file descriptor 3.
sub sendReply {
    my $reply = shift;
    open my $out, '>&', 3 or die "cannot dup file descriptor 3: $!";
    print {$out} "$reply\n" or die "cannot write reply `$reply': $!";
    close $out or die "cannot close reply channel: $!";
    return;
}


# Return the whitespace-separated paths listed in FILE.  An unreadable
# file yields a warning and an empty list, which matches what the
# previous `cat FILE` in backticks effectively did.
sub readPathList {
    my $file = shift;
    my $fh;
    unless (open $fh, '<', $file) {
        warn "cannot read $file: $!\n";
        return ();
    }
    my @paths;
    while (my $line = <$fh>) {
        push @paths, split ' ', $line;
    }
    close $fh;
    return @paths;
}


@ARGV >= 4
    or die "usage: $0 AM-WILLING LOCAL-SYSTEM NEEDED-SYSTEM STORE-EXPR\n";

my $amWilling = shift @ARGV;
my $localSystem = shift @ARGV;
my $neededSystem = shift @ARGV;
my $storeExpr = shift @ARGV;

# Decline if the local system can do the build.
# NOTE(review): this reply is written to stdout, whereas every other
# reply in this script goes through sendReply (file descriptor 3).
# Confirm which channel the caller reads before changing it.
if ($amWilling && ($localSystem eq $neededSystem)) {
    print "decline\n";
    exit 0;
}

# Otherwise find a willing remote machine.

# Read the list of machines: name => { systemType, sshKey, maxJobs }.
my %machines;
open my $conf, '<', $confFile or die "cannot open $confFile: $!";
while (my $line = <$conf>) {
    chomp $line;
    next if $line =~ /^\s*$/;
    $line =~ /^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s*$/
        or die "$confFile:$.: malformed machine entry `$line'";
    $machines{$1} = { systemType => $2, sshKey => $3, maxJobs => $4 };
}
close $conf;

# Read the current load status: name => number of running jobs.
my %curJobs;
open my $load, '<', $loadFile or die "cannot open $loadFile: $!";
while (my $line = <$load>) {
    chomp $line;
    next if $line =~ /^\s*$/;
    $line =~ /^\s*(\S+)\s+(\d+)\s*$/
        or die "$loadFile:$.: malformed load entry `$line'";
    $curJobs{$1} = $2;
}
close $load;

# Find a suitable system: right system type, and either no recorded
# load or fewer running jobs than its maximum.
my $rightType = 0;
my $machine;
foreach my $cur (keys %machines) {
    next unless $neededSystem eq $machines{$cur}{systemType};
    $rightType = 1;
    if (!defined $curJobs{$cur}
        || $curJobs{$cur} < $machines{$cur}{maxJobs})
    {
        $machine = $cur;
        last;
    }
}

if (!defined $machine) {
    # Machines of the right type exist but are all busy: ask to be
    # retried later.  No machine of the right type at all: refuse.
    sendReply($rightType ? "postpone" : "decline");
    exit 0;
}

sendReply("accept");

# Wait for a line on file descriptor 4 before starting the build.
open my $in, '<&', 4 or die "cannot dup file descriptor 4: $!";
my $x = <$in>;
$x = "" unless defined $x;    # EOF: avoid chomp/print on undef
chomp $x;
print "got $x\n";
close $in;

print "BUILDING REMOTE: $storeExpr on $machine\n";

# ssh invocation, both as an argument list (for running remote commands)
# and as a single string (rsync's -e option takes one argument and
# splits it itself).
my @ssh = ("ssh", "-i", $machines{$machine}{sshKey}, "-x");
my $rsyncShell = join ' ', @ssh;

my @inputs = readPathList("inputs");
my @outputs = readPathList("outputs");
my @successors = readPathList("successors");

# List-form system() keeps the local shell out of the picture; the
# remote command line is still interpreted by the remote shell, as ssh
# joins its trailing arguments with spaces.
system("rsync", "-a", "-e", $rsyncShell,
       $storeExpr, @inputs, "$machine:/nix/store") == 0
    or die "cannot rsync inputs to $machine";

system(@ssh, $machine, "/nix/bin/nix-store", "--validpath",
       $storeExpr, @inputs) == 0
    or die "cannot set valid paths on $machine";

system(@ssh, $machine, "/nix/bin/nix-store", "--successor",
       @successors) == 0
    or die "cannot set successors on $machine";

print "BUILDING...\n";

system(@ssh, $machine, "/nix/bin/nix-store", "-qnfvvvv",
       $storeExpr) == 0
    or die "remote build on $machine failed";

print "REMOTE BUILD DONE\n";

# Copy back each output path separately so that every one of them is
# prefixed with the remote host name.
foreach my $output (@outputs) {
    system("rsync", "-a", "-e", $rsyncShell,
           "$machine:$output", "/nix/store") == 0
        or die "cannot rsync outputs from $machine";
}