From 019176137f49eba826e08c6b0f0a5c35ecdde81d Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 13 Jan 2009 11:39:09 +0000 Subject: * When using a build hook, distinguish between transient failures (e.g. an SSH connection problem) and permanent failures (i.e. the builder failed). This matters to Hydra (it wants to know whether it makes sense to retry a build). --- scripts/build-remote.pl.in | 12 ++++++++++-- src/libstore/build.cc | 24 ++++++++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/scripts/build-remote.pl.in b/scripts/build-remote.pl.in index 930164599a04..53cf5364476b 100755 --- a/scripts/build-remote.pl.in +++ b/scripts/build-remote.pl.in @@ -192,8 +192,16 @@ my $buildFlags = "--max-silent-time $maxSilentTime"; # connection dies. Without it, the remote process might continue to # run indefinitely (that is, until it next tries to write to # stdout/stderr). -system("ssh -tt $sshOpts $hostName 'nix-store -rvvK $buildFlags $drvPath'") == 0 - or die "remote build on $hostName failed: $?"; +if (system("ssh -tt $sshOpts $hostName 'nix-store -rvvK $buildFlags $drvPath'") != 0) { + # If we couldn't run ssh or there was an ssh problem (indicated by + # exit code 255), then we return exit code 1; otherwise we assume + # that the builder failed, which we indicated to Nix using exit + # code 100. It's important to distinguish between the two because + # the first is a transient failure and the latter is permanent. + my $res = $? == -1 || ($? >> 8) == 255 ? 1 : 100; + print STDERR "remote build on $hostName failed: $?"; + exit $res; +} print "REMOTE BUILD DONE: $drvPath on $hostName\n"; diff --git a/src/libstore/build.cc b/src/libstore/build.cc index db2bed6d01f8..aea4ccdc437b 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -648,6 +648,9 @@ private: /* Pipe for the builder's standard output/error. */ Pipe logPipe; + /* Whether we're building using a build hook. */ + bool usingBuildHook; + /* Pipes for talking to the build hook (if any). */ Pipe toHook; Pipe fromHook; @@ -970,6 +973,7 @@ void DerivationGoal::tryToBuild() try { /* Is the build hook willing to accept this job? */ + usingBuildHook = true; switch (tryBuildHook()) { case rpAccept: /* Yes, it has started doing so. Wait until we get @@ -1003,6 +1007,7 @@ void DerivationGoal::tryToBuild() /* Acquire locks and such. If we then see that the build has been done by somebody else, we're done. */ + usingBuildHook = false; PrepareBuildReply preply = prepareBuild(); if (preply == prDone) { amDone(ecSuccess); @@ -1019,8 +1024,12 @@ void DerivationGoal::tryToBuild() } catch (BuildError & e) { printMsg(lvlError, e.msg()); if (printBuildTrace) { - printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%") - % drvPath % drv.outputs["out"].path % 0 % e.msg()); + if (usingBuildHook) + printMsg(lvlError, format("@ hook-failed %1% %2% %3% %4%") + % drvPath % drv.outputs["out"].path % 0 % e.msg()); + else + printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%") + % drvPath % drv.outputs["out"].path % 0 % e.msg()); } amDone(ecFailed); return; @@ -1122,8 +1131,15 @@ void DerivationGoal::buildDone() } catch (BuildError & e) { printMsg(lvlError, e.msg()); if (printBuildTrace) { - printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%") - % drvPath % drv.outputs["out"].path % status % e.msg()); + /* When using a build hook, the hook will return a + remote build failure using exit code 100. Anything + else is a hook problem. */ + if (usingBuildHook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) + printMsg(lvlError, format("@ hook-failed %1% %2% %3% %4%") + % drvPath % drv.outputs["out"].path % status % e.msg()); + else + printMsg(lvlError, format("@ build-failed %1% %2% %3% %4%") + % drvPath % drv.outputs["out"].path % status % e.msg()); } amDone(ecFailed); return; -- cgit 1.4.1