about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Eelco Dolstra <e.dolstra@tudelft.nl> 2006-12-08T15·44+0000
committer Eelco Dolstra <e.dolstra@tudelft.nl> 2006-12-08T15·44+0000
commit 9dbfe242e3bdbfc7728a36c8a2b9fbbea2c8ed68 (patch)
tree dc27874c617eabfa591ac32eca387de7e82835ae /src
parent d3fe6ab024df7764f4de2a9dcf88e2daa981f786 (diff)
* Kill a build if it has gone for more than a certain number of
  seconds without producing output on stdout or stderr (NIX-65).  This
  timeout can be specified using the `--max-silent-time' option or the
  `build-max-silent-time' configuration setting.  The default is
  infinity (0).

* Fix a tricky race condition: if we kill the build user before the
  child has done its setuid() to the build user uid, then it won't be
  killed, and we'll potentially lock up in pid.wait().  So also send a
  conventional kill to the child.

Diffstat (limited to 'src')
-rw-r--r-- src/libmain/shared.cc 32
-rw-r--r-- src/libstore/build.cc 50
-rw-r--r-- src/libstore/globals.cc 10
-rw-r--r-- src/libstore/globals.hh 7
4 files changed, 79 insertions, 20 deletions
diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc
index d7fb240192..f1a7db40dc 100644
--- a/src/libmain/shared.cc
+++ b/src/libmain/shared.cc
@@ -57,6 +57,18 @@ static void setLogType(string lt)
 }
 
 
+static unsigned int getIntArg(const string & opt,
+    Strings::iterator & i, const Strings::iterator & end)
+{
+    ++i;
+    if (i == end) throw UsageError(format("`%1%' requires an argument") % opt);
+    int n;
+    if (!string2Int(*i, n) || n < 0)
+        throw UsageError(format("`%1%' requires a non-negative integer") % opt);
+    return n;
+}
+
+
 struct RemoveTempRoots 
 {
     ~RemoveTempRoots()
@@ -91,12 +103,8 @@ static void initAndRun(int argc, char * * argv)
 
     /* Get some settings from the configuration file. */
     thisSystem = querySetting("system", SYSTEM);
-    {
-        int n;
-        if (!string2Int(querySetting("build-max-jobs", "1"), n) || n < 0)
-            throw Error("invalid value for configuration setting `build-max-jobs'");
-        maxBuildJobs = n;
-    }
+    maxBuildJobs = queryIntSetting("build-max-jobs", 1);
+    maxSilentTime = queryIntSetting("build-max-silent-time", 0);
 
     /* Catch SIGINT. */
     struct sigaction act, oact;
@@ -180,16 +188,12 @@ static void initAndRun(int argc, char * * argv)
             keepGoing = true;
         else if (arg == "--fallback")
             tryFallback = true;
-        else if (arg == "--max-jobs" || arg == "-j") {
-            ++i;
-            if (i == args.end()) throw UsageError("`--max-jobs' requires an argument");
-            int n;
-            if (!string2Int(*i, n) || n < 0)
-                throw UsageError(format("`--max-jobs' requires a non-negative integer"));
-            maxBuildJobs = n;
-        }
+        else if (arg == "--max-jobs" || arg == "-j")
+            maxBuildJobs = getIntArg(arg, i, args.end());
         else if (arg == "--readonly-mode")
             readOnlyMode = true;
+        else if (arg == "--max-silent-time")
+            maxSilentTime = getIntArg(arg, i, args.end());
         else remaining.push_back(arg);
     }
 
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index 033cc43d9e..cff114a182 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -13,6 +13,7 @@
 #include <boost/weak_ptr.hpp>
 #include <boost/enable_shared_from_this.hpp>
 
+#include <time.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -135,6 +136,7 @@ struct Child
     WeakGoalPtr goal;
     set<int> fds;
     bool inBuildSlot;
+    time_t lastOutput; /* time we last got output on stdout/stderr */
 };
 
 typedef map<pid_t, Child> Children;
@@ -660,9 +662,18 @@ DerivationGoal::~DerivationGoal()
             worker.childTerminated(pid);
 
             if (buildUser.enabled()) {
-                /* Can't let pid's destructor do it, since it may not
-                   have the appropriate privilege (i.e., the setuid
-                   helper should do it). */
+                /* Note that we can't let pid's destructor kill the
+                   child process, since it may not have the
+                   appropriate privilege (i.e., the setuid helper
+                   should do it).
+
+                   However, if we're using a build user, then there is
+                   a tricky race condition: if we kill the build user
+                   before the child has done its setuid() to the build
+                   user uid, then it won't be killed, and we'll
+                   potentially lock up in pid.wait().  So also send a
+                   conventional kill to the child. */
+                ::kill(-pid, SIGKILL); /* ignore the result */
                 buildUser.kill();
                 pid.wait(true);
                 assert(pid == -1);
@@ -2156,6 +2167,7 @@ void Worker::childStarted(GoalPtr goal,
     Child child;
     child.goal = goal;
     child.fds = fds;
+    child.lastOutput = time(0);
     child.inBuildSlot = inBuildSlot;
     children[pid] = child;
     if (inBuildSlot) nrChildren++;
@@ -2255,6 +2267,24 @@ void Worker::waitForInput()
        the logger pipe of a build, we assume that the builder has
        terminated. */
 
+    /* If we're monitoring for silence on stdout/stderr, sleep until
+       the first deadline for any child. */
+    struct timeval timeout;
+    if (maxSilentTime != 0) {
+        time_t oldest = 0;
+        for (Children::iterator i = children.begin();
+             i != children.end(); ++i)
+        {
+            oldest = oldest == 0 || i->second.lastOutput < oldest
+                ? i->second.lastOutput : oldest;
+        }
+        time_t now = time(0);
+        timeout.tv_sec = (time_t) (oldest + maxSilentTime) <= now ? 0 :
+            oldest + maxSilentTime - now;
+        timeout.tv_usec = 0;
+        printMsg(lvlVomit, format("sleeping %1% seconds") % timeout.tv_sec);
+    }
+
     /* Use select() to wait for the input side of any logger pipe to
        become `available'.  Note that `available' (i.e., non-blocking)
        includes EOF. */
@@ -2272,11 +2302,13 @@ void Worker::waitForInput()
         }
     }
 
-    if (select(fdMax, &fds, 0, 0, 0) == -1) {
+    if (select(fdMax, &fds, 0, 0, maxSilentTime != 0 ? &timeout : 0) == -1) {
         if (errno == EINTR) return;
         throw SysError("waiting for input");
     }
 
+    time_t now = time(0);
+
     /* Process all available file descriptors. */
     for (Children::iterator i = children.begin();
          i != children.end(); ++i)
@@ -2284,9 +2316,9 @@ void Worker::waitForInput()
         checkInterrupt();
         GoalPtr goal = i->second.goal.lock();
         assert(goal);
+        
         set<int> fds2(i->second.fds);
-        for (set<int>::iterator j = fds2.begin(); j != fds2.end(); ++j)
-        {
+        for (set<int>::iterator j = fds2.begin(); j != fds2.end(); ++j) {
             if (FD_ISSET(*j, &fds)) {
                 unsigned char buffer[4096];
                 ssize_t rd = read(*j, buffer, sizeof(buffer));
@@ -2303,9 +2335,15 @@ void Worker::waitForInput()
                         % goal->getName() % rd);
                     string data((char *) buffer, rd);
                     goal->handleChildOutput(*j, data);
+                    i->second.lastOutput = now;
                 }
             }
         }
+
+        if (maxSilentTime != 0 &&
+            now - i->second.lastOutput >= (time_t) maxSilentTime)
+            throw Error(format("%1% timed out after %2% seconds of silence")
+                % goal->getName() % maxSilentTime);
     }
 }
 
diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc
index e8c033db2e..b0316f77c2 100644
--- a/src/libstore/globals.cc
+++ b/src/libstore/globals.cc
@@ -24,6 +24,7 @@ Verbosity buildVerbosity = lvlInfo;
 unsigned int maxBuildJobs = 1;
 bool readOnlyMode = false;
 string thisSystem = "unset";
+unsigned int maxSilentTime = 0;
 
 
 static bool settingsRead = false;
@@ -104,5 +105,14 @@ bool queryBoolSetting(const string & name, bool def)
         % name % v);
 }
 
+
+unsigned int queryIntSetting(const string & name, unsigned int def)
+{
+    int n;
+    if (!string2Int(querySetting(name, int2String(def)), n) || n < 0)
+        throw Error(format("configuration setting `%1%' should have an integer value") % name);
+    return n;
+}
+
  
 }
diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh
index fbb9e19d6a..51fa685947 100644
--- a/src/libstore/globals.hh
+++ b/src/libstore/globals.hh
@@ -62,6 +62,11 @@ extern bool readOnlyMode;
 /* The canonical system name, as returned by config.guess. */ 
 extern string thisSystem;
 
+/* The maximum time in seconds that a builder can go without producing
+   any output on stdout/stderr before it is killed.  0 means
+   infinity. */
+extern unsigned int maxSilentTime;
+
 
 Strings querySetting(const string & name, const Strings & def);
 
@@ -69,6 +74,8 @@ string querySetting(const string & name, const string & def);
 
 bool queryBoolSetting(const string & name, bool def);
 
+unsigned int queryIntSetting(const string & name, unsigned int def);
+
     
 }