about summary refs log tree commit diff
path: root/src/libstore
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2007-08-28T11·36+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2007-08-28T11·36+0000
commitc970b28ba0f8866bde800849120d429d781ccb5d (patch)
tree2df160bfab85deeac18a34494aa6e65482c88f83 /src/libstore
parentbc0429b1cd94289ac8d8a51f562b920999002b89 (diff)
* Fix a race condition with parallel builds where multiple
  fixed-output derivations or substitutions try to build the same
  store path at the same time.  Locking generally catches this, but
  not between multiple goals in the same process.  This happened
  especially often (actually, only) in the build farm with fetchurl
  downloads of the same file being executed on multiple machines and
  then copied back to the main machine where they would clobber each
  other (NIXBF-13).

  Solution: if a goal notices that the output path is already locked,
  then go to sleep until another goal finishes (hopefully the one
  locking the path) and try again.

Diffstat (limited to 'src/libstore')
-rw-r--r--src/libstore/build.cc96
-rw-r--r--src/libstore/pathlocks.cc7
-rw-r--r--src/libstore/pathlocks.hh3
3 files changed, 96 insertions, 10 deletions
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index 4a2affdf59b3..5ad81050060c 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -181,6 +181,9 @@ private:
     WeakGoalMap derivationGoals;
     WeakGoalMap substitutionGoals;
 
+    /* Goals waiting for busy paths to be unlocked. */
+    WeakGoals waitingForAnyGoal;
+    
 public:
 
     Worker();
@@ -222,6 +225,10 @@ public:
        get info about `storePath' (with --query-info).  We combine
        substituter invocations to reduce overhead. */
     void waitForInfo(GoalPtr goal, Path sub, Path storePath);
+
+    /* Wait for any goal to finish.  Pretty indiscriminate way to
+       wait for some resource that some other goal is holding. */
+    void waitForAnyGoal(GoalPtr goal);
     
     /* Loop until the specified top-level goals have finished. */
     void run(const Goals & topGoals);
@@ -642,7 +649,7 @@ private:
     void buildDone();
 
     /* Is the build hook willing to perform the build? */
-    typedef enum {rpAccept, rpDecline, rpPostpone, rpDone} HookReply;
+    typedef enum {rpAccept, rpDecline, rpPostpone, rpDone, rpRestart} HookReply;
     HookReply tryBuildHook();
 
     /* Synchronously wait for a build hook to finish. */
@@ -651,9 +658,13 @@ private:
     /* Acquires locks on the output paths and gathers information
        about the build (e.g., the input closures).  During this
        process its possible that we find out that the build is
-       unnecessary, in which case we return false (this is not an
-       error condition!). */
-    bool prepareBuild();
+       unnecessary, in which case we return prDone.  It's also
+       possible that some other goal is already building/substituting
+       the output paths, in which case we return prRestart (go back to
+       the haveDerivation() state).  Otherwise, prProceed is
+       returned. */
+    typedef enum {prProceed, prDone, prRestart} PrepareBuildReply;
+    PrepareBuildReply prepareBuild();
 
     /* Start building a derivation. */
     void startBuilder();
@@ -791,6 +802,20 @@ void DerivationGoal::haveDerivation()
         return;
     }
 
+    /* If this is a fixed-output derivation, it is possible that some
+       other goal is already building the output paths.  (The case
+       where some other process is building it is handled through
+       normal locking mechanisms.)  So if any output paths are already
+       being built, put this goal to sleep. */
+    for (PathSet::iterator i = invalidOutputs.begin();
+         i != invalidOutputs.end(); ++i)
+        if (pathIsLockedByMe(*i)) {
+            /* Wait until any goal finishes (hopefully the one that is
+               locking *i), then retry haveDerivation(). */
+            worker.waitForAnyGoal(shared_from_this());
+            return;
+        }
+
     /* We are first going to try to create the invalid output paths
        through substitutes.  If that doesn't work, we'll build
        them. */
@@ -884,6 +909,12 @@ void DerivationGoal::tryToBuild()
                 /* Somebody else did it. */
                 amDone(ecSuccess);
                 return;
+            case rpRestart:
+                /* Somebody else is building this output path.
+                   Restart from haveDerivation(). */
+                state = &DerivationGoal::haveDerivation;
+                worker.waitForAnyGoal(shared_from_this());
+                return;
         }
 
         /* Make sure that we are allowed to start a build. */
@@ -894,9 +925,14 @@ void DerivationGoal::tryToBuild()
 
         /* Acquire locks and such.  If we then see that the build has
            been done by somebody else, we're done. */
-        if (!prepareBuild()) {
+        PrepareBuildReply preply = prepareBuild();
+        if (preply == prDone) {
             amDone(ecSuccess);
             return;
+        } else if (preply == prRestart) {
+            state = &DerivationGoal::haveDerivation;
+            worker.waitForAnyGoal(shared_from_this());
+            return;
         }
 
         /* Okay, we have to build. */
@@ -1176,11 +1212,12 @@ DerivationGoal::HookReply DerivationGoal::tryBuildHook()
 
         /* Acquire locks and such.  If we then see that the output
            paths are now valid, we're done. */
-        if (!prepareBuild()) {
+        PrepareBuildReply preply = prepareBuild();
+        if (preply == prDone || preply == prRestart) {
             /* Tell the hook to exit. */
             writeLine(toHook.writeSide, "cancel");
             terminateBuildHook();
-            return rpDone;
+            return preply == prDone ? rpDone : rpRestart;
         }
 
         printMsg(lvlInfo, format("running hook to build path(s) %1%")
@@ -1256,8 +1293,20 @@ void DerivationGoal::terminateBuildHook(bool kill)
 }
 
 
-bool DerivationGoal::prepareBuild()
+DerivationGoal::PrepareBuildReply DerivationGoal::prepareBuild()
 {
+    /* Check for the possibility that some other goal in this process
+       has locked the output since we checked in haveDerivation().
+       (It can't happen between here and the lockPaths() call below
+       because we're not allowing multi-threading.) */
+    for (DerivationOutputs::iterator i = drv.outputs.begin(); 
+         i != drv.outputs.end(); ++i)
+        if (pathIsLockedByMe(i->second.path)) {
+            debug(format("restarting derivation `%1%' because `%2%' is locked by another goal")
+                % drvPath % i->second.path);
+            return prRestart;
+        }
+    
     /* Obtain locks on all output paths.  The locks are automatically
        released when we exit this function or Nix crashes. */
     /* !!! BUG: this could block, which is not allowed. */
@@ -1276,7 +1325,7 @@ bool DerivationGoal::prepareBuild()
         debug(format("skipping build of derivation `%1%', someone beat us to it")
             % drvPath);
         outputLocks.setDeletion(true);
-        return false;
+        return prDone;
     }
 
     if (validPaths.size() > 0) {
@@ -1341,7 +1390,7 @@ bool DerivationGoal::prepareBuild()
 
     allPaths.insert(inputPaths.begin(), inputPaths.end());
 
-    return true;
+    return prProceed;
 }
 
 
@@ -2028,6 +2077,16 @@ void SubstitutionGoal::tryToRun()
         return;
     }
 
+    /* Maybe a derivation goal has already locked this path
+       (exceedingly unlikely, since it should have used a substitute
+       first, but let's be defensive). */
+    if (pathIsLockedByMe(storePath)) {
+        debug(format("restarting substitution of `%1%' because it's locked by another goal")
+            % storePath);
+        worker.waitForAnyGoal(shared_from_this());
+        return; /* restart in the tryToRun() state when another goal finishes */
+    }
+    
     /* Acquire a lock on the output path. */
     outputLock = boost::shared_ptr<PathLocks>(new PathLocks);
     outputLock->lockPaths(singleton<PathSet>(storePath),
@@ -2251,6 +2310,16 @@ void Worker::removeGoal(GoalPtr goal)
         if (goal->getExitCode() == Goal::ecFailed && !keepGoing)
             topGoals.clear();
     }
+
+    /* Wake up goals waiting for any goal to finish. */
+    for (WeakGoals::iterator i = waitingForAnyGoal.begin();
+         i != waitingForAnyGoal.end(); ++i)
+    {
+        GoalPtr goal = i->lock();
+        if (goal) wakeUp(goal);
+    }
+
+    waitingForAnyGoal.clear();
 }
 
 
@@ -2337,6 +2406,13 @@ void Worker::waitForInfo(GoalPtr goal, Path sub, Path storePath)
 }
 
 
+void Worker::waitForAnyGoal(GoalPtr goal)
+{
+    debug("wait for any goal");
+    waitingForAnyGoal.insert(goal);
+}
+
+
 void Worker::getInfo()
 {
     for (map<Path, PathSet>::iterator i = requestedInfo.begin();
diff --git a/src/libstore/pathlocks.cc b/src/libstore/pathlocks.cc
index 821d4d02feb0..9d582206dbb9 100644
--- a/src/libstore/pathlocks.cc
+++ b/src/libstore/pathlocks.cc
@@ -223,5 +223,12 @@ void PathLocks::setDeletion(bool deletePaths)
     this->deletePaths = deletePaths;
 }
 
+
+bool pathIsLockedByMe(const Path & path)
+{
+    Path lockPath = path + ".lock";
+    return lockedPaths.find(lockPath) != lockedPaths.end();
+}
+
  
 }
diff --git a/src/libstore/pathlocks.hh b/src/libstore/pathlocks.hh
index 87bb7bf3ef71..0ca1ce687849 100644
--- a/src/libstore/pathlocks.hh
+++ b/src/libstore/pathlocks.hh
@@ -40,6 +40,9 @@ public:
 };
 
 
+bool pathIsLockedByMe(const Path & path);
+
+
 }