about summary refs log tree commit diff
path: root/src/libstore/build.cc
diff options
context:
space:
mode:
authorEelco Dolstra <eelco.dolstra@logicblox.com>2014-08-21T12·08+0200
committerEelco Dolstra <eelco.dolstra@logicblox.com>2014-08-21T12·08+0200
commit524f89f1399724e596f61faba2c6861b1bb7b9c5 (patch)
treee652815f56cf7cf93dec282f1ec319511d5476a0 /src/libstore/build.cc
parentfefd3650d4aa0c378da2ae1f1cdda772d5afaf13 (diff)
Use unshare() instead of clone()
It turns out that using clone() to start a child process is unsafe in
a multithreaded program. It can cause the initialisation of a build
child process to hang in setgroups(), as seen several times in the
build farm:

The reason is that Glibc thinks that the other threads of the parent
exist in the child, so in setxid_mark_thread() it tries to get a futex
that has been acquired by another thread just before the clone(). With
fork(), Glibc runs pthread_atfork() handlers that take care of this
(in particular, __reclaim_stacks()). But clone() doesn't do that.

Fortunately, we can use fork()+unshare() instead of clone() to set up
private namespaces.

See also https://www.mail-archive.com/lxc-devel@lists.linuxcontainers.org/msg03434.html.
Diffstat (limited to 'src/libstore/build.cc')
-rw-r--r--src/libstore/build.cc90
1 files changed, 35 insertions, 55 deletions
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index 856e5f8200d6..b64c32d103db 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -1575,13 +1575,6 @@ void chmod_(const Path & path, mode_t mode)
 }
 
 
-int childEntry(void * arg)
-{
-    ((DerivationGoal *) arg)->initChild();
-    return 1;
-}
-
-
 void DerivationGoal::startBuilder()
 {
     startNest(nest, lvlInfo, format(
@@ -1897,48 +1890,10 @@ void DerivationGoal::startBuilder()
     /* Create a pipe to get the output of the builder. */
     builderOut.create();
 
-    /* Fork a child to build the package.  Note that while we
-       currently use forks to run and wait for the children, it
-       shouldn't be hard to use threads for this on systems where
-       fork() is unavailable or inefficient.
-
-       If we're building in a chroot, then also set up private
-       namespaces for the build:
-
-       - The PID namespace causes the build to start as PID 1.
-         Processes outside of the chroot are not visible to those on
-         the inside, but processes inside the chroot are visible from
-         the outside (though with different PIDs).
-
-       - The private mount namespace ensures that all the bind mounts
-         we do will only show up in this process and its children, and
-         will disappear automatically when we're done.
-
-       - The private network namespace ensures that the builder cannot
-         talk to the outside world (or vice versa).  It only has a
-         private loopback interface.
-
-       - The IPC namespace prevents the builder from communicating
-         with outside processes using SysV IPC mechanisms (shared
-         memory, message queues, semaphores).  It also ensures that
-         all IPC objects are destroyed when the builder exits.
-
-       - The UTS namespace ensures that builders see a hostname of
-         localhost rather than the actual hostname.
-    */
-#if CHROOT_ENABLED
-    if (useChroot) {
-        char stack[32 * 1024];
-        pid = clone(childEntry, stack + sizeof(stack) - 8,
-            CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD, this);
-    } else
-#endif
-    {
-        pid = fork();
-        if (pid == 0) initChild();
-    }
-
-    if (pid == -1) throw SysError("unable to fork");
+    /* Fork a child to build the package. */
+    pid = startProcess([&]() {
+        initChild();
+    });
 
     /* parent */
     pid.setSeparatePG(true);
@@ -1965,14 +1920,41 @@ void DerivationGoal::initChild()
 
     try { /* child */
 
-        _writeToStderr = 0;
-
-        restoreAffinity();
-
         commonChildInit(builderOut);
 
 #if CHROOT_ENABLED
         if (useChroot) {
+
+            /* Set up private namespaces for the build:
+
+               - The PID namespace causes the build to start as PID 1.
+                 Processes outside of the chroot are not visible to
+                 those on the inside, but processes inside the chroot
+                 are visible from the outside (though with different
+                 PIDs).
+
+               - The private mount namespace ensures that all the bind
+                 mounts we do will only show up in this process and
+                 its children, and will disappear automatically when
+                 we're done.
+
+               - The private network namespace ensures that the
+                 builder cannot talk to the outside world (or vice
+                 versa).  It only has a private loopback interface.
+
+               - The IPC namespace prevents the builder from
+                 communicating with outside processes using SysV IPC
+                 mechanisms (shared memory, message queues,
+                 semaphores).  It also ensures that all IPC objects
+                 are destroyed when the builder exits.
+
+               - The UTS namespace ensures that builders see a
+                 hostname of localhost rather than the actual
+                 hostname.
+            */
+            if (unshare(CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS) == -1)
+                throw SysError("setting up private namespaces");
+
             /* Initialise the loopback interface. */
             AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP));
             if (fd == -1) throw SysError("cannot open IP socket");
@@ -2169,8 +2151,6 @@ void DerivationGoal::initChild()
         writeToStderr("while setting up the build environment: " + string(e.what()) + "\n");
         _exit(1);
     }
-
-    abort(); /* never reached */
 }