about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2003-10-14T15·33+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2003-10-14T15·33+0000
commitc190f051ac34b2df51402bf593150de97f491d86 (patch)
treeb08f3b05b2f39e1d1c48ce833711516671dc568a
parent1d61e473c88568fae7ef5edebc77acd53e4126f9 (diff)
* Automatically recover the database in case of a crash.
-rw-r--r--src/db.cc181
-rw-r--r--src/db.hh4
-rw-r--r--src/fix.cc4
-rw-r--r--src/nix.cc4
-rw-r--r--src/pathlocks.cc41
-rw-r--r--src/pathlocks.hh5
6 files changed, 201 insertions, 38 deletions
diff --git a/src/db.cc b/src/db.cc
index ffb1999fe56b..f9d618b3b33f 100644
--- a/src/db.cc
+++ b/src/db.cc
@@ -1,8 +1,13 @@
-#include "db.hh"
-#include "util.hh"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 #include <memory>
 
+#include "db.hh"
+#include "util.hh"
+#include "pathlocks.hh"
+
 
 /* Wrapper class to ensure proper destruction. */
 class DestroyDbc 
@@ -89,51 +94,179 @@ Database::Database()
 
 Database::~Database()
 {
-    if (env) {
-        debug(format("closing database environment"));
+    close();
+}
 
-        try {
 
-            for (map<TableId, Db *>::iterator i = tables.begin();
-                 i != tables.end(); i++)
-            {
-                debug(format("closing table %1%") % i->first);
-                Db * db = i->second;
-                db->close(0);
-                delete db;
-            }
+int getAccessorCount(int fd)
+{
+    if (lseek(fd, 0, SEEK_SET) == -1)
+        throw SysError("seeking accessor count");
+    char buf[128];
+    int len;
+    if ((len = read(fd, buf, sizeof(buf) - 1)) == -1)
+        throw SysError("reading accessor count");
+    buf[len] = 0;
+    int count;
+    if (sscanf(buf, "%d", &count) != 1) {
+        debug(format("accessor count is invalid: `%1%'") % buf);
+        return -1;
+    }
+    return count;
+}
 
-            env->txn_checkpoint(0, 0, 0);
-            env->close(0);
 
-        } catch (DbException e) { rethrow(e); }
+void setAccessorCount(int fd, int n)
+{
+    if (lseek(fd, 0, SEEK_SET) == -1)
+        throw SysError("seeking accessor count");
+    string s = (format("%1%") % n).str();
+    const char * s2 = s.c_str();
+    if (write(fd, s2, strlen(s2)) != (ssize_t) strlen(s2) ||
+        ftruncate(fd, strlen(s2)) != 0)
+        throw SysError("writing accessor count");
+}
 
-        delete env;
-    }
+
+void openEnv(DbEnv * env, const string & path, u_int32_t flags)
+{
+    env->open(path.c_str(),
+        DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN |
+        DB_CREATE | flags,
+        0666);
 }
 
 
 void Database::open(const string & path)
 {
+    if (env) throw Error(format("environment already open"));
+
     try {
-        
-        if (env) throw Error(format("environment already open"));
 
+        debug(format("opening database environment"));
+
+
+        /* Create the database environment object. */
         env = new DbEnv(0);
 
         env->set_lg_bsize(32 * 1024); /* default */
         env->set_lg_max(256 * 1024); /* must be > 4 * lg_bsize */
         env->set_lk_detect(DB_LOCK_DEFAULT);
         
-        env->open(path.c_str(),
-            DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN |
-            DB_CREATE,
-            0666);
+
+        /* The following code provides automatic recovery of the
+           database environment.  Recovery is necessary when a process
+           dies while it has the database open.  To detect this,
+           processes atomically increment a counter when the open the
+           database, and decrement it when they close it.  If we see
+           that counter is > 0 but no processes are accessing the
+           database---determined by attempting to obtain a write lock
+           on a lock file on which all accessors have a read lock---we
+           must run recovery.  Note that this also ensures that we
+           only run recovery when there are no other accessors (which
+           could cause database corruption). */
+
+        /* !!! close fdAccessors / fdLock on exception */
+
+        /* Open the accessor count file. */
+        string accessorsPath = path + "/accessor_count";
+        fdAccessors = ::open(accessorsPath.c_str(), O_RDWR | O_CREAT, 0666);
+        if (fdAccessors == -1)
+            throw SysError(format("opening file `%1%'") % accessorsPath);
+
+        /* Open the lock file. */
+        string lockPath = path + "/access_lock";
+        fdLock = ::open(lockPath.c_str(), O_RDWR | O_CREAT, 0666);
+        if (fdLock == -1)
+            throw SysError(format("opening lock file `%1%'") % lockPath);
+
+        /* Try to acquire a write lock. */
+        debug(format("attempting write lock on `%1%'") % lockPath);
+        if (lockFile(fdLock, ltWrite, false)) { /* don't wait */
+
+            debug(format("write lock granted"));
+
+            /* We have a write lock, which means that there are no
+               other readers or writers. */
+
+            int n = getAccessorCount(fdAccessors);
+                setAccessorCount(fdAccessors, 1);
+
+            if (n != 0) {
+                msg(lvlTalkative, format("accessor count is %1%, running recovery") % n);
+
+                /* Open the environment after running recovery. */
+                openEnv(env, path, DB_RECOVER);
+            }
+            
+            else 
+                /* Open the environment normally. */
+                openEnv(env, path, 0);
+
+            /* Downgrade to a read lock. */
+            debug(format("downgrading to read lock on `%1%'") % lockPath);
+            lockFile(fdLock, ltRead, true);
+
+        } else {
+            /* There are other accessors. */ 
+            debug(format("write lock refused"));
+
+            /* Acquire a read lock. */
+            debug(format("acquiring read lock on `%1%'") % lockPath);
+            lockFile(fdLock, ltRead, true); /* wait indefinitely */
+
+            /* Increment the accessor count. */
+            lockFile(fdAccessors, ltWrite, true);
+            int n = getAccessorCount(fdAccessors) + 1;
+            setAccessorCount(fdAccessors, n);
+            debug(format("incremented accessor count to %1%") % n);
+            lockFile(fdAccessors, ltNone, true);
+
+            /* Open the environment normally. */
+            openEnv(env, path, 0);
+        }
 
     } catch (DbException e) { rethrow(e); }
 }
 
 
+void Database::close()
+{
+    if (!env) return;
+
+    /* Close the database environment. */
+    debug(format("closing database environment"));
+
+    try {
+
+        for (map<TableId, Db *>::iterator i = tables.begin();
+             i != tables.end(); i++)
+        {
+            debug(format("closing table %1%") % i->first);
+            Db * db = i->second;
+            db->close(0);
+            delete db;
+        }
+
+        env->txn_checkpoint(0, 0, 0);
+        env->close(0);
+
+    } catch (DbException e) { rethrow(e); }
+
+    delete env;
+
+    /* Decrement the accessor count. */
+    lockFile(fdAccessors, ltWrite, true);
+    int n = getAccessorCount(fdAccessors) - 1;
+    setAccessorCount(fdAccessors, n);
+    debug(format("decremented accessor count to %1%") % n);
+    lockFile(fdAccessors, ltNone, true);
+
+    ::close(fdAccessors);
+    ::close(fdLock);
+}
+
+
 TableId Database::openTable(const string & tableName)
 {
     requireEnv();
diff --git a/src/db.hh b/src/db.hh
index 4bac943e554d..e3dc7ce7afe5 100644
--- a/src/db.hh
+++ b/src/db.hh
@@ -45,6 +45,9 @@ class Database
 private:
     DbEnv * env;
 
+    int fdLock;
+    int fdAccessors;
+
     TableId nextId;
     map<TableId, Db *> tables;
 
@@ -57,6 +60,7 @@ public:
     ~Database();
     
     void open(const string & path);
+    void close();
 
     TableId openTable(const string & table);
 
diff --git a/src/fix.cc b/src/fix.cc
index 71fd068775be..cbf759b31763 100644
--- a/src/fix.cc
+++ b/src/fix.cc
@@ -438,8 +438,6 @@ static void printNixExpr(EvalState & state, Expr e)
 
 void run(Strings args)
 {
-    openDB();
-
     EvalState state;
     Strings files;
     bool readStdin = false;
@@ -467,6 +465,8 @@ void run(Strings args)
             files.push_back(arg);
     }
 
+    openDB();
+
     if (readStdin) {
         Expr e = evalStdin(state);
         printNixExpr(state, e);
diff --git a/src/nix.cc b/src/nix.cc
index 9bbbf4ae8c85..1012780af844 100644
--- a/src/nix.cc
+++ b/src/nix.cc
@@ -267,8 +267,6 @@ static void opVerify(Strings opFlags, Strings opArgs)
    list. */
 void run(Strings args)
 {
-    openDB();
-
     Strings opFlags, opArgs;
     Operation op = 0;
 
@@ -315,6 +313,8 @@ void run(Strings args)
 
     if (!op) throw UsageError("no operation specified");
 
+    openDB();
+
     op(opFlags, opArgs);
 }
 
diff --git a/src/pathlocks.cc b/src/pathlocks.cc
index ff0226c84e5c..3ecbbbcbafd5 100644
--- a/src/pathlocks.cc
+++ b/src/pathlocks.cc
@@ -1,10 +1,39 @@
 #include <cerrno>
 
+#include <sys/types.h>
+#include <sys/stat.h>
 #include <fcntl.h>
 
 #include "pathlocks.hh"
 
 
+bool lockFile(int fd, LockType lockType, bool wait)
+{
+    struct flock lock;
+    if (lockType == ltRead) lock.l_type = F_RDLCK;
+    else if (lockType == ltWrite) lock.l_type = F_WRLCK;
+    else if (lockType == ltNone) lock.l_type = F_UNLCK;
+    else abort();
+    lock.l_whence = SEEK_SET;
+    lock.l_start = 0;
+    lock.l_len = 0; /* entire file */
+
+    if (wait) {
+        while (fcntl(fd, F_SETLKW, &lock) != 0)
+            if (errno != EINTR)
+                throw SysError(format("acquiring/releasing lock"));
+    } else {
+        while (fcntl(fd, F_SETLK, &lock) != 0) {
+            if (errno == EACCES || errno == EAGAIN) return false;
+            if (errno != EINTR) 
+                throw SysError(format("acquiring/releasing lock"));
+        }
+    }
+
+    return true;
+}
+
+
 /* This enables us to check whether are not already holding a lock on
    a file ourselves.  POSIX locks (fcntl) suck in this respect: if we
    close a descriptor, the previous lock will be closed as well.  And
@@ -43,16 +72,8 @@ PathLocks::PathLocks(const PathSet & _paths)
         fds.push_back(fd);
         this->paths.push_back(lockPath);
 
-        /* Lock it. */
-        struct flock lock;
-        lock.l_type = F_WRLCK; /* exclusive lock */
-        lock.l_whence = SEEK_SET;
-        lock.l_start = 0;
-        lock.l_len = 0; /* entire file */
-
-        while (fcntl(fd, F_SETLKW, &lock) == -1)
-            if (errno != EINTR)
-                throw SysError(format("acquiring lock on `%1%'") % lockPath);
+        /* Acquire an exclusive lock. */
+        lockFile(fd, ltWrite, true);
 
         lockedPaths.insert(lockPath);
     }
diff --git a/src/pathlocks.hh b/src/pathlocks.hh
index 6c36b9b1d17f..ce61386d6df0 100644
--- a/src/pathlocks.hh
+++ b/src/pathlocks.hh
@@ -4,6 +4,11 @@
 #include "util.hh"
 
 
+typedef enum LockType { ltRead, ltWrite, ltNone };
+
+bool lockFile(int fd, LockType lockType, bool wait);
+
+
 class PathLocks 
 {
 private: