diff options
Diffstat (limited to 'src/libnix/db.cc')
-rw-r--r-- | src/libnix/db.cc | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/src/libnix/db.cc b/src/libnix/db.cc new file mode 100644 index 000000000000..2f53ca3b5231 --- /dev/null +++ b/src/libnix/db.cc @@ -0,0 +1,425 @@ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include <memory> + +#include "db.hh" +#include "util.hh" +#include "pathlocks.hh" + + +/* Wrapper class to ensure proper destruction. */ +class DestroyDbc +{ + Dbc * dbc; +public: + DestroyDbc(Dbc * _dbc) : dbc(_dbc) { } + ~DestroyDbc() { dbc->close(); /* close() frees dbc */ } +}; + + +static void rethrow(DbException & e) +{ + throw Error(e.what()); +} + + +Transaction::Transaction() + : txn(0) +{ +} + + +Transaction::Transaction(Database & db) +{ + db.requireEnv(); + try { + db.env->txn_begin(0, &txn, 0); + } catch (DbException e) { rethrow(e); } +} + + +Transaction::~Transaction() +{ + if (txn) abort(); +} + + +void Transaction::commit() +{ + if (!txn) throw Error("commit called on null transaction"); + debug(format("committing transaction %1%") % (void *) txn); + DbTxn * txn2 = txn; + txn = 0; + try { + txn2->commit(0); + } catch (DbException e) { rethrow(e); } +} + + +void Transaction::abort() +{ + if (!txn) throw Error("abort called on null transaction"); + debug(format("aborting transaction %1%") % (void *) txn); + DbTxn * txn2 = txn; + txn = 0; + try { + txn2->abort(); + } catch (DbException e) { rethrow(e); } +} + + +void Transaction::moveTo(Transaction & t) +{ + if (t.txn) throw Error("target txn already exists"); + t.txn = txn; + txn = 0; +} + + +void Database::requireEnv() +{ + if (!env) throw Error("database environment not open"); +} + + +Db * Database::getDb(TableId table) +{ + map<TableId, Db *>::iterator i = tables.find(table); + if (i == tables.end()) + throw Error("unknown table id"); + return i->second; +} + + +Database::Database() + : env(0) + , nextId(1) +{ +} + + +Database::~Database() +{ + close(); +} + + +int getAccessorCount(int fd) +{ + if (lseek(fd, 0, SEEK_SET) == -1) + throw SysError("seeking accessor count"); + char buf[128]; + int len; + if ((len = read(fd, buf, sizeof(buf) - 1)) == -1) + throw SysError("reading accessor count"); + buf[len] = 0; + int count; + if (sscanf(buf, "%d", &count) != 1) { + debug(format("accessor count is invalid: `%1%'") % buf); + return -1; + } + return count; +} + + +void setAccessorCount(int fd, int n) +{ + if (lseek(fd, 0, SEEK_SET) == -1) + throw SysError("seeking accessor count"); + string s = (format("%1%") % n).str(); + const char * s2 = s.c_str(); + if (write(fd, s2, strlen(s2)) != (ssize_t) strlen(s2) || + ftruncate(fd, strlen(s2)) != 0) + throw SysError("writing accessor count"); +} + + +void openEnv(DbEnv * env, const string & path, u_int32_t flags) +{ + env->open(path.c_str(), + DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | + DB_CREATE | flags, + 0666); +} + + +void Database::open(const string & path) +{ + if (env) throw Error(format("environment already open")); + + try { + + debug(format("opening database environment")); + + + /* Create the database environment object. */ + env = new DbEnv(0); + + env->set_lg_bsize(32 * 1024); /* default */ + env->set_lg_max(256 * 1024); /* must be > 4 * lg_bsize */ + env->set_lk_detect(DB_LOCK_DEFAULT); + env->set_flags(DB_TXN_WRITE_NOSYNC, 1); + + + /* The following code provides automatic recovery of the + database environment. Recovery is necessary when a process + dies while it has the database open. To detect this, + processes atomically increment a counter when the open the + database, and decrement it when they close it. If we see + that counter is > 0 but no processes are accessing the + database---determined by attempting to obtain a write lock + on a lock file on which all accessors have a read lock---we + must run recovery. Note that this also ensures that we + only run recovery when there are no other accessors (which + could cause database corruption). */ + + /* !!! close fdAccessors / fdLock on exception */ + + /* Open the accessor count file. */ + string accessorsPath = path + "/accessor_count"; + fdAccessors = ::open(accessorsPath.c_str(), O_RDWR | O_CREAT, 0666); + if (fdAccessors == -1) + throw SysError(format("opening file `%1%'") % accessorsPath); + + /* Open the lock file. */ + string lockPath = path + "/access_lock"; + fdLock = ::open(lockPath.c_str(), O_RDWR | O_CREAT, 0666); + if (fdLock == -1) + throw SysError(format("opening lock file `%1%'") % lockPath); + + /* Try to acquire a write lock. */ + debug(format("attempting write lock on `%1%'") % lockPath); + if (lockFile(fdLock, ltWrite, false)) { /* don't wait */ + + debug(format("write lock granted")); + + /* We have a write lock, which means that there are no + other readers or writers. */ + + int n = getAccessorCount(fdAccessors); + setAccessorCount(fdAccessors, 1); + + if (n != 0) { + msg(lvlTalkative, format("accessor count is %1%, running recovery") % n); + + /* Open the environment after running recovery. */ + openEnv(env, path, DB_RECOVER); + } + + else + /* Open the environment normally. */ + openEnv(env, path, 0); + + /* Downgrade to a read lock. */ + debug(format("downgrading to read lock on `%1%'") % lockPath); + lockFile(fdLock, ltRead, true); + + } else { + /* There are other accessors. */ + debug(format("write lock refused")); + + /* Acquire a read lock. */ + debug(format("acquiring read lock on `%1%'") % lockPath); + lockFile(fdLock, ltRead, true); /* wait indefinitely */ + + /* Increment the accessor count. */ + lockFile(fdAccessors, ltWrite, true); + int n = getAccessorCount(fdAccessors) + 1; + setAccessorCount(fdAccessors, n); + debug(format("incremented accessor count to %1%") % n); + lockFile(fdAccessors, ltNone, true); + + /* Open the environment normally. */ + openEnv(env, path, 0); + } + + } catch (DbException e) { rethrow(e); } +} + + +void Database::close() +{ + if (!env) return; + + /* Close the database environment. */ + debug(format("closing database environment")); + + try { + + for (map<TableId, Db *>::iterator i = tables.begin(); + i != tables.end(); i++) + { + debug(format("closing table %1%") % i->first); + Db * db = i->second; + db->close(DB_NOSYNC); + delete db; + } + +// env->txn_checkpoint(0, 0, 0); + env->close(0); + + } catch (DbException e) { rethrow(e); } + + delete env; + + /* Decrement the accessor count. */ + lockFile(fdAccessors, ltWrite, true); + int n = getAccessorCount(fdAccessors) - 1; + setAccessorCount(fdAccessors, n); + debug(format("decremented accessor count to %1%") % n); + lockFile(fdAccessors, ltNone, true); + + ::close(fdAccessors); + ::close(fdLock); +} + + +TableId Database::openTable(const string & tableName) +{ + requireEnv(); + TableId table = nextId++; + + try { + + Db * db = new Db(env, 0); + + try { + db->open(0, tableName.c_str(), 0, + DB_HASH, DB_CREATE | DB_AUTO_COMMIT, 0666); + } catch (...) { + delete db; + throw; + } + + tables[table] = db; + + } catch (DbException e) { rethrow(e); } + + return table; +} + + +bool Database::queryString(const Transaction & txn, TableId table, + const string & key, string & data) +{ + try { + Db * db = getDb(table); + + Dbt kt((void *) key.c_str(), key.length()); + Dbt dt; + + int err = db->get(txn.txn, &kt, &dt, 0); + if (err) return false; + + if (!dt.get_data()) + data = ""; + else + data = string((char *) dt.get_data(), dt.get_size()); + + } catch (DbException e) { rethrow(e); } + + return true; +} + + +bool Database::queryStrings(const Transaction & txn, TableId table, + const string & key, Strings & data) +{ + string d; + + if (!queryString(txn, table, key, d)) + return false; + + string::iterator it = d.begin(); + + while (it != d.end()) { + + if (it + 4 > d.end()) + throw Error(format("short db entry: `%1%'") % d); + + unsigned int len; + len = (unsigned char) *it++; + len |= ((unsigned char) *it++) << 8; + len |= ((unsigned char) *it++) << 16; + len |= ((unsigned char) *it++) << 24; + + if (it + len > d.end()) + throw Error(format("short db entry: `%1%'") % d); + + string s; + while (len--) s += *it++; + + data.push_back(s); + } + + return true; +} + + +void Database::setString(const Transaction & txn, TableId table, + const string & key, const string & data) +{ + try { + Db * db = getDb(table); + Dbt kt((void *) key.c_str(), key.length()); + Dbt dt((void *) data.c_str(), data.length()); + db->put(txn.txn, &kt, &dt, 0); + } catch (DbException e) { rethrow(e); } +} + + +void Database::setStrings(const Transaction & txn, TableId table, + const string & key, const Strings & data) +{ + string d; + + for (Strings::const_iterator it = data.begin(); + it != data.end(); it++) + { + string s = *it; + unsigned int len = s.size(); + + d += len & 0xff; + d += (len >> 8) & 0xff; + d += (len >> 16) & 0xff; + d += (len >> 24) & 0xff; + + d += s; + } + + setString(txn, table, key, d); +} + + +void Database::delPair(const Transaction & txn, TableId table, + const string & key) +{ + try { + Db * db = getDb(table); + Dbt kt((void *) key.c_str(), key.length()); + db->del(txn.txn, &kt, 0); + /* Non-existence of a pair with the given key is not an + error. */ + } catch (DbException e) { rethrow(e); } +} + + +void Database::enumTable(const Transaction & txn, TableId table, + Strings & keys) +{ + try { + Db * db = getDb(table); + + Dbc * dbc; + db->cursor(txn.txn, &dbc, 0); + DestroyDbc destroyDbc(dbc); + + Dbt kt, dt; + while (dbc->get(&kt, &dt, DB_NEXT) != DB_NOTFOUND) + keys.push_back( + string((char *) kt.get_data(), kt.get_size())); + + } catch (DbException e) { rethrow(e); } +} |