From b7fd2c28224a69476434d69b5d9da3d150c07226 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 18 Jul 2012 14:59:03 -0400 Subject: Use "#pragma once" to prevent repeated header file inclusion --- src/libstore/local-store.hh | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/libstore/local-store.hh') diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 65ee029c261e..eb7705219ac7 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -1,5 +1,4 @@ -#ifndef __LOCAL_STORE_H -#define __LOCAL_STORE_H +#pragma once #include @@ -302,6 +301,3 @@ void deletePathWrapped(const Path & path, void deletePathWrapped(const Path & path); } - - -#endif /* !__LOCAL_STORE_H */ -- cgit 1.4.1 From 564fb7d9fa80d06397a88d69f26439727cb922c5 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 23 Jul 2012 12:08:34 -0400 Subject: optimiseStore(): Use a content-addressed file store in /nix/store/.links optimiseStore() now creates persistent, content-addressed hard links in /nix/store/.links. For instance, if it encounters a file P with hash H, it will create a hard link P' = /nix/store/.links/H to P if P' doesn't already exist; if P' exists, then P is replaced by a hard link to P'. This is better than the previous in-memory map, because that map had the tendency to unnecessarily replace hard links with a hard link to whatever happened to be the first file with a given hash it encountered. It also allows on-the-fly, incremental optimisation. 
--- src/libstore/local-store.hh | 2 +- src/libstore/optimise-store.cc | 213 +++++++++++++++++++++-------------------- src/nix-store/nix-store.cc | 10 +- 3 files changed, 110 insertions(+), 115 deletions(-) (limited to 'src/libstore/local-store.hh') diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index eb7705219ac7..1bb47fb3bad5 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -167,7 +167,7 @@ public: /* Optimise the disk space usage of the Nix store by hard-linking files with the same contents. */ - void optimiseStore(bool dryRun, OptimiseStats & stats); + void optimiseStore(OptimiseStats & stats); /* Check the integrity of the Nix store. */ void verifyStore(bool checkContents); diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc index 2ca98f46ddf4..0893db9d3130 100644 --- a/src/libstore/optimise-store.cc +++ b/src/libstore/optimise-store.cc @@ -1,6 +1,7 @@ #include "util.hh" #include "local-store.hh" #include "immutable.hh" +#include "globals.hh" #include #include @@ -12,9 +13,6 @@ namespace nix { -typedef std::map > HashToPath; - - static void makeWritable(const Path & path) { struct stat st; @@ -51,132 +49,135 @@ struct MakeImmutable }; -static void hashAndLink(bool dryRun, HashToPath & hashToPath, - OptimiseStats & stats, const Path & path) +const string linksDir = ".links"; + + +static void hashAndLink(OptimiseStats & stats, const Path & path) { struct stat st; if (lstat(path.c_str(), &st)) throw SysError(format("getting attributes of path `%1%'") % path); + if (S_ISDIR(st.st_mode)) { + Strings names = readDirectory(path); + foreach (Strings::iterator, i, names) + hashAndLink(stats, path + "/" + *i); + return; + } + + /* We can hard link regular files and symlinks. 
*/ + if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return; + /* Sometimes SNAFUs can cause files in the Nix store to be modified, in particular when running programs as root under NixOS (example: $fontconfig/var/cache being modified). Skip - those files. */ + those files. FIXME: check the modification time. */ if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) { printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path); return; } - /* We can hard link regular files and symlinks. */ - if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { - - /* Hash the file. Note that hashPath() returns the hash over - the NAR serialisation, which includes the execute bit on - the file. Thus, executable and non-executable files with - the same contents *won't* be linked (which is good because - otherwise the permissions would be screwed up). - - Also note that if `path' is a symlink, then we're hashing - the contents of the symlink (i.e. the result of - readlink()), not the contents of the target (which may not - even exist). */ - Hash hash = hashPath(htSHA256, path).first; - stats.totalFiles++; - printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash)); - - std::pair prevPath = hashToPath[hash]; - - if (prevPath.first == "") { - hashToPath[hash] = std::pair(path, st.st_ino); - return; - } - - /* Yes! We've seen a file with the same contents. Replace - the current file with a hard link to that file. */ - stats.sameContents++; - if (prevPath.second == st.st_ino) { - printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first); - return; - } + /* Hash the file. Note that hashPath() returns the hash over the + NAR serialisation, which includes the execute bit on the file. + Thus, executable and non-executable files with the same + contents *won't* be linked (which is good because otherwise the + permissions would be screwed up). 
+ + Also note that if `path' is a symlink, then we're hashing the + contents of the symlink (i.e. the result of readlink()), not + the contents of the target (which may not even exist). */ + Hash hash = hashPath(htSHA256, path).first; + stats.totalFiles++; + printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash)); + + /* Check if this is a known hash. */ + Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash); + + if (!pathExists(linkPath)) { + /* Nope, create a hard link in the links directory. */ + makeMutable(path); + MakeImmutable mk1(path); + + if (link(path.c_str(), linkPath.c_str()) == -1) + throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path); + + return; + } + + /* Yes! We've seen a file with the same contents. Replace the + current file with a hard link to that file. */ + struct stat stLink; + if (lstat(linkPath.c_str(), &stLink)) + throw SysError(format("getting attributes of path `%1%'") % linkPath); + + stats.sameContents++; + if (st.st_ino == stLink.st_ino) { + printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath); + return; + } - if (!dryRun) { - - printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first); + printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath); - Path tempLink = (format("%1%.tmp-%2%-%3%") - % path % getpid() % rand()).str(); + Path tempLink = (format("%1%/.tmp-link-%2%-%3%") + % nixStore % getpid() % rand()).str(); - /* Make the containing directory writable, but only if - it's not the store itself (we don't want or need to - mess with its permissions). */ - bool mustToggle = !isStorePath(path); - if (mustToggle) makeWritable(dirOf(path)); - - /* When we're done, make the directory read-only again and - reset its timestamp back to 0. */ - MakeReadOnly makeReadOnly(mustToggle ? 
dirOf(path) : ""); - - /* If ‘prevPath’ is immutable, we can't create hard links - to it, so make it mutable first (and make it immutable - again when we're done). We also have to make ‘path’ - mutable, otherwise rename() will fail to delete it. */ - makeMutable(prevPath.first); - MakeImmutable mk1(prevPath.first); + /* Make the containing directory writable, but only if it's not + the store itself (we don't want or need to mess with its + permissions). */ + bool mustToggle = !isStorePath(path); + if (mustToggle) makeWritable(dirOf(path)); - makeMutable(path); - MakeImmutable mk2(path); - - if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) { - if (errno == EMLINK) { - /* Too many links to the same file (>= 32000 on - most file systems). This is likely to happen - with empty files. Just start over, creating - links to the current file. */ - printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first); - hashToPath[hash] = std::pair(path, st.st_ino); - return; - } - throw SysError(format("cannot link `%1%' to `%2%'") - % tempLink % prevPath.first); - } - - /* Atomically replace the old file with the new hard link. */ - if (rename(tempLink.c_str(), path.c_str()) == -1) { - if (errno == EMLINK) { - /* Some filesystems generate too many links on the - rename, rather than on the original link. - (Probably it temporarily increases the st_nlink - field before decreasing it again.) */ - printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first); - hashToPath[hash] = std::pair(path, st.st_ino); - - /* Unlink the temp link. 
*/ - if (unlink(tempLink.c_str()) == -1) - printMsg(lvlError, format("unable to unlink `%1%'") % tempLink); - return; - } - throw SysError(format("cannot rename `%1%' to `%2%'") - % tempLink % path); - } - } else - printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first); - - stats.filesLinked++; - stats.bytesFreed += st.st_size; - stats.blocksFreed += st.st_blocks; + /* When we're done, make the directory read-only again and reset + its timestamp back to 0. */ + MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); + + /* If ‘linkPath’ is immutable, we can't create hard links to it, + so make it mutable first (and make it immutable again when + we're done). We also have to make ‘path’ mutable, otherwise + rename() will fail to delete it. */ + makeMutable(linkPath); + MakeImmutable mk1(linkPath); + + makeMutable(path); + MakeImmutable mk2(path); + + if (link(linkPath.c_str(), tempLink.c_str()) == -1) { + if (errno == EMLINK) { + /* Too many links to the same file (>= 32000 on most file + systems). This is likely to happen with empty files. + Just shrug and ignore. */ + printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath); + return; + } + throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath); } - if (S_ISDIR(st.st_mode)) { - Strings names = readDirectory(path); - foreach (Strings::iterator, i, names) - hashAndLink(dryRun, hashToPath, stats, path + "/" + *i); + /* Atomically replace the old file with the new hard link. */ + if (rename(tempLink.c_str(), path.c_str()) == -1) { + if (errno == EMLINK) { + /* Some filesystems generate too many links on the rename, + rather than on the original link. (Probably it + temporarily increases the st_nlink field before + decreasing it again.) */ + printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath); + + /* Unlink the temp link. 
*/ + if (unlink(linkPath.c_str()) == -1) + printMsg(lvlError, format("unable to unlink `%1%'") % linkPath); + return; + } + throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path); } + + stats.filesLinked++; + stats.bytesFreed += st.st_size; + stats.blocksFreed += st.st_blocks; } -void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats) +void LocalStore::optimiseStore(OptimiseStats & stats) { - HashToPath hashToPath; + createDirs(nixStore + "/" + linksDir); PathSet paths = queryValidPaths(); @@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats) addTempRoot(*i); if (!isValidPath(*i)) continue; /* path was GC'ed, probably */ startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i); - hashAndLink(dryRun, hashToPath, stats, *i); + hashAndLink(stats, *i); } } diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index 23863525fe8a..82e08fecf22a 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -746,18 +746,12 @@ static void showOptimiseStats(OptimiseStats & stats) files with the same contents. */ static void opOptimise(Strings opFlags, Strings opArgs) { - if (!opArgs.empty()) + if (!opArgs.empty() || !opFlags.empty()) throw UsageError("no arguments expected"); - bool dryRun = false; - - foreach (Strings::iterator, i, opFlags) - if (*i == "--dry-run") dryRun = true; - else throw UsageError(format("unknown flag `%1%'") % *i); - OptimiseStats stats; try { - ensureLocalStore().optimiseStore(dryRun, stats); + ensureLocalStore().optimiseStore(stats); } catch (...) { showOptimiseStats(stats); throw; -- cgit 1.4.1 From 619310571002fc74e428824bd603604d1055b61b Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 23 Jul 2012 15:02:52 -0400 Subject: Automatically optimise the Nix store when a new path is added Auto-optimisation is enabled by default. It can be turned off by setting auto-optimise-store to false in nix.conf. 
--- doc/manual/conf-file.xml | 15 ++++++++++++++- src/libstore/build.cc | 4 ++++ src/libstore/local-store.cc | 7 +++++++ src/libstore/local-store.hh | 7 +++++++ src/libstore/optimise-store.cc | 22 +++++++++++++--------- 5 files changed, 45 insertions(+), 10 deletions(-) (limited to 'src/libstore/local-store.hh') diff --git a/doc/manual/conf-file.xml b/doc/manual/conf-file.xml index 1b19e56b5714..c095a001c169 100644 --- a/doc/manual/conf-file.xml +++ b/doc/manual/conf-file.xml @@ -337,7 +337,20 @@ build-use-chroot = /dev /proc /bin true. - + + + auto-optimise-store + + If set to true (the default), + Nix automatically detects files in the store that have identical + contents, and replaces them with hard links to a single copy. + This saves disk space. If set to false, you + can still run nix-store --optimise to get rid + of duplicate files. + + + + diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 26268f6ddb7d..a3bde3462364 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -2093,6 +2093,8 @@ void DerivationGoal::computeClosure() if (allowed.find(*i) == allowed.end()) throw BuildError(format("output is not allowed to refer to path `%1%'") % *i); } + + worker.store.optimisePath(path); // FIXME: combine with scanForReferences() } /* Register each output path as valid, and register the sets of @@ -2546,6 +2548,8 @@ void SubstitutionGoal::finished() HashResult hash = hashPath(htSHA256, storePath); + worker.store.optimisePath(storePath); // FIXME: combine with hashPath() + ValidPathInfo info2; info2.path = storePath; info2.hash = hash.first; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index e009191b6c8b..05b2b9c6e542 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -209,6 +209,7 @@ LocalStore::LocalStore(bool reserveSpace) /* Create missing state directories if they don't already exist. 
*/ createDirs(nixStore); + createDirs(linksDir = nixStore + "/.links"); Path profilesDir = nixStateDir + "/profiles"; createDirs(nixStateDir + "/profiles"); createDirs(nixStateDir + "/temproots"); @@ -1116,6 +1117,8 @@ Path LocalStore::addToStoreFromDump(const string & dump, const string & name, hash.second = dump.size(); } else hash = hashPath(htSHA256, dstPath); + + optimisePath(dstPath); // FIXME: combine with hashPath() ValidPathInfo info; info.path = dstPath; @@ -1170,6 +1173,8 @@ Path LocalStore::addTextToStore(const string & name, const string & s, canonicalisePathMetaData(dstPath); HashResult hash = hashPath(htSHA256, dstPath); + + optimisePath(dstPath); ValidPathInfo info; info.path = dstPath; @@ -1405,6 +1410,8 @@ Path LocalStore::importPath(bool requireSignature, Source & source) /* !!! if we were clever, we could prevent the hashPath() here. */ HashResult hash = hashPath(htSHA256, dstPath); + + optimisePath(dstPath); // FIXME: combine with hashPath() ValidPathInfo info; info.path = dstPath; diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 1bb47fb3bad5..7d30a2d408ae 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -85,6 +85,8 @@ private: typedef std::map RunningSubstituters; RunningSubstituters runningSubstituters; + + Path linksDir; public: @@ -169,6 +171,9 @@ public: files with the same contents. */ void optimiseStore(OptimiseStats & stats); + /* Optimise a single store path. */ + void optimisePath(const Path & path); + /* Check the integrity of the Nix store. 
*/ void verifyStore(bool checkContents); @@ -267,6 +272,8 @@ private: Path importPath(bool requireSignature, Source & source); void checkDerivationOutputs(const Path & drvPath, const Derivation & drv); + + void optimisePath_(OptimiseStats & stats, const Path & path); }; diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc index 0893db9d3130..a7aa14fb49ab 100644 --- a/src/libstore/optimise-store.cc +++ b/src/libstore/optimise-store.cc @@ -49,10 +49,7 @@ struct MakeImmutable }; -const string linksDir = ".links"; - - -static void hashAndLink(OptimiseStats & stats, const Path & path) +void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path) { struct stat st; if (lstat(path.c_str(), &st)) @@ -61,7 +58,7 @@ static void hashAndLink(OptimiseStats & stats, const Path & path) if (S_ISDIR(st.st_mode)) { Strings names = readDirectory(path); foreach (Strings::iterator, i, names) - hashAndLink(stats, path + "/" + *i); + optimisePath_(stats, path + "/" + *i); return; } @@ -91,7 +88,7 @@ static void hashAndLink(OptimiseStats & stats, const Path & path) printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash)); /* Check if this is a known hash. */ - Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash); + Path linkPath = linksDir + "/" + printHash32(hash); if (!pathExists(linkPath)) { /* Nope, create a hard link in the links directory. 
*/ @@ -177,15 +174,22 @@ static void hashAndLink(OptimiseStats & stats, const Path & path) void LocalStore::optimiseStore(OptimiseStats & stats) { - createDirs(nixStore + "/" + linksDir); - PathSet paths = queryValidPaths(); foreach (PathSet::iterator, i, paths) { addTempRoot(*i); if (!isValidPath(*i)) continue; /* path was GC'ed, probably */ startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i); - hashAndLink(stats, *i); + optimisePath_(stats, *i); + } +} + + +void LocalStore::optimisePath(const Path & path) +{ + if (queryBoolSetting("auto-optimise-store", true)) { + OptimiseStats stats; + optimisePath_(stats, path); } } -- cgit 1.4.1 From 680ab6f83def2b636200204542ca352631a46f85 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 23 Jul 2012 15:48:30 -0400 Subject: Garbage collect unused links in /nix/store/.links Incremental optimisation requires creating links in /nix/store/.links to all files in the store. However, this means that if we delete a store path, no files are actually deleted because links in /nix/store/.links still exist. So we need to check /nix/store/.links for files with a link count of 1 and delete them. --- src/libstore/gc.cc | 37 +++++++++++++++++++++++++++++++++++++ src/libstore/local-store.hh | 2 ++ 2 files changed, 39 insertions(+) (limited to 'src/libstore/local-store.hh') diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index f6ed7dd2264e..874efe4d32d9 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -436,6 +436,8 @@ bool LocalStore::tryToDelete(GCState & state, const Path & path) { checkInterrupt(); + if (path == linksDir) return true; + struct stat st; if (lstat(path.c_str(), &st)) { if (errno == ENOENT) return true; @@ -569,6 +571,37 @@ bool LocalStore::tryToDelete(GCState & state, const Path & path) } +/* Unlink all files in /nix/store/.links that have a link count of 1, + which indicates that there are no other links and so they can be + safely deleted. 
FIXME: race condition with optimisePath(): we + might see a link count of 1 just before optimisePath() increases + the link count. */ +void LocalStore::removeUnusedLinks() +{ + AutoCloseDir dir = opendir(linksDir.c_str()); + if (!dir) throw SysError(format("opening directory `%1%'") % linksDir); + + struct dirent * dirent; + while (errno = 0, dirent = readdir(dir)) { + checkInterrupt(); + string name = dirent->d_name; + if (name == "." || name == "..") continue; + Path path = linksDir + "/" + name; + + struct stat st; + if (lstat(path.c_str(), &st) == -1) + throw SysError(format("statting `%1%'") % path); + + if (st.st_nlink != 1) continue; + + printMsg(lvlTalkative, format("deleting unused link `%1%'") % path); + + if (unlink(path.c_str()) == -1) + throw SysError(format("deleting `%1%'") % path); + } +} + + void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) { GCState state(results); @@ -682,6 +715,10 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) released. */ foreach (PathSet::iterator, i, state.invalidated) deleteGarbage(state, *i); + + /* Clean up the links directory. */ + printMsg(lvlError, format("deleting unused links...")); + removeUnusedLinks(); } diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 7d30a2d408ae..50910f353ad1 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -264,6 +264,8 @@ private: int openGCLock(LockType lockType); + void removeUnusedLinks(); + void startSubstituter(const Path & substituter, RunningSubstituter & runningSubstituter); -- cgit 1.4.1