about summary refs log tree commit diff
path: root/src/libstore
diff options
context:
space:
mode:
author Eelco Dolstra <eelco.dolstra@logicblox.com> 2012-07-23T16:08-0400
committer Eelco Dolstra <eelco.dolstra@logicblox.com> 2012-07-23T21:14-0400
commit 564fb7d9fa80d06397a88d69f26439727cb922c5 (patch)
tree b6944bacbc3f4d8cbac9d886686572aa6fdbbe8c /src/libstore
parent ed59bf7a181bb382dea7dd72da52bf91f60deb8d (diff)
optimiseStore(): Use a content-addressed file store in /nix/store/.links
optimiseStore() now creates persistent, content-addressed hard links
in /nix/store/.links.  For instance, if it encounters a file P with
hash H, it will create a hard link

  P' = /nix/store/.links/<H>

to P if P' doesn't already exist; if P' exists, then P is replaced by a
hard link to P'.  This is better than the previous in-memory map,
because the map had the tendency to unnecessarily replace hard links
with a hard link to whatever happened to be the first file with a given
hash it encountered.  It also allows on-the-fly, incremental optimisation.
Diffstat (limited to 'src/libstore')
-rw-r--r-- src/libstore/local-store.hh 2
-rw-r--r-- src/libstore/optimise-store.cc 213
2 files changed, 108 insertions, 107 deletions
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index eb7705219ac7..1bb47fb3bad5 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -167,7 +167,7 @@ public:
 
     /* Optimise the disk space usage of the Nix store by hard-linking
        files with the same contents. */
-    void optimiseStore(bool dryRun, OptimiseStats & stats);
+    void optimiseStore(OptimiseStats & stats);
 
     /* Check the integrity of the Nix store. */
     void verifyStore(bool checkContents);
diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc
index 2ca98f46ddf4..0893db9d3130 100644
--- a/src/libstore/optimise-store.cc
+++ b/src/libstore/optimise-store.cc
@@ -1,6 +1,7 @@
 #include "util.hh"
 #include "local-store.hh"
 #include "immutable.hh"
+#include "globals.hh"
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -12,9 +13,6 @@
 namespace nix {
 
 
-typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
-
-
 static void makeWritable(const Path & path)
 {
     struct stat st;
@@ -51,132 +49,135 @@ struct MakeImmutable
 };
 
 
-static void hashAndLink(bool dryRun, HashToPath & hashToPath,
-    OptimiseStats & stats, const Path & path)
+const string linksDir = ".links";
+
+
+static void hashAndLink(OptimiseStats & stats, const Path & path)
 {
     struct stat st;
     if (lstat(path.c_str(), &st))
 	throw SysError(format("getting attributes of path `%1%'") % path);
 
+    if (S_ISDIR(st.st_mode)) {
+        Strings names = readDirectory(path);
+	foreach (Strings::iterator, i, names)
+	    hashAndLink(stats, path + "/" + *i);
+        return;
+    }
+    
+    /* We can hard link regular files and symlinks. */
+    if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return;
+        
     /* Sometimes SNAFUs can cause files in the Nix store to be
        modified, in particular when running programs as root under
        NixOS (example: $fontconfig/var/cache being modified).  Skip
-       those files. */
+       those files.  FIXME: check the modification time. */
     if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
         printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
         return;
     }
 
-    /* We can hard link regular files and symlinks. */
-    if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
-
-        /* Hash the file.  Note that hashPath() returns the hash over
-           the NAR serialisation, which includes the execute bit on
-           the file.  Thus, executable and non-executable files with
-           the same contents *won't* be linked (which is good because
-           otherwise the permissions would be screwed up).
-
-           Also note that if `path' is a symlink, then we're hashing
-           the contents of the symlink (i.e. the result of
-           readlink()), not the contents of the target (which may not
-           even exist). */
-        Hash hash = hashPath(htSHA256, path).first;
-        stats.totalFiles++;
-        printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
-
-        std::pair<Path, ino_t> prevPath = hashToPath[hash];
-        
-        if (prevPath.first == "") {
-            hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
-            return;
-        }
-            
-        /* Yes!  We've seen a file with the same contents.  Replace
-           the current file with a hard link to that file. */
-        stats.sameContents++;
-        if (prevPath.second == st.st_ino) {
-            printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first);
-            return;
-        }
+    /* Hash the file.  Note that hashPath() returns the hash over the
+       NAR serialisation, which includes the execute bit on the file.
+       Thus, executable and non-executable files with the same
+       contents *won't* be linked (which is good because otherwise the
+       permissions would be screwed up).
+
+       Also note that if `path' is a symlink, then we're hashing the
+       contents of the symlink (i.e. the result of readlink()), not
+       the contents of the target (which may not even exist). */
+    Hash hash = hashPath(htSHA256, path).first;
+    stats.totalFiles++;
+    printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
+
+    /* Check if this is a known hash. */
+    Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash);
+
+    if (!pathExists(linkPath)) {
+        /* Nope, create a hard link in the links directory. */
+        makeMutable(path);
+        MakeImmutable mk1(path);
+
+        if (link(path.c_str(), linkPath.c_str()) == -1)
+            throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
+
+        return;
+    }
+
+    /* Yes!  We've seen a file with the same contents.  Replace the
+       current file with a hard link to that file. */
+    struct stat stLink;
+    if (lstat(linkPath.c_str(), &stLink))
+	throw SysError(format("getting attributes of path `%1%'") % linkPath);
+    
+    stats.sameContents++;
+    if (st.st_ino == stLink.st_ino) {
+        printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath);
+        return;
+    }
         
-        if (!dryRun) {
-            
-            printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first);
+    printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath);
 
-            Path tempLink = (format("%1%.tmp-%2%-%3%")
-                % path % getpid() % rand()).str();
+    Path tempLink = (format("%1%/.tmp-link-%2%-%3%")
+        % nixStore % getpid() % rand()).str();
 
-            /* Make the containing directory writable, but only if
-               it's not the store itself (we don't want or need to
-               mess with its permissions). */
-            bool mustToggle = !isStorePath(path);
-            if (mustToggle) makeWritable(dirOf(path));
-            
-            /* When we're done, make the directory read-only again and
-               reset its timestamp back to 0. */
-            MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
-
-            /* If ‘prevPath’ is immutable, we can't create hard links
-               to it, so make it mutable first (and make it immutable
-               again when we're done).  We also have to make ‘path’
-               mutable, otherwise rename() will fail to delete it. */
-            makeMutable(prevPath.first);
-            MakeImmutable mk1(prevPath.first);
+    /* Make the containing directory writable, but only if it's not
+       the store itself (we don't want or need to mess with its
+       permissions). */
+    bool mustToggle = !isStorePath(path);
+    if (mustToggle) makeWritable(dirOf(path));
             
-            makeMutable(path);
-            MakeImmutable mk2(path);
-
-            if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) {
-                if (errno == EMLINK) {
-                    /* Too many links to the same file (>= 32000 on
-                       most file systems).  This is likely to happen
-                       with empty files.  Just start over, creating
-                       links to the current file. */
-                    printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
-                    hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
-                    return;
-                }
-                throw SysError(format("cannot link `%1%' to `%2%'")
-                    % tempLink % prevPath.first);
-            }
-
-            /* Atomically replace the old file with the new hard link. */
-            if (rename(tempLink.c_str(), path.c_str()) == -1) {
-                if (errno == EMLINK) {
-                    /* Some filesystems generate too many links on the
-                       rename, rather than on the original link.
-                       (Probably it temporarily increases the st_nlink
-                       field before decreasing it again.) */
-                    printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
-                    hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
-
-                    /* Unlink the temp link. */
-                    if (unlink(tempLink.c_str()) == -1)
-                        printMsg(lvlError, format("unable to unlink `%1%'") % tempLink);
-                    return;
-                }
-                throw SysError(format("cannot rename `%1%' to `%2%'")
-                    % tempLink % path);
-            }
-        } else
-            printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first);
-        
-        stats.filesLinked++;
-        stats.bytesFreed += st.st_size;
-        stats.blocksFreed += st.st_blocks;
+    /* When we're done, make the directory read-only again and reset
+       its timestamp back to 0. */
+    MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
+
+    /* If ‘linkPath’ is immutable, we can't create hard links to it,
+       so make it mutable first (and make it immutable again when
+       we're done).  We also have to make ‘path’ mutable, otherwise
+       rename() will fail to delete it. */
+    makeMutable(linkPath);
+    MakeImmutable mk1(linkPath);
+
+    makeMutable(path);
+    MakeImmutable mk2(path);
+
+    if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
+        if (errno == EMLINK) {
+            /* Too many links to the same file (>= 32000 on most file
+               systems).  This is likely to happen with empty files.
+               Just shrug and ignore. */
+            printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
+            return;
+        }
+        throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath);
     }
 
-    if (S_ISDIR(st.st_mode)) {
-        Strings names = readDirectory(path);
-	foreach (Strings::iterator, i, names)
-	    hashAndLink(dryRun, hashToPath, stats, path + "/" + *i);
+    /* Atomically replace the old file with the new hard link. */
+    if (rename(tempLink.c_str(), path.c_str()) == -1) {
+        if (errno == EMLINK) {
+            /* Some filesystems generate too many links on the rename,
+               rather than on the original link.  (Probably it
+               temporarily increases the st_nlink field before
+               decreasing it again.) */
+            printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
+
+            /* Unlink the temp link. */
+            if (unlink(linkPath.c_str()) == -1)
+                printMsg(lvlError, format("unable to unlink `%1%'") % linkPath);
+            return;
+        }
+        throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path);
     }
+
+    stats.filesLinked++;
+    stats.bytesFreed += st.st_size;
+    stats.blocksFreed += st.st_blocks;
 }
 
 
-void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
+void LocalStore::optimiseStore(OptimiseStats & stats)
 {
-    HashToPath hashToPath;
+    createDirs(nixStore + "/" + linksDir);
 
     PathSet paths = queryValidPaths();
 
@@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
         addTempRoot(*i);
         if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
         startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
-        hashAndLink(dryRun, hashToPath, stats, *i);
+        hashAndLink(stats, *i);
     }
 }