diff options
Diffstat (limited to 'third_party/nix/src/libstore/optimise-store.cc')
-rw-r--r-- | third_party/nix/src/libstore/optimise-store.cc | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/third_party/nix/src/libstore/optimise-store.cc b/third_party/nix/src/libstore/optimise-store.cc new file mode 100644 index 000000000000..eb24633c181c --- /dev/null +++ b/third_party/nix/src/libstore/optimise-store.cc @@ -0,0 +1,296 @@ +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <regex> +#include <utility> + +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "libstore/globals.hh" +#include "libstore/local-store.hh" +#include "libutil/util.hh" + +namespace nix { + +static void makeWritable(const Path& path) { + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1) { + throw SysError(format("changing writability of '%1%'") % path); + } +} + +struct MakeReadOnly { + Path path; + explicit MakeReadOnly(Path path) : path(std::move(path)) {} + ~MakeReadOnly() { + try { + /* This will make the path read-only. */ + if (!path.empty()) { + canonicaliseTimestampAndPermissions(path); + } + } catch (...) { + ignoreException(); + } + } +}; + +LocalStore::InodeHash LocalStore::loadInodeHash() { + DLOG(INFO) << "loading hash inodes in memory"; + InodeHash inodeHash; + + AutoCloseDir dir(opendir(linksDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % linksDir); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + // We don't care if we hit non-hash files, anything goes + inodeHash.insert(dirent->d_ino); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % linksDir); + } + + DLOG(INFO) << "loaded " << inodeHash.size() << " hash inodes"; + + return inodeHash; +} + +Strings LocalStore::readDirectoryIgnoringInodes(const Path& path, + const InodeHash& inodeHash) { + Strings names; + + AutoCloseDir dir(opendir(path.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % path); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + + if (inodeHash.count(dirent->d_ino) != 0u) { + DLOG(WARNING) << dirent->d_name << " is already linked"; + continue; + } + + std::string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + names.push_back(name); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % path); + } + + return names; +} + +void LocalStore::optimisePath_(OptimiseStats& stats, const Path& path, + InodeHash& inodeHash) { + checkInterrupt(); + + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + if (S_ISDIR(st.st_mode)) { + Strings names = readDirectoryIgnoringInodes(path, inodeHash); + for (auto& i : names) { + optimisePath_(stats, path + "/" + i, inodeHash); + } + return; + } + + /* We can hard link regular files and maybe symlinks. */ + if (!S_ISREG(st.st_mode) +#if CAN_LINK_SYMLINK + && !S_ISLNK(st.st_mode) +#endif + ) + return; + + /* Sometimes SNAFUs can cause files in the Nix store to be + modified, in particular when running programs as root under + NixOS (example: $fontconfig/var/cache being modified). Skip + those files. FIXME: check the modification time. */ + if (S_ISREG(st.st_mode) && ((st.st_mode & S_IWUSR) != 0u)) { + LOG(WARNING) << "skipping suspicious writable file '" << path << "'"; + return; + } + + /* This can still happen on top-level files. */ + if (st.st_nlink > 1 && (inodeHash.count(st.st_ino) != 0u)) { + DLOG(INFO) << path << " is already linked, with " << (st.st_nlink - 2) + << " other file(s)"; + return; + } + + /* Hash the file. Note that hashPath() returns the hash over the + NAR serialisation, which includes the execute bit on the file. + Thus, executable and non-executable files with the same + contents *won't* be linked (which is good because otherwise the + permissions would be screwed up). + + Also note that if `path' is a symlink, then we're hashing the + contents of the symlink (i.e. the result of readlink()), not + the contents of the target (which may not even exist). */ + Hash hash = hashPath(htSHA256, path).first; + LOG(INFO) << path << " has hash " << hash.to_string(); + + /* Check if this is a known hash. */ + Path linkPath = linksDir + "/" + hash.to_string(Base32, false); + +retry: + if (!pathExists(linkPath)) { + /* Nope, create a hard link in the links directory. */ + if (link(path.c_str(), linkPath.c_str()) == 0) { + inodeHash.insert(st.st_ino); + return; + } + + switch (errno) { + case EEXIST: + /* Fall through if another process created ‘linkPath’ before + we did. */ + break; + + case ENOSPC: + /* On ext4, that probably means the directory index is + full. When that happens, it's fine to ignore it: we + just effectively disable deduplication of this + file. */ + LOG(WARNING) << "cannot link '" << linkPath << " to " << path << ": " + << strerror(errno); + + return; + + default: + throw SysError("cannot link '%1%' to '%2%'", linkPath, path); + } + } + + /* Yes! We've seen a file with the same contents. Replace the + current file with a hard link to that file. */ + struct stat stLink; + if (lstat(linkPath.c_str(), &stLink) != 0) { + throw SysError(format("getting attributes of path '%1%'") % linkPath); + } + + if (st.st_ino == stLink.st_ino) { + DLOG(INFO) << path << " is already linked to " << linkPath; + return; + } + + if (st.st_size != stLink.st_size) { + LOG(WARNING) << "removing corrupted link '" << linkPath << "'"; + unlink(linkPath.c_str()); + goto retry; + } + + DLOG(INFO) << "linking '" << path << "' to '" << linkPath << "'"; + + /* Make the containing directory writable, but only if it's not + the store itself (we don't want or need to mess with its + permissions). */ + bool mustToggle = dirOf(path) != realStoreDir; + if (mustToggle) { + makeWritable(dirOf(path)); + } + + /* When we're done, make the directory read-only again and reset + its timestamp back to 0. */ + MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); + + Path tempLink = + (format("%1%/.tmp-link-%2%-%3%") % realStoreDir % getpid() % random()) + .str(); + + if (link(linkPath.c_str(), tempLink.c_str()) == -1) { + if (errno == EMLINK) { + /* Too many links to the same file (>= 32000 on most file + systems). This is likely to happen with empty files. + Just shrug and ignore. */ + if (st.st_size != 0) { + LOG(WARNING) << linkPath << " has maximum number of links"; + } + return; + } + throw SysError("cannot link '%1%' to '%2%'", tempLink, linkPath); + } + + /* Atomically replace the old file with the new hard link. */ + if (rename(tempLink.c_str(), path.c_str()) == -1) { + if (unlink(tempLink.c_str()) == -1) { + LOG(ERROR) << "unable to unlink '" << tempLink << "'"; + } + if (errno == EMLINK) { + /* Some filesystems generate too many links on the rename, + rather than on the original link. (Probably it + temporarily increases the st_nlink field before + decreasing it again.) */ + DLOG(WARNING) << "'" << linkPath + << "' has reached maximum number of links"; + return; + } + throw SysError(format("cannot rename '%1%' to '%2%'") % tempLink % path); + } + + stats.filesLinked++; + stats.bytesFreed += st.st_size; + stats.blocksFreed += st.st_blocks; +} + +void LocalStore::optimiseStore(OptimiseStats& stats) { + PathSet paths = queryAllValidPaths(); + InodeHash inodeHash = loadInodeHash(); + + uint64_t done = 0; + + for (auto& i : paths) { + addTempRoot(i); + if (!isValidPath(i)) { + continue; + } /* path was GC'ed, probably */ + { + LOG(INFO) << "optimising path '" << i << "'"; + optimisePath_(stats, realStoreDir + "/" + baseNameOf(i), inodeHash); + } + done++; + } +} + +static std::string showBytes(unsigned long long bytes) { + return (format("%.2f MiB") % (bytes / (1024.0 * 1024.0))).str(); +} + +void LocalStore::optimiseStore() { + OptimiseStats stats; + + optimiseStore(stats); + + LOG(INFO) << showBytes(stats.bytesFreed) << " freed by hard-linking " + << stats.filesLinked << " files"; +} + +void LocalStore::optimisePath(const Path& path) { + OptimiseStats stats; + InodeHash inodeHash; + + if (settings.autoOptimiseStore) { + optimisePath_(stats, path, inodeHash); + } +} + +} // namespace nix |