From 2df9cbeb47177d6a21606b4b509ebcf6bd0437a6 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 7 Dec 2017 00:50:46 +0100 Subject: Provide random access to cached NARs E.g. $ time nix cat-store --store https://cache.nixos.org?local-nar-cache=/tmp/nars \ /nix/store/b0w2hafndl09h64fhb86kw6bmhbmnpm1-blender-2.79/share/icons/hicolor/scalable/apps/blender.svg > /dev/null real 0m4.139s $ time nix cat-store --store https://cache.nixos.org?local-nar-cache=/tmp/nars \ /nix/store/b0w2hafndl09h64fhb86kw6bmhbmnpm1-blender-2.79/share/icons/hicolor/scalable/apps/blender.svg > /dev/null real 0m0.024s (Before, the second call took ~0.220s.) This will use a NAR listing in /tmp/nars/b0w2hafndl09h64fhb86kw6bmhbmnpm1.ls containing all metadata, including the offsets of regular files inside the NAR. Thus, we don't need to read the entire NAR. (We do read the entire listing, but that's generally pretty small. We could use a SQLite DB by borrowing some more code from nixos-channel-scripts/file-cache.hh.) This is primarily useful when Hydra is serving files from an S3 binary cache, in particular when you have giant NARs. E.g. we had some 12 GiB NARs, so accessing individual files was pretty slow. 
--- src/libstore/binary-cache-store.cc | 12 +-- src/libstore/nar-accessor.cc | 193 +++++++++++++++++++++++-------------- src/libstore/nar-accessor.hh | 10 ++ src/libstore/remote-fs-accessor.cc | 82 ++++++++++++---- src/libstore/remote-fs-accessor.hh | 5 +- 5 files changed, 198 insertions(+), 104 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 68af85bf16d9..ab971dd8b6d9 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -129,10 +129,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, const refnars.emplace(info.path, narAccessor); - accessor_->addToCache(info.path, *nar); - } + if (accessor_) + accessor_->addToCache(info.path, *nar, narAccessor); { auto res = jsonRoot.placeholder("root"); @@ -144,10 +142,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, const refnars.emplace(info.path, makeNarAccessor(nar)); - accessor_->addToCache(info.path, *nar); - } + if (accessor_) + accessor_->addToCache(info.path, *nar, makeNarAccessor(nar)); } /* Compress the NAR. 
*/ diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index d6e9757c2cbf..b74480684f2a 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -6,6 +6,8 @@ #include #include +#include + namespace nix { struct NarMember @@ -24,83 +26,127 @@ struct NarMember std::map children; }; -struct NarIndexer : ParseSink, StringSource +struct NarAccessor : public FSAccessor { - NarMember root; - std::stack parents; + std::shared_ptr nar; - std::string currentStart; - bool isExec = false; + GetNarBytes getNarBytes; - NarIndexer(const std::string & nar) : StringSource(nar) + NarMember root; + + struct NarIndexer : ParseSink, StringSource { - } + NarAccessor & acc; - void createMember(const Path & path, NarMember member) { - size_t level = std::count(path.begin(), path.end(), '/'); - while(parents.size() > level) { - parents.pop(); - } + std::stack parents; - if(parents.empty()) { - root = std::move(member); - parents.push(&root); - } else { - if(parents.top()->type != FSAccessor::Type::tDirectory) { - throw Error(format("NAR file missing parent directory of path '%1%'") % path); + std::string currentStart; + bool isExec = false; + + NarIndexer(NarAccessor & acc, const std::string & nar) + : StringSource(nar), acc(acc) + { } + + void createMember(const Path & path, NarMember member) { + size_t level = std::count(path.begin(), path.end(), '/'); + while (parents.size() > level) parents.pop(); + + if (parents.empty()) { + acc.root = std::move(member); + parents.push(&acc.root); + } else { + if (parents.top()->type != FSAccessor::Type::tDirectory) + throw Error("NAR file missing parent directory of path '%s'", path); + auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member)); + parents.push(&result.first->second); } - auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member)); - parents.push(&result.first->second); } - } - void createDirectory(const Path & path) override - { - 
createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0 }); - } + void createDirectory(const Path & path) override + { + createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0}); + } - void createRegularFile(const Path & path) override - { - createMember(path, {FSAccessor::Type::tRegular, false, 0, 0 }); - } + void createRegularFile(const Path & path) override + { + createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); + } - void isExecutable() override - { - parents.top()->isExecutable = true; - } + void isExecutable() override + { + parents.top()->isExecutable = true; + } - void preallocateContents(unsigned long long size) override - { - currentStart = string(s, pos, 16); - assert(size <= std::numeric_limits::max()); - parents.top()->size = (size_t)size; - parents.top()->start = pos; - } + void preallocateContents(unsigned long long size) override + { + currentStart = string(s, pos, 16); + assert(size <= std::numeric_limits::max()); + parents.top()->size = (size_t)size; + parents.top()->start = pos; + } - void receiveContents(unsigned char * data, unsigned int len) override - { - // Sanity check - if (!currentStart.empty()) { - assert(len < 16 || currentStart == string((char *) data, 16)); - currentStart.clear(); + void receiveContents(unsigned char * data, unsigned int len) override + { + // Sanity check + if (!currentStart.empty()) { + assert(len < 16 || currentStart == string((char *) data, 16)); + currentStart.clear(); + } + } + + void createSymlink(const Path & path, const string & target) override + { + createMember(path, + NarMember{FSAccessor::Type::tSymlink, false, 0, 0, target}); } + }; + + NarAccessor(ref nar) : nar(nar) + { + NarIndexer indexer(*this, *nar); + parseDump(indexer, indexer); } - void createSymlink(const Path & path, const string & target) override + NarAccessor(const std::string & listing, GetNarBytes getNarBytes) + : getNarBytes(getNarBytes) { - createMember(path, - NarMember{FSAccessor::Type::tSymlink, false, 0, 
0, target}); + using json = nlohmann::json; + + std::function recurse; + + recurse = [&](NarMember & member, json & v) { + std::string type = v["type"]; + + if (type == "directory") { + member.type = FSAccessor::Type::tDirectory; + for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) { + std::string name = i.key(); + recurse(member.children[name], i.value()); + } + } else if (type == "regular") { + member.type = FSAccessor::Type::tRegular; + member.size = v["size"]; + member.isExecutable = v.value("executable", false); + member.start = v["narOffset"]; + } else if (type == "symlink") { + member.type = FSAccessor::Type::tSymlink; + member.target = v.value("target", ""); + } else return; + }; + + json v = json::parse(listing); + recurse(root, v); } - NarMember* find(const Path & path) + NarMember * find(const Path & path) { Path canon = path == "" ? "" : canonPath(path); - NarMember* current = &root; + NarMember * current = &root; auto end = path.end(); - for(auto it = path.begin(); it != end; ) { + for (auto it = path.begin(); it != end; ) { // because it != end, the remaining component is non-empty so we need // a directory - if(current->type != FSAccessor::Type::tDirectory) return nullptr; + if (current->type != FSAccessor::Type::tDirectory) return nullptr; // skip slash (canonPath above ensures that this is always a slash) assert(*it == '/'); @@ -109,7 +155,7 @@ struct NarIndexer : ParseSink, StringSource // lookup current component auto next = std::find(it, end, '/'); auto child = current->children.find(std::string(it, next)); - if(child == current->children.end()) return nullptr; + if (child == current->children.end()) return nullptr; current = &child->second; it = next; @@ -118,28 +164,16 @@ struct NarIndexer : ParseSink, StringSource return current; } - NarMember& at(const Path & path) { + NarMember & get(const Path & path) { auto result = find(path); - if(result == nullptr) { - throw Error(format("NAR file does not contain path '%1%'") % path); - 
} + if (result == nullptr) + throw Error("NAR file does not contain path '%1%'", path); return *result; } -}; - -struct NarAccessor : public FSAccessor -{ - ref nar; - NarIndexer indexer; - - NarAccessor(ref nar) : nar(nar), indexer(*nar) - { - parseDump(indexer, indexer); - } Stat stat(const Path & path) override { - auto i = indexer.find(path); + auto i = find(path); if (i == nullptr) return {FSAccessor::Type::tMissing, 0, false}; return {i->type, i->size, i->isExecutable, i->start}; @@ -147,30 +181,33 @@ struct NarAccessor : public FSAccessor StringSet readDirectory(const Path & path) override { - auto i = indexer.at(path); + auto i = get(path); if (i.type != FSAccessor::Type::tDirectory) throw Error(format("path '%1%' inside NAR file is not a directory") % path); StringSet res; - for(auto&& child : i.children) { + for (auto & child : i.children) res.insert(child.first); - } return res; } std::string readFile(const Path & path) override { - auto i = indexer.at(path); + auto i = get(path); if (i.type != FSAccessor::Type::tRegular) throw Error(format("path '%1%' inside NAR file is not a regular file") % path); + + if (getNarBytes) return getNarBytes(i.start, i.size); + + assert(nar); return std::string(*nar, i.start, i.size); } std::string readLink(const Path & path) override { - auto i = indexer.at(path); + auto i = get(path); if (i.type != FSAccessor::Type::tSymlink) throw Error(format("path '%1%' inside NAR file is not a symlink") % path); return i.target; @@ -182,6 +219,12 @@ ref makeNarAccessor(ref nar) return make_ref(nar); } +ref makeLazyNarAccessor(const std::string & listing, + GetNarBytes getNarBytes) +{ + return make_ref(listing, getNarBytes); +} + void listNar(JSONPlaceholder & res, ref accessor, const Path & path, bool recurse) { diff --git a/src/libstore/nar-accessor.hh b/src/libstore/nar-accessor.hh index ed8fe15cad23..1903355a236e 100644 --- a/src/libstore/nar-accessor.hh +++ b/src/libstore/nar-accessor.hh @@ -8,6 +8,16 @@ namespace nix { file. 
*/ ref makeNarAccessor(ref nar); +/* Create a NAR accessor from a NAR listing (in the format produced by + listNar()). The callback getNarBytes(offset, length) is used by the + readFile() method of the accessor to get the contents of files + inside the NAR. */ +typedef std::function GetNarBytes; + +ref makeLazyNarAccessor( + const std::string & listing, + GetNarBytes getNarBytes); + class JSONPlaceholder; /* Write a JSON representation of the contents of a NAR (except file diff --git a/src/libstore/remote-fs-accessor.cc b/src/libstore/remote-fs-accessor.cc index ba9620a175bb..5233fb2c239b 100644 --- a/src/libstore/remote-fs-accessor.cc +++ b/src/libstore/remote-fs-accessor.cc @@ -1,5 +1,10 @@ #include "remote-fs-accessor.hh" #include "nar-accessor.hh" +#include "json.hh" + +#include +#include +#include namespace nix { @@ -11,20 +16,30 @@ RemoteFSAccessor::RemoteFSAccessor(ref store, const Path & cacheDir) createDirs(cacheDir); } -Path RemoteFSAccessor::makeCacheFile(const Path & storePath) +Path RemoteFSAccessor::makeCacheFile(const Path & storePath, const std::string & ext) { assert(cacheDir != ""); - return fmt("%s/%s.nar", cacheDir, storePathToHash(storePath)); + return fmt("%s/%s.%s", cacheDir, storePathToHash(storePath), ext); } -void RemoteFSAccessor::addToCache(const Path & storePath, const std::string & nar) +void RemoteFSAccessor::addToCache(const Path & storePath, const std::string & nar, + ref narAccessor) { - try { - if (cacheDir == "") return; - /* FIXME: do this asynchronously. */ - writeFile(makeCacheFile(storePath), nar); - } catch (...) { - ignoreException(); + nars.emplace(storePath, narAccessor); + + if (cacheDir != "") { + try { + std::ostringstream str; + JSONPlaceholder jsonRoot(str); + listNar(jsonRoot, narAccessor, "", true); + writeFile(makeCacheFile(storePath, "ls"), str.str()); + + /* FIXME: do this asynchronously. */ + writeFile(makeCacheFile(storePath, "nar"), nar); + + } catch (...) 
{ + ignoreException(); + } } } @@ -42,20 +57,49 @@ std::pair, Path> RemoteFSAccessor::fetch(const Path & path_) if (i != nars.end()) return {i->second, restPath}; StringSink sink; + std::string listing; + Path cacheFile; + + if (cacheDir != "" && pathExists(cacheFile = makeCacheFile(storePath, "nar"))) { + + try { + listing = nix::readFile(makeCacheFile(storePath, "ls")); + + auto narAccessor = makeLazyNarAccessor(listing, + [cacheFile](uint64_t offset, uint64_t length) { + + AutoCloseFD fd = open(cacheFile.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) + throw SysError("opening NAR cache file '%s'", cacheFile); + + if (lseek(fd.get(), offset, SEEK_SET) != (off_t) offset) + throw SysError("seeking in '%s'", cacheFile); + + std::string buf(length, 0); + readFull(fd.get(), (unsigned char *) buf.data(), length); + + return buf; + }); + + nars.emplace(storePath, narAccessor); + return {narAccessor, restPath}; + + } catch (SysError &) { } + + try { + *sink.s = nix::readFile(cacheFile); - try { - if (cacheDir != "") - *sink.s = nix::readFile(makeCacheFile(storePath)); - } catch (SysError &) { } + auto narAccessor = makeNarAccessor(sink.s); + nars.emplace(storePath, narAccessor); + return {narAccessor, restPath}; - if (sink.s->empty()) { - store->narFromPath(storePath, sink); - addToCache(storePath, *sink.s); + } catch (SysError &) { } } - auto accessor = makeNarAccessor(sink.s); - nars.emplace(storePath, accessor); - return {accessor, restPath}; + store->narFromPath(storePath, sink); + auto narAccessor = makeNarAccessor(sink.s); + addToCache(storePath, *sink.s, narAccessor); + return {narAccessor, restPath}; } FSAccessor::Stat RemoteFSAccessor::stat(const Path & path) diff --git a/src/libstore/remote-fs-accessor.hh b/src/libstore/remote-fs-accessor.hh index 2a3fc01eff58..4afb3be95736 100644 --- a/src/libstore/remote-fs-accessor.hh +++ b/src/libstore/remote-fs-accessor.hh @@ -18,9 +18,10 @@ class RemoteFSAccessor : public FSAccessor friend class BinaryCacheStore; - Path 
makeCacheFile(const Path & storePath); + Path makeCacheFile(const Path & storePath, const std::string & ext); - void addToCache(const Path & storePath, const std::string & nar); + void addToCache(const Path & storePath, const std::string & nar, + ref narAccessor); public: -- cgit 1.4.1