From 75a1d9849d7355c227ce76be17809a71852956b3 Mon Sep 17 00:00:00 2001 From: Benno Fünfstück Date: Mon, 15 May 2017 10:17:53 +0200 Subject: nar-accessor: use tree, fixes readDirectory missing children Previously, if a directory `foo` existed and a file `foo-` (where `-` is any character that is sorted before `/`), then `readDirectory` would return an empty list. To fix this, we now use a tree where we can just access the children of the node, and do not need to rely on sorting behavior to list the contents of a directory. --- src/libstore/nar-accessor.cc | 109 ++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index 4cb5de7449ea..ee1cf385c084 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -2,6 +2,8 @@ #include "archive.hh" #include +#include +#include namespace nix { @@ -16,16 +18,36 @@ struct NarMember size_t start, size; std::string target; + + /* If this is a directory, all the children of the directory. */ + std::map children; + + NarMember* find(const Path & path) + { + if(path == "") return this; + + if(type != FSAccessor::Type::tDirectory) { + return nullptr; + } + + auto split = std::find(path.begin() + 1, path.end(), '/'); + std::string child_name(path.begin() + 1, split); + std::string remaining(split, path.end()); + + auto child = children.find(child_name); + if(child == children.end()) return nullptr; + + return child->second.find(remaining); + } + }; struct NarIndexer : ParseSink, StringSource { - // FIXME: should store this as a tree. Now we're vulnerable to - // O(nm) memory consumption (e.g. for x_0/.../x_n/{y_0..y_m}). 
- typedef std::map Members; - Members members; + NarMember root; + std::stack parents; - Path currentPath; + std::string currentName; std::string currentStart; bool isExec = false; @@ -33,28 +55,45 @@ struct NarIndexer : ParseSink, StringSource { } + void createMember(const Path & path, NarMember member) { + size_t level = std::count(path.begin(), path.end(), '/'); + while(parents.size() > level) { + parents.pop(); + } + + if(parents.empty()) { + root = std::move(member); + parents.push(&root); + } else { + if(parents.top()->type != FSAccessor::Type::tDirectory) { + throw Error(format("NAR file missing parent directory of path ‘%1%’") % path); + } + auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member)); + parents.push(&result.first->second); + } + } + void createDirectory(const Path & path) override { - members.emplace(path, - NarMember{FSAccessor::Type::tDirectory, false, 0, 0}); + createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0 }); } void createRegularFile(const Path & path) override { - currentPath = path; + createMember(path, {FSAccessor::Type::tRegular, false, 0, 0 }); } void isExecutable() override { - isExec = true; + parents.top()->isExecutable = true; } void preallocateContents(unsigned long long size) override { currentStart = string(s, pos, 16); assert(size <= std::numeric_limits::max()); - members.emplace(currentPath, - NarMember{FSAccessor::Type::tRegular, isExec, pos, (size_t) size}); + parents.top()->size = (size_t)size; + parents.top()->start = pos; } void receiveContents(unsigned char * data, unsigned int len) override @@ -68,16 +107,23 @@ struct NarIndexer : ParseSink, StringSource void createSymlink(const Path & path, const string & target) override { - members.emplace(path, + createMember(path, NarMember{FSAccessor::Type::tSymlink, false, 0, 0, target}); } - Members::iterator find(const Path & path) + NarMember* find(const Path & path) { - auto i = members.find(path); - if (i == members.end()) + Path 
canon = path == "" ? "" : canonPath(path); + NarMember* result = root.find(canon); + return result; + } + + NarMember& at(const Path & path) { + auto result = find(path); + if(result == nullptr) { throw Error(format("NAR file does not contain path ‘%1%’") % path); - return i; + } + return *result; } }; @@ -93,44 +139,41 @@ struct NarAccessor : public FSAccessor Stat stat(const Path & path) override { - auto i = indexer.members.find(path); - if (i == indexer.members.end()) + auto i = indexer.find(path); + if (i == nullptr) return {FSAccessor::Type::tMissing, 0, false}; - return {i->second.type, i->second.size, i->second.isExecutable}; + return {i->type, i->size, i->isExecutable}; } StringSet readDirectory(const Path & path) override { - auto i = indexer.find(path); + auto& i = indexer.at(path); // bind by reference: at() returns NarMember&, a by-value auto would copy the whole subtree - if (i->second.type != FSAccessor::Type::tDirectory) + if (i.type != FSAccessor::Type::tDirectory) throw Error(format("path ‘%1%’ inside NAR file is not a directory") % path); - ++i; StringSet res; - while (i != indexer.members.end() && isInDir(i->first, path)) { - // FIXME: really bad performance. 
- if (i->first.find('/', path.size() + 1) == std::string::npos) - res.insert(std::string(i->first, path.size() + 1)); - ++i; + for(auto&& child : i.children) { + res.insert(child.first); + } return res; } std::string readFile(const Path & path) override { - auto i = indexer.find(path); - if (i->second.type != FSAccessor::Type::tRegular) + auto& i = indexer.at(path); // bind by reference: at() returns NarMember&, avoid copying the member + if (i.type != FSAccessor::Type::tRegular) throw Error(format("path ‘%1%’ inside NAR file is not a regular file") % path); - return std::string(*nar, i->second.start, i->second.size); + return std::string(*nar, i.start, i.size); } std::string readLink(const Path & path) override { - auto i = indexer.find(path); - if (i->second.type != FSAccessor::Type::tSymlink) + auto& i = indexer.at(path); // bind by reference: at() returns NarMember&, avoid copying the member + if (i.type != FSAccessor::Type::tSymlink) throw Error(format("path ‘%1%’ inside NAR file is not a symlink") % path); - return i->second.target; + return i.target; } }; -- cgit 1.4.1 From 4412f7c08367b17b3be723ee42df159100d93922 Mon Sep 17 00:00:00 2001 From: Benno Fünfstück Date: Mon, 15 May 2017 12:23:21 +0200 Subject: nar-archive.cc: add tests for the nar index --- tests/local.mk | 3 ++- tests/nar-index.nix | 23 +++++++++++++++++++++++ tests/nar-index.sh | 23 +++++++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/nar-index.nix create mode 100644 tests/nar-index.sh diff --git a/tests/local.mk b/tests/local.mk index 108e3febdb0c..7d99a0fc7675 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -13,7 +13,8 @@ nix_tests = \ check-reqs.sh pass-as-file.sh tarball.sh restricted.sh \ placeholders.sh nix-shell.sh \ linux-sandbox.sh \ - build-remote.sh + build-remote.sh \ + nar-index.sh # parallel.sh install-tests += $(foreach x, $(nix_tests), tests/$(x)) diff --git a/tests/nar-index.nix b/tests/nar-index.nix new file mode 100644 index 000000000000..0e2a7f721135 --- /dev/null +++ b/tests/nar-index.nix @@ -0,0 +1,23 @@ +with import ./config.nix; + +rec { + a = mkDerivation { + name 
= "nar-index-a"; + builder = builtins.toFile "builder.sh" + '' + mkdir $out + mkdir $out/foo + touch $out/foo-x + touch $out/foo/bar + touch $out/foo/baz + touch $out/qux + mkdir $out/zyx + + cat >$out/foo/data < $narFile + +echo "check that find and ls-nar match" +( cd $storePath; find . | sort ) > files.find +nix ls-nar -R -d $narFile "" | sort > files.ls-nar +diff -u files.find files.ls-nar + +echo "check that file contents of data match" +nix cat-nar $narFile /foo/data > data.cat-nar +diff -u data.cat-nar $storePath/foo/data + +echo "check that file contents of baz match" +nix cat-nar $narFile /foo/baz > baz.cat-nar +diff -u baz.cat-nar $storePath/foo/baz \ No newline at end of file -- cgit 1.4.1 From 5ee06e612a93a30bfa3b2129a3951e0c36f95602 Mon Sep 17 00:00:00 2001 From: Benno Fünfstück Date: Mon, 15 May 2017 19:32:51 +0200 Subject: nar-accessor: non-recursive NarMember::find This avoids a possible stack overflow if directories are very deeply nested. --- src/libstore/nar-accessor.cc | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index ee1cf385c084..c84bb1dea321 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -21,25 +21,6 @@ struct NarMember /* If this is a directory, all the children of the directory. 
*/ std::map children; - - NarMember* find(const Path & path) - { - if(path == "") return this; - - if(type != FSAccessor::Type::tDirectory) { - return nullptr; - } - - auto split = std::find(path.begin() + 1, path.end(), '/'); - std::string child_name(path.begin() + 1, split); - std::string remaining(split, path.end()); - - auto child = children.find(child_name); - if(child == children.end()) return nullptr; - - return child->second.find(remaining); - } - }; struct NarIndexer : ParseSink, StringSource @@ -114,8 +95,27 @@ struct NarIndexer : ParseSink, StringSource NarMember* find(const Path & path) { Path canon = path == "" ? "" : canonPath(path); - NarMember* result = root.find(canon); - return result; + NarMember* current = &root; + auto end = canon.end(); // walk the canonicalised path, not the raw argument + for(auto it = canon.begin(); it != end; ) { + // because it != end, the remaining component is non-empty so we need + // a directory + if(current->type != FSAccessor::Type::tDirectory) return nullptr; + + // skip slash (canonPath above ensures that this is always a slash) + assert(*it == '/'); + it += 1; + + // lookup current component + auto next = std::find(it, end, '/'); + auto child = current->children.find(std::string(it, next)); + if(child == current->children.end()) return nullptr; + current = &child->second; + + it = next; + } + + return current; } NarMember& at(const Path & path) { -- cgit 1.4.1 From a1f428b13bd003caaf3a1d1da6e934d52b6ea6dc Mon Sep 17 00:00:00 2001 From: Benno Fünfstück Date: Mon, 15 May 2017 19:35:36 +0200 Subject: nar-accessor.cc: remove unused member NarIndexer::currentName --- src/libstore/nar-accessor.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index c84bb1dea321..82595e76a9b5 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -28,7 +28,6 @@ struct NarIndexer : ParseSink, StringSource NarMember root; std::stack parents; - std::string currentName; std::string 
currentStart; bool isExec = false; @@ -56,7 +55,7 @@ struct NarIndexer : ParseSink, StringSource void createDirectory(const Path & path) override { - createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0 }); + createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0 }); } void createRegularFile(const Path & path) override -- cgit 1.4.1