From 08ec757726e5ef47e71bf16ed0b252b288bcf0f3 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 27 Mar 2018 23:12:31 +0200 Subject: Make LocalBinaryCacheStore::narFromPath() run in constant memory This reduces memory consumption of nix copy --from file://... --to ~/my-nix /nix/store/95cwv4q54dc6giaqv6q6p4r02ia2km35-blender-2.79 from 514 MiB to 18 MiB for an uncompressed binary cache, and from 192 MiB to 53 MiB for a bzipped binary cache. It may also be faster because fetching can happen concurrently with decompression/writing. Continuation of 48662d151bdf4a38670897beacea9d1bd750376a. Issue https://github.com/NixOS/nix/issues/1681. --- src/libstore/binary-cache-store.cc | 36 ++++++++++++++++++++++++-------- src/libstore/binary-cache-store.hh | 14 ++++++++++--- src/libstore/local-binary-cache-store.cc | 11 +++++----- src/libstore/s3-binary-cache-store.cc | 24 ++++++++++----------- src/libutil/util.cc | 10 ++++++++- src/libutil/util.hh | 1 + 6 files changed, 65 insertions(+), 31 deletions(-) diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 45be490765a4..11fa3cae27a5 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -54,7 +54,15 @@ void BinaryCacheStore::init() } } -std::shared_ptr BinaryCacheStore::getFile(const std::string & path) +void BinaryCacheStore::getFile(const std::string & path, + Callback> callback) +{ + try { + callback(getFile(path)); + } catch (...) { callback.rethrow(); } +} + +void BinaryCacheStore::getFile(const std::string & path, Sink & sink) { std::promise> promise; getFile(path, @@ -65,7 +73,19 @@ std::shared_ptr BinaryCacheStore::getFile(const std::string & path) promise.set_exception(std::current_exception()); } }}); - return promise.get_future().get(); + auto data = promise.get_future().get(); + sink((unsigned char *) data->data(), data->size()); +} + +std::shared_ptr BinaryCacheStore::getFile(const std::string & path) +{ + StringSink sink; + try { + getFile(path, sink); + } catch (NoSuchBinaryCacheFile &) { + return nullptr; + } + return sink.s; } Path BinaryCacheStore::narInfoFileFor(const Path & storePath) @@ -197,23 +217,21 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink) { auto info = queryPathInfo(storePath).cast(); - auto nar = getFile(info->url); - - if (!nar) throw Error(format("file '%s' missing from binary cache") % info->url); + auto source = sinkToSource([this, url{info->url}](Sink & sink) { + getFile(url, sink); + }); stats.narRead++; - stats.narReadCompressedBytes += nar->size(); + //stats.narReadCompressedBytes += nar->size(); // FIXME uint64_t narSize = 0; - StringSource source(*nar); - LambdaSink wrapperSink([&](const unsigned char * data, size_t len) { sink(data, len); narSize += len; }); - decompress(info->compression, source, wrapperSink); + decompress(info->compression, *source, wrapperSink); stats.narReadBytes += narSize; } diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index fcde666beec6..6bc83fc50ca1 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -38,10 +38,16 @@ public: const std::string & data, const std::string & mimeType) = 0; - /* Return the contents of the specified file, or null if it - doesn't exist. */ + /* Note: subclasses must implement at least one of the two + following getFile() methods. */ + + /* Dump the contents of the specified file to a sink. */ + virtual void getFile(const std::string & path, Sink & sink); + + /* Fetch the specified file and call the specified callback with + the result. A subclass may implement this asynchronously. */ virtual void getFile(const std::string & path, - Callback> callback) = 0; + Callback> callback); std::shared_ptr getFile(const std::string & path); @@ -129,4 +135,6 @@ public: }; +MakeError(NoSuchBinaryCacheFile, Error); + } diff --git a/src/libstore/local-binary-cache-store.cc b/src/libstore/local-binary-cache-store.cc index ae0ffa6a56ef..b7001795be4d 100644 --- a/src/libstore/local-binary-cache-store.cc +++ b/src/libstore/local-binary-cache-store.cc @@ -34,15 +34,14 @@ protected: const std::string & data, const std::string & mimeType) override; - void getFile(const std::string & path, - Callback> callback) override + void getFile(const std::string & path, Sink & sink) override { try { - // FIXME: O(n) space - callback(std::make_shared(readFile(binaryCacheDir + "/" + path))); + readFile(binaryCacheDir + "/" + path, sink); } catch (SysError & e) { - if (e.errNo == ENOENT) callback(nullptr); else callback.rethrow(); - } catch (...) { callback.rethrow(); } + if (e.errNo == ENOENT) + throw NoSuchBinaryCacheFile("file '%s' does not exist in binary cache", path); + } } PathSet queryAllValidPaths() override diff --git a/src/libstore/s3-binary-cache-store.cc b/src/libstore/s3-binary-cache-store.cc index f2e8efc16e60..239739bae832 100644 --- a/src/libstore/s3-binary-cache-store.cc +++ b/src/libstore/s3-binary-cache-store.cc @@ -364,23 +364,23 @@ struct S3BinaryCacheStoreImpl : public S3BinaryCacheStore uploadFile(path, data, mimeType, ""); } - void getFile(const std::string & path, - Callback> callback) override + void getFile(const std::string & path, Sink & sink) override { - try { - stats.get++; + stats.get++; - auto res = s3Helper.getObject(bucketName, path); + // FIXME: stream output to sink. + auto res = s3Helper.getObject(bucketName, path); - stats.getBytes += res.data ? res.data->size() : 0; - stats.getTimeMs += res.durationMs; + stats.getBytes += res.data ? res.data->size() : 0; + stats.getTimeMs += res.durationMs; - if (res.data) - printTalkative("downloaded 's3://%s/%s' (%d bytes) in %d ms", - bucketName, path, res.data->size(), res.durationMs); + if (res.data) { + printTalkative("downloaded 's3://%s/%s' (%d bytes) in %d ms", + bucketName, path, res.data->size(), res.durationMs); - callback(std::move(res.data)); - } catch (...) { callback.rethrow(); } + sink((unsigned char *) res.data->data(), res.data->size()); + } else + throw NoSuchBinaryCacheFile("file '%s' does not exist in binary cache '%s'", path, getUri()); } PathSet queryAllValidPaths() override diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 15962236ec65..22753f4b7eca 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -311,6 +311,14 @@ string readFile(const Path & path, bool drain) } +void readFile(const Path & path, Sink & sink) +{ + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) throw SysError("opening file '%s'", path); + drainFD(fd.get(), sink); +} + + void writeFile(const Path & path, const string & s, mode_t mode) { AutoCloseFD fd = open(path.c_str(), O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, mode); @@ -593,7 +601,7 @@ void drainFD(int fd, Sink & sink, bool block) throw SysError("making file descriptor non-blocking"); } - std::vector buf(4096); + std::vector buf(64 * 1024); while (1) { checkInterrupt(); ssize_t rd = read(fd, buf.data(), buf.size()); diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 215c7cecafef..af97b76d90bc 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -98,6 +98,7 @@ unsigned char getFileType(const Path & path); /* Read the contents of a file into a string. */ string readFile(int fd); string readFile(const Path & path, bool drain = false); +void readFile(const Path & path, Sink & sink); /* Write a string to a file. */ void writeFile(const Path & path, const string & s, mode_t mode = 0666); -- cgit 1.4.1