about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2018-08-06T13·40+0200
committerEelco Dolstra <edolstra@gmail.com>2018-08-06T13·40+0200
commitd3761f5f8bce1e4c8dcfdff3fa77c173157c0346 (patch)
tree1dae65410f1977d58ad7aa7741121cbe74c33da3
parentfa4def3d4675c8b2d6aacb56959dbbf9e52df66a (diff)
Fix Brotli decompression in 'nix log'
This didn't work anymore since decompression was only done in the
non-coroutine case.

Decompressors are now sinks, just like compressors.

Also fixed a bug in bzip2 API handling (we have to handle BZ_RUN_OK
rather than BZ_OK), which we didn't notice because there was a missing
'throw':

  if (ret != BZ_OK)
      CompressionError("error while compressing bzip2 file");
-rw-r--r--src/libstore/binary-cache-store.cc23
-rw-r--r--src/libstore/builtins/fetchurl.cc21
-rw-r--r--src/libstore/download.cc68
-rw-r--r--src/libstore/download.hh3
-rw-r--r--src/libstore/s3-binary-cache-store.cc6
-rw-r--r--src/libutil/compression.cc557
-rw-r--r--src/libutil/compression.hh12
7 files changed, 309 insertions, 381 deletions
diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc
index 76c0a1a891b8..9c75c85993f9 100644
--- a/src/libstore/binary-cache-store.cc
+++ b/src/libstore/binary-cache-store.cc
@@ -217,17 +217,6 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink)
 {
     auto info = queryPathInfo(storePath).cast<const NarInfo>();
 
-    auto source = sinkToSource([this, url{info->url}](Sink & sink) {
-        try {
-            getFile(url, sink);
-        } catch (NoSuchBinaryCacheFile & e) {
-            throw SubstituteGone(e.what());
-        }
-    });
-
-    stats.narRead++;
-    //stats.narReadCompressedBytes += nar->size(); // FIXME
-
     uint64_t narSize = 0;
 
     LambdaSink wrapperSink([&](const unsigned char * data, size_t len) {
@@ -235,8 +224,18 @@ void BinaryCacheStore::narFromPath(const Path & storePath, Sink & sink)
         narSize += len;
     });
 
-    decompress(info->compression, *source, wrapperSink);
+    auto decompressor = makeDecompressionSink(info->compression, wrapperSink);
 
+    try {
+        getFile(info->url, *decompressor);
+    } catch (NoSuchBinaryCacheFile & e) {
+        throw SubstituteGone(e.what());
+    }
+
+    decompressor->flush();
+
+    stats.narRead++;
+    //stats.narReadCompressedBytes += nar->size(); // FIXME
     stats.narReadBytes += narSize;
 }
 
diff --git a/src/libstore/builtins/fetchurl.cc b/src/libstore/builtins/fetchurl.cc
index 1f4abd374f54..b4dcb35f951a 100644
--- a/src/libstore/builtins/fetchurl.cc
+++ b/src/libstore/builtins/fetchurl.cc
@@ -39,21 +39,16 @@ void builtinFetchurl(const BasicDerivation & drv, const std::string & netrcData)
             request.verifyTLS = false;
             request.decompress = false;
 
-            downloader->download(std::move(request), sink);
+            auto decompressor = makeDecompressionSink(
+                hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink);
+            downloader->download(std::move(request), *decompressor);
+            decompressor->finish();
         });
 
-        if (get(drv.env, "unpack", "") == "1") {
-
-            if (hasSuffix(mainUrl, ".xz")) {
-                auto source2 = sinkToSource([&](Sink & sink) {
-                    decompress("xz", *source, sink);
-                });
-                restorePath(storePath, *source2);
-            } else
-                restorePath(storePath, *source);
-
-        } else
-              writeFile(storePath, *source);
+        if (get(drv.env, "unpack", "") == "1")
+            restorePath(storePath, *source);
+        else
+            writeFile(storePath, *source);
 
         auto executable = drv.env.find("executable");
         if (executable != drv.env.end() && executable->second == "1") {
diff --git a/src/libstore/download.cc b/src/libstore/download.cc
index f0ea1995ae73..973fca0b130f 100644
--- a/src/libstore/download.cc
+++ b/src/libstore/download.cc
@@ -58,16 +58,6 @@ std::string resolveUri(const std::string & uri)
         return uri;
 }
 
-ref<std::string> decodeContent(const std::string & encoding, ref<std::string> data)
-{
-    if (encoding == "")
-        return data;
-    else if (encoding == "br")
-        return decompress(encoding, *data);
-    else
-        throw Error("unsupported Content-Encoding '%s'", encoding);
-}
-
 struct CurlDownloader : public Downloader
 {
     CURLM * curlm = 0;
@@ -106,6 +96,12 @@ struct CurlDownloader : public Downloader
                 fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri),
                 {request.uri}, request.parentAct)
             , callback(callback)
+            , finalSink([this](const unsigned char * data, size_t len) {
+                if (this->request.dataCallback)
+                    this->request.dataCallback((char *) data, len);
+                else
+                    this->result.data->append((char *) data, len);
+              })
         {
             if (!request.expectedETag.empty())
                 requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + request.expectedETag).c_str());
@@ -129,23 +125,40 @@ struct CurlDownloader : public Downloader
             }
         }
 
-        template<class T>
-        void fail(const T & e)
+        void failEx(std::exception_ptr ex)
         {
             assert(!done);
             done = true;
-            callback.rethrow(std::make_exception_ptr(e));
+            callback.rethrow(ex);
         }
 
+        template<class T>
+        void fail(const T & e)
+        {
+            failEx(std::make_exception_ptr(e));
+        }
+
+        LambdaSink finalSink;
+        std::shared_ptr<CompressionSink> decompressionSink;
+
+        std::exception_ptr writeException;
+
         size_t writeCallback(void * contents, size_t size, size_t nmemb)
         {
-            size_t realSize = size * nmemb;
-            result.bodySize += realSize;
-            if (request.dataCallback)
-                request.dataCallback((char *) contents, realSize);
-            else
-                result.data->append((char *) contents, realSize);
-            return realSize;
+            try {
+                size_t realSize = size * nmemb;
+                result.bodySize += realSize;
+
+                if (!decompressionSink)
+                    decompressionSink = makeDecompressionSink(encoding, finalSink);
+
+                (*decompressionSink)((unsigned char *) contents, realSize);
+
+                return realSize;
+            } catch (...) {
+                writeException = std::current_exception();
+                return 0;
+            }
         }
 
         static size_t writeCallbackWrapper(void * contents, size_t size, size_t nmemb, void * userp)
@@ -314,27 +327,33 @@ struct CurlDownloader : public Downloader
             debug("finished %s of '%s'; curl status = %d, HTTP status = %d, body = %d bytes",
                 request.verb(), request.uri, code, httpStatus, result.bodySize);
 
+            if (decompressionSink)
+                decompressionSink->finish();
+
             if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) {
                 code = CURLE_OK;
                 httpStatus = 304;
             }
 
-            if (code == CURLE_OK &&
+            if (writeException)
+                failEx(writeException);
+
+            else if (code == CURLE_OK &&
                 (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || httpStatus == 304 || httpStatus == 226 /* FTP */ || httpStatus == 0 /* other protocol */))
             {
                 result.cached = httpStatus == 304;
                 done = true;
 
                 try {
-                    if (request.decompress)
-                        result.data = decodeContent(encoding, ref<std::string>(result.data));
                     act.progress(result.data->size(), result.data->size());
                     callback(std::move(result));
                 } catch (...) {
                     done = true;
                     callback.rethrow();
                 }
-            } else {
+            }
+
+            else {
                 // We treat most errors as transient, but won't retry when hopeless
                 Error err = Transient;
 
@@ -369,6 +388,7 @@ struct CurlDownloader : public Downloader
                         case CURLE_UNKNOWN_OPTION:
                         case CURLE_SSL_CACERT_BADFILE:
                         case CURLE_TOO_MANY_REDIRECTS:
+                        case CURLE_WRITE_ERROR:
                             err = Misc;
                             break;
                         default: // Shut up warnings
diff --git a/src/libstore/download.hh b/src/libstore/download.hh
index ff38a2870cc0..f0228f7d053a 100644
--- a/src/libstore/download.hh
+++ b/src/libstore/download.hh
@@ -88,7 +88,4 @@ public:
 
 bool isUri(const string & s);
 
-/* Decode data according to the Content-Encoding header. */
-ref<std::string> decodeContent(const std::string & encoding, ref<std::string> data);
-
 }
diff --git a/src/libstore/s3-binary-cache-store.cc b/src/libstore/s3-binary-cache-store.cc
index 2f18e3f38c6e..660583d31fe0 100644
--- a/src/libstore/s3-binary-cache-store.cc
+++ b/src/libstore/s3-binary-cache-store.cc
@@ -153,10 +153,8 @@ S3Helper::DownloadResult S3Helper::getObject(
         auto result = checkAws(fmt("AWS error fetching '%s'", key),
             client->GetObject(request));
 
-        res.data = decodeContent(
-            result.GetContentEncoding(),
-            make_ref<std::string>(
-                dynamic_cast<std::stringstream &>(result.GetBody()).str()));
+        res.data = decompress(result.GetContentEncoding(),
+            dynamic_cast<std::stringstream &>(result.GetBody()).str());
 
     } catch (S3Error & e) {
         if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) throw;
diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc
index 5de85ede1500..53b62f62a76f 100644
--- a/src/libutil/compression.cc
+++ b/src/libutil/compression.cc
@@ -17,228 +17,258 @@ namespace nix {
 
 static const size_t bufSize = 32 * 1024;
 
-static void decompressNone(Source & source, Sink & sink)
+// Don't feed brotli too much at once.
+struct ChunkedCompressionSink : CompressionSink
 {
-    std::vector<unsigned char> buf(bufSize);
-    while (true) {
-        size_t n;
-        try {
-            n = source.read(buf.data(), buf.size());
-        } catch (EndOfFile &) {
-            break;
+    uint8_t outbuf[BUFSIZ];
+
+    void write(const unsigned char * data, size_t len) override
+    {
+        const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
+        while (len) {
+            size_t n = std::min(CHUNK_SIZE, len);
+            writeInternal(data, n);
+            data += n;
+            len -= n;
         }
-        sink(buf.data(), n);
     }
-}
 
-static void decompressXZ(Source & source, Sink & sink)
+    virtual void writeInternal(const unsigned char * data, size_t len) = 0;
+};
+
+struct NoneSink : CompressionSink
 {
-    lzma_stream strm(LZMA_STREAM_INIT);
-
-    lzma_ret ret = lzma_stream_decoder(
-        &strm, UINT64_MAX, LZMA_CONCATENATED);
-    if (ret != LZMA_OK)
-        throw CompressionError("unable to initialise lzma decoder");
-
-    Finally free([&]() { lzma_end(&strm); });
-
-    lzma_action action = LZMA_RUN;
-    std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
-    strm.next_in = nullptr;
-    strm.avail_in = 0;
-    strm.next_out = outbuf.data();
-    strm.avail_out = outbuf.size();
-    bool eof = false;
-
-    while (true) {
-        checkInterrupt();
-
-        if (strm.avail_in == 0 && !eof) {
-            strm.next_in = inbuf.data();
-            try {
-                strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
-            } catch (EndOfFile &) {
-                eof = true;
-            }
-        }
+    Sink & nextSink;
+    NoneSink(Sink & nextSink) : nextSink(nextSink) { }
+    void finish() override { flush(); }
+    void write(const unsigned char * data, size_t len) override { nextSink(data, len); }
+};
 
-        if (strm.avail_in == 0)
-            action = LZMA_FINISH;
+struct XzDecompressionSink : CompressionSink
+{
+    Sink & nextSink;
+    uint8_t outbuf[BUFSIZ];
+    lzma_stream strm = LZMA_STREAM_INIT;
+    bool finished = false;
 
-        lzma_ret ret = lzma_code(&strm, action);
+    XzDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+    {
+        lzma_ret ret = lzma_stream_decoder(
+            &strm, UINT64_MAX, LZMA_CONCATENATED);
+        if (ret != LZMA_OK)
+            throw CompressionError("unable to initialise lzma decoder");
 
-        if (strm.avail_out < outbuf.size()) {
-            sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
-            strm.next_out = outbuf.data();
-            strm.avail_out = outbuf.size();
-        }
+        strm.next_out = outbuf;
+        strm.avail_out = sizeof(outbuf);
+    }
 
-        if (ret == LZMA_STREAM_END) return;
+    ~XzDecompressionSink()
+    {
+        lzma_end(&strm);
+    }
 
-        if (ret != LZMA_OK)
-            throw CompressionError("error %d while decompressing xz file", ret);
+    void finish() override
+    {
+        CompressionSink::flush();
+        write(nullptr, 0);
     }
-}
 
-static void decompressBzip2(Source & source, Sink & sink)
-{
-    bz_stream strm;
-    memset(&strm, 0, sizeof(strm));
-
-    int ret = BZ2_bzDecompressInit(&strm, 0, 0);
-    if (ret != BZ_OK)
-        throw CompressionError("unable to initialise bzip2 decoder");
-
-    Finally free([&]() { BZ2_bzDecompressEnd(&strm); });
-
-    std::vector<char> inbuf(bufSize), outbuf(bufSize);
-    strm.next_in = nullptr;
-    strm.avail_in = 0;
-    strm.next_out = outbuf.data();
-    strm.avail_out = outbuf.size();
-    bool eof = false;
-
-    while (true) {
-        checkInterrupt();
-
-        if (strm.avail_in == 0 && !eof) {
-            strm.next_in = inbuf.data();
-            try {
-                strm.avail_in = source.read((unsigned char *) strm.next_in, inbuf.size());
-            } catch (EndOfFile &) {
-                eof = true;
-            }
-        }
+    void write(const unsigned char * data, size_t len) override
+    {
+        strm.next_in = data;
+        strm.avail_in = len;
 
-        int ret = BZ2_bzDecompress(&strm);
+        while (!finished && (!data || strm.avail_in)) {
+            checkInterrupt();
 
-        if (strm.avail_in == 0 && strm.avail_out == outbuf.size() && eof)
-            throw CompressionError("bzip2 data ends prematurely");
+            lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH);
+            if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+                throw CompressionError("error %d while decompressing xz file", ret);
+
+            finished = ret == LZMA_STREAM_END;
 
-        if (strm.avail_out < outbuf.size()) {
-            sink((unsigned char *) outbuf.data(), outbuf.size() - strm.avail_out);
-            strm.next_out = outbuf.data();
-            strm.avail_out = outbuf.size();
+            if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+                nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
+                strm.next_out = outbuf;
+                strm.avail_out = sizeof(outbuf);
+            }
         }
+    }
+};
 
-        if (ret == BZ_STREAM_END) return;
+struct BzipDecompressionSink : ChunkedCompressionSink
+{
+    Sink & nextSink;
+    bz_stream strm;
+    bool finished = false;
 
+    BzipDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+    {
+        memset(&strm, 0, sizeof(strm));
+        int ret = BZ2_bzDecompressInit(&strm, 0, 0);
         if (ret != BZ_OK)
-            throw CompressionError("error while decompressing bzip2 file");
+            throw CompressionError("unable to initialise bzip2 decoder");
+
+        strm.next_out = (char *) outbuf;
+        strm.avail_out = sizeof(outbuf);
     }
-}
 
-static void decompressBrotli(Source & source, Sink & sink)
-{
-    auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
-    if (!s)
-        throw CompressionError("unable to initialize brotli decoder");
-
-    Finally free([s]() { BrotliDecoderDestroyInstance(s); });
-
-    std::vector<uint8_t> inbuf(bufSize), outbuf(bufSize);
-    const uint8_t * next_in = nullptr;
-    size_t avail_in = 0;
-    bool eof = false;
-
-    while (true) {
-        checkInterrupt();
-
-        if (avail_in == 0 && !eof) {
-            next_in = inbuf.data();
-            try {
-                avail_in = source.read((unsigned char *) next_in, inbuf.size());
-            } catch (EndOfFile &) {
-                eof = true;
+    ~BzipDecompressionSink()
+    {
+        BZ2_bzDecompressEnd(&strm);
+    }
+
+    void finish() override
+    {
+        flush();
+        write(nullptr, 0);
+    }
+
+    void writeInternal(const unsigned char * data, size_t len)
+    {
+        assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
+
+        strm.next_in = (char *) data;
+        strm.avail_in = len;
+
+        while (strm.avail_in) {
+            checkInterrupt();
+
+            int ret = BZ2_bzDecompress(&strm);
+            if (ret != BZ_OK && ret != BZ_STREAM_END)
+                throw CompressionError("error while decompressing bzip2 file");
+
+            finished = ret == BZ_STREAM_END;
+
+            if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+                nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
+                strm.next_out = (char *) outbuf;
+                strm.avail_out = sizeof(outbuf);
             }
         }
+    }
+};
 
-        uint8_t * next_out = outbuf.data();
-        size_t avail_out = outbuf.size();
-
-        auto ret = BrotliDecoderDecompressStream(s,
-                &avail_in, &next_in,
-                &avail_out, &next_out,
-                nullptr);
-
-        switch (ret) {
-        case BROTLI_DECODER_RESULT_ERROR:
-            throw CompressionError("error while decompressing brotli file");
-        case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
-            if (eof)
-                throw CompressionError("incomplete or corrupt brotli file");
-            break;
-        case BROTLI_DECODER_RESULT_SUCCESS:
-            if (avail_in != 0)
-                throw CompressionError("unexpected input after brotli decompression");
-            break;
-        case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
-            // I'm not sure if this can happen, but abort if this happens with empty buffer
-            if (avail_out == outbuf.size())
-                throw CompressionError("brotli decompression requires larger buffer");
-            break;
-        }
+struct BrotliDecompressionSink : ChunkedCompressionSink
+{
+    Sink & nextSink;
+    BrotliDecoderState * state;
+    bool finished = false;
 
-        // Always ensure we have full buffer for next invocation
-        if (avail_out < outbuf.size())
-            sink((unsigned char *) outbuf.data(), outbuf.size() - avail_out);
+    BrotliDecompressionSink(Sink & nextSink) : nextSink(nextSink)
+    {
+        state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+        if (!state)
+            throw CompressionError("unable to initialize brotli decoder");
+    }
 
-        if (ret == BROTLI_DECODER_RESULT_SUCCESS) return;
+    ~BrotliDecompressionSink()
+    {
+        BrotliDecoderDestroyInstance(state);
     }
-}
+
+    void finish() override
+    {
+        flush();
+        writeInternal(nullptr, 0);
+    }
+
+    void writeInternal(const unsigned char * data, size_t len)
+    {
+        const uint8_t * next_in = data;
+        size_t avail_in = len;
+        uint8_t * next_out = outbuf;
+        size_t avail_out = sizeof(outbuf);
+
+        while (!finished && (!data || avail_in)) {
+            checkInterrupt();
+
+            if (!BrotliDecoderDecompressStream(state,
+                    &avail_in, &next_in,
+                    &avail_out, &next_out,
+                    nullptr))
+                throw CompressionError("error while decompressing brotli file");
+
+            if (avail_out < sizeof(outbuf) || avail_in == 0) {
+                nextSink(outbuf, sizeof(outbuf) - avail_out);
+                next_out = outbuf;
+                avail_out = sizeof(outbuf);
+            }
+
+            finished = BrotliDecoderIsFinished(state);
+        }
+    }
+};
 
 ref<std::string> decompress(const std::string & method, const std::string & in)
 {
-    StringSource source(in);
-    StringSink sink;
-    decompress(method, source, sink);
-    return sink.s;
+    StringSink ssink;
+    auto sink = makeDecompressionSink(method, ssink);
+    (*sink)(in);
+    sink->finish();
+    return ssink.s;
 }
 
-void decompress(const std::string & method, Source & source, Sink & sink)
+ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink)
 {
-    if (method == "none")
-        return decompressNone(source, sink);
+    if (method == "none" || method == "")
+        return make_ref<NoneSink>(nextSink);
     else if (method == "xz")
-        return decompressXZ(source, sink);
+        return make_ref<XzDecompressionSink>(nextSink);
     else if (method == "bzip2")
-        return decompressBzip2(source, sink);
+        return make_ref<BzipDecompressionSink>(nextSink);
     else if (method == "br")
-        return decompressBrotli(source, sink);
+        return make_ref<BrotliDecompressionSink>(nextSink);
     else
         throw UnknownCompressionMethod("unknown compression method '%s'", method);
 }
 
-struct NoneSink : CompressionSink
-{
-    Sink & nextSink;
-    NoneSink(Sink & nextSink) : nextSink(nextSink) { }
-    void finish() override { flush(); }
-    void write(const unsigned char * data, size_t len) override { nextSink(data, len); }
-};
-
-struct XzSink : CompressionSink
+struct XzCompressionSink : CompressionSink
 {
     Sink & nextSink;
     uint8_t outbuf[BUFSIZ];
     lzma_stream strm = LZMA_STREAM_INIT;
     bool finished = false;
 
-    template <typename F>
-    XzSink(Sink & nextSink, F&& initEncoder) : nextSink(nextSink) {
-        lzma_ret ret = initEncoder();
+    XzCompressionSink(Sink & nextSink, bool parallel) : nextSink(nextSink)
+    {
+        lzma_ret ret;
+        bool done = false;
+
+        if (parallel) {
+#ifdef HAVE_LZMA_MT
+            lzma_mt mt_options = {};
+            mt_options.flags = 0;
+            mt_options.timeout = 300; // Using the same setting as the xz cmd line
+            mt_options.preset = LZMA_PRESET_DEFAULT;
+            mt_options.filters = NULL;
+            mt_options.check = LZMA_CHECK_CRC64;
+            mt_options.threads = lzma_cputhreads();
+            mt_options.block_size = 0;
+            if (mt_options.threads == 0)
+                mt_options.threads = 1;
+            // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
+            // number of threads.
+            ret = lzma_stream_encoder_mt(&strm, &mt_options);
+            done = true;
+#else
+            printMsg(lvlError, "warning: parallel compression requested but not supported for metho  d '%1%', falling back to single-threaded compression", method);
+#endif
+        }
+
+        if (!done)
+            ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
+
         if (ret != LZMA_OK)
             throw CompressionError("unable to initialise lzma encoder");
+
         // FIXME: apply the x86 BCJ filter?
 
         strm.next_out = outbuf;
         strm.avail_out = sizeof(outbuf);
     }
-    XzSink(Sink & nextSink) : XzSink(nextSink, [this]() {
-        return lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
-    }) {}
 
-    ~XzSink()
+    ~XzCompressionSink()
     {
         lzma_end(&strm);
     }
@@ -246,43 +276,25 @@ struct XzSink : CompressionSink
     void finish() override
     {
         CompressionSink::flush();
-
-        assert(!finished);
-        finished = true;
-
-        while (true) {
-            checkInterrupt();
-
-            lzma_ret ret = lzma_code(&strm, LZMA_FINISH);
-            if (ret != LZMA_OK && ret != LZMA_STREAM_END)
-                throw CompressionError("error while flushing xz file");
-
-            if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
-                nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
-                strm.next_out = outbuf;
-                strm.avail_out = sizeof(outbuf);
-            }
-
-            if (ret == LZMA_STREAM_END) break;
-        }
+        write(nullptr, 0);
     }
 
     void write(const unsigned char * data, size_t len) override
     {
-        assert(!finished);
-
         strm.next_in = data;
         strm.avail_in = len;
 
-        while (strm.avail_in) {
+        while (!finished && (!data || strm.avail_in)) {
             checkInterrupt();
 
-            lzma_ret ret = lzma_code(&strm, LZMA_RUN);
-            if (ret != LZMA_OK)
-                throw CompressionError("error while compressing xz file");
+            lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH);
+            if (ret != LZMA_OK && ret != LZMA_STREAM_END)
+                throw CompressionError("error %d while compressing xz file", ret);
+
+            finished = ret == LZMA_STREAM_END;
 
-            if (strm.avail_out == 0) {
-                nextSink(outbuf, sizeof(outbuf));
+            if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+                nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
                 strm.next_out = outbuf;
                 strm.avail_out = sizeof(outbuf);
             }
@@ -290,46 +302,24 @@ struct XzSink : CompressionSink
     }
 };
 
-#ifdef HAVE_LZMA_MT
-struct ParallelXzSink : public XzSink
-{
-  ParallelXzSink(Sink &nextSink) : XzSink(nextSink, [this]() {
-        lzma_mt mt_options = {};
-        mt_options.flags = 0;
-        mt_options.timeout = 300; // Using the same setting as the xz cmd line
-        mt_options.preset = LZMA_PRESET_DEFAULT;
-        mt_options.filters = NULL;
-        mt_options.check = LZMA_CHECK_CRC64;
-        mt_options.threads = lzma_cputhreads();
-        mt_options.block_size = 0;
-        if (mt_options.threads == 0)
-            mt_options.threads = 1;
-        // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
-        // number of threads.
-        return lzma_stream_encoder_mt(&strm, &mt_options);
-  }) {}
-};
-#endif
-
-struct BzipSink : CompressionSink
+struct BzipCompressionSink : ChunkedCompressionSink
 {
     Sink & nextSink;
-    char outbuf[BUFSIZ];
     bz_stream strm;
     bool finished = false;
 
-    BzipSink(Sink & nextSink) : nextSink(nextSink)
+    BzipCompressionSink(Sink & nextSink) : nextSink(nextSink)
     {
         memset(&strm, 0, sizeof(strm));
         int ret = BZ2_bzCompressInit(&strm, 9, 0, 30);
         if (ret != BZ_OK)
             throw CompressionError("unable to initialise bzip2 encoder");
 
-        strm.next_out = outbuf;
+        strm.next_out = (char *) outbuf;
         strm.avail_out = sizeof(outbuf);
     }
 
-    ~BzipSink()
+    ~BzipCompressionSink()
     {
         BZ2_bzCompressEnd(&strm);
     }
@@ -337,78 +327,49 @@ struct BzipSink : CompressionSink
     void finish() override
     {
         flush();
-
-        assert(!finished);
-        finished = true;
-
-        while (true) {
-            checkInterrupt();
-
-            int ret = BZ2_bzCompress(&strm, BZ_FINISH);
-            if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
-                throw CompressionError("error while flushing bzip2 file");
-
-            if (strm.avail_out == 0 || ret == BZ_STREAM_END) {
-                nextSink((unsigned char *) outbuf, sizeof(outbuf) - strm.avail_out);
-                strm.next_out = outbuf;
-                strm.avail_out = sizeof(outbuf);
-            }
-
-            if (ret == BZ_STREAM_END) break;
-        }
-    }
-
-    void write(const unsigned char * data, size_t len) override
-    {
-        /* Bzip2's 'avail_in' parameter is an unsigned int, so we need
-           to split the input into chunks of at most 4 GiB. */
-        while (len) {
-            auto n = std::min((size_t) std::numeric_limits<decltype(strm.avail_in)>::max(), len);
-            writeInternal(data, n);
-            data += n;
-            len -= n;
-        }
+        writeInternal(nullptr, 0);
     }
 
     void writeInternal(const unsigned char * data, size_t len)
     {
-        assert(!finished);
         assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
 
         strm.next_in = (char *) data;
         strm.avail_in = len;
 
-        while (strm.avail_in) {
+        while (!finished && (!data || strm.avail_in)) {
             checkInterrupt();
 
-            int ret = BZ2_bzCompress(&strm, BZ_RUN);
-            if (ret != BZ_OK)
-                CompressionError("error while compressing bzip2 file");
+            int ret = BZ2_bzCompress(&strm, data ? BZ_RUN : BZ_FINISH);
+            if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
+                throw CompressionError("error %d while compressing bzip2 file", ret);
 
-            if (strm.avail_out == 0) {
-                nextSink((unsigned char *) outbuf, sizeof(outbuf));
-                strm.next_out = outbuf;
+            finished = ret == BZ_STREAM_END;
+
+            if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
+                nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
+                strm.next_out = (char *) outbuf;
                 strm.avail_out = sizeof(outbuf);
             }
         }
     }
 };
 
-struct BrotliSink : CompressionSink
+struct BrotliCompressionSink : ChunkedCompressionSink
 {
     Sink & nextSink;
     uint8_t outbuf[BUFSIZ];
     BrotliEncoderState *state;
     bool finished = false;
 
-    BrotliSink(Sink & nextSink) : nextSink(nextSink)
+    BrotliCompressionSink(Sink & nextSink) : nextSink(nextSink)
     {
         state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
         if (!state)
             throw CompressionError("unable to initialise brotli encoder");
     }
 
-    ~BrotliSink()
+    ~BrotliCompressionSink()
     {
         BrotliEncoderDestroyInstance(state);
     }
@@ -416,89 +377,47 @@ struct BrotliSink : CompressionSink
     void finish() override
     {
         flush();
-        assert(!finished);
-
-        const uint8_t *next_in = nullptr;
-        size_t avail_in = 0;
-        uint8_t *next_out = outbuf;
-        size_t avail_out = sizeof(outbuf);
-        while (!finished) {
-            checkInterrupt();
-
-            if (!BrotliEncoderCompressStream(state,
-                        BROTLI_OPERATION_FINISH,
-                        &avail_in, &next_in,
-                        &avail_out, &next_out,
-                        nullptr))
-                throw CompressionError("error while finishing brotli file");
-
-            finished = BrotliEncoderIsFinished(state);
-            if (avail_out == 0 || finished) {
-                nextSink(outbuf, sizeof(outbuf) - avail_out);
-                next_out = outbuf;
-                avail_out = sizeof(outbuf);
-            }
-        }
-    }
-
-    void write(const unsigned char * data, size_t len) override
-    {
-        // Don't feed brotli too much at once
-        const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
-        while (len) {
-          size_t n = std::min(CHUNK_SIZE, len);
-          writeInternal(data, n);
-          data += n;
-          len -= n;
-        }
+        writeInternal(nullptr, 0);
     }
 
     void writeInternal(const unsigned char * data, size_t len)
     {
-        assert(!finished);
-
-        const uint8_t *next_in = data;
+        const uint8_t * next_in = data;
         size_t avail_in = len;
-        uint8_t *next_out = outbuf;
+        uint8_t * next_out = outbuf;
         size_t avail_out = sizeof(outbuf);
 
-        while (avail_in > 0) {
+        while (!finished && (!data || avail_in)) {
             checkInterrupt();
 
             if (!BrotliEncoderCompressStream(state,
-                      BROTLI_OPERATION_PROCESS,
-                      &avail_in, &next_in,
-                      &avail_out, &next_out,
-                      nullptr))
-                throw CompressionError("error while compressing brotli file");
+                    data ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
+                    &avail_in, &next_in,
+                    &avail_out, &next_out,
+                    nullptr))
+                throw CompressionError("error while compressing brotli compression");
 
             if (avail_out < sizeof(outbuf) || avail_in == 0) {
                 nextSink(outbuf, sizeof(outbuf) - avail_out);
                 next_out = outbuf;
                 avail_out = sizeof(outbuf);
             }
+
+            finished = BrotliEncoderIsFinished(state);
         }
     }
 };
 
 ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel)
 {
-    if (parallel) {
-#ifdef HAVE_LZMA_MT
-        if (method == "xz")
-            return make_ref<ParallelXzSink>(nextSink);
-#endif
-        printMsg(lvlError, format("Warning: parallel compression requested but not supported for method '%1%', falling back to single-threaded compression") % method);
-    }
-
     if (method == "none")
         return make_ref<NoneSink>(nextSink);
     else if (method == "xz")
-        return make_ref<XzSink>(nextSink);
+        return make_ref<XzCompressionSink>(nextSink, parallel);
     else if (method == "bzip2")
-        return make_ref<BzipSink>(nextSink);
+        return make_ref<BzipCompressionSink>(nextSink);
     else if (method == "br")
-        return make_ref<BrotliSink>(nextSink);
+        return make_ref<BrotliCompressionSink>(nextSink);
     else
         throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
 }
diff --git a/src/libutil/compression.hh b/src/libutil/compression.hh
index f7a3e3fbd32e..dd666a4e19fd 100644
--- a/src/libutil/compression.hh
+++ b/src/libutil/compression.hh
@@ -8,17 +8,17 @@
 
 namespace nix {
 
-ref<std::string> decompress(const std::string & method, const std::string & in);
-
-void decompress(const std::string & method, Source & source, Sink & sink);
-
-ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
-
 struct CompressionSink : BufferedSink
 {
     virtual void finish() = 0;
 };
 
+ref<std::string> decompress(const std::string & method, const std::string & in);
+
+ref<CompressionSink> makeDecompressionSink(const std::string & method, Sink & nextSink);
+
+ref<std::string> compress(const std::string & method, const std::string & in, const bool parallel = false);
+
 ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink, const bool parallel = false);
 
 MakeError(UnknownCompressionMethod, Error);