about summary refs log tree commit diff
path: root/src/libutil/compression.cc
diff options
context:
space:
mode:
authorWill Dietz <w@wdtz.org>2017-12-29T20·42-0600
committerWill Dietz <w@wdtz.org>2017-12-31T02·26-0600
commit9dd2b8ac7b8d82df8c1f3f36efb683175fd6ecee (patch)
treee05cbf82ccce203db9b52d25220e2838544034d8 /src/libutil/compression.cc
parent6a0dd635084213bf75c1f36bc9bc38d242096e65 (diff)
use libbrotli directly when available
* Look for both 'brotli' and 'bro' as external command,
  since upstream has renamed it in newer versions.
  If neither are found, current runtime behavior
  is preserved: try to find 'bro' on PATH.
* Limit amount handed to BrotliEncoderCompressStream
  to ensure interrupts are processed in a timely manner.
  Testing shows negligible performance impact.
  (Other compression sinks don't seem to require this)
Diffstat (limited to 'src/libutil/compression.cc')
-rw-r--r--src/libutil/compression.cc171
1 files changed, 163 insertions, 8 deletions
diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc
index 2b3dff3a5e..5e2631ba34 100644
--- a/src/libutil/compression.cc
+++ b/src/libutil/compression.cc
@@ -7,6 +7,11 @@
 #include <cstdio>
 #include <cstring>
 
+#if HAVE_BROTLI
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+#endif // HAVE_BROTLI
+
 #include <iostream>
 
 namespace nix {
@@ -94,8 +99,56 @@ static ref<std::string> decompressBzip2(const std::string & in)
 
 static ref<std::string> decompressBrotli(const std::string & in)
 {
-    // FIXME: use libbrotli
-    return make_ref<std::string>(runProgram(BRO, true, {"-d"}, {in}));
+#if !HAVE_BROTLI
+    return make_ref<std::string>(runProgram(BROTLI, true, {"-d"}, {in}));
+#else
+    auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+    if (!s)
+        throw CompressionError("unable to initialize brotli decoder");
+
+    Finally free([s]() { BrotliDecoderDestroyInstance(s); });
+
+    uint8_t outbuf[BUFSIZ];
+    ref<std::string> res = make_ref<std::string>();
+    const uint8_t *next_in = (uint8_t *)in.c_str();
+    size_t avail_in = in.size();
+    uint8_t *next_out = outbuf;
+    size_t avail_out = sizeof(outbuf);
+
+    while (true) {
+        checkInterrupt();
+
+        auto ret = BrotliDecoderDecompressStream(s,
+                &avail_in, &next_in,
+                &avail_out, &next_out,
+                nullptr);
+
+        switch (ret) {
+        case BROTLI_DECODER_RESULT_ERROR:
+            throw CompressionError("error while decompressing brotli file");
+        case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
+            throw CompressionError("incomplete or corrupt brotli file");
+        case BROTLI_DECODER_RESULT_SUCCESS:
+            if (avail_in != 0)
+                throw CompressionError("unexpected input after brotli decompression");
+            break;
+        case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
+            // I'm not sure if this can happen, but abort if this happens with empty buffer
+            if (avail_out == sizeof(outbuf))
+                throw CompressionError("brotli decompression requires larger buffer");
+            break;
+        }
+
+        // Always ensure we have full buffer for next invocation
+        if (avail_out < sizeof(outbuf)) {
+            res->append((char*)outbuf, sizeof(outbuf) - avail_out);
+            next_out = outbuf;
+            avail_out = sizeof(outbuf);
+        }
+
+        if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res;
+    }
+#endif // HAVE_BROTLI
 }
 
 ref<std::string> compress(const std::string & method, const std::string & in)
@@ -270,33 +323,131 @@ struct BzipSink : CompressionSink
     }
 };
 
-struct BrotliSink : CompressionSink
+struct LambdaCompressionSink : CompressionSink
 {
     Sink & nextSink;
     std::string data;
+    using CompressFnTy = std::function<std::string(const std::string&)>;
+    CompressFnTy compressFn;
+    LambdaCompressionSink(Sink& nextSink, CompressFnTy compressFn)
+        : nextSink(nextSink)
+        , compressFn(std::move(compressFn))
+    {
+    };
+
+    void finish() override
+    {
+        flush();
+        nextSink(compressFn(data));
+    }
+
+    void write(const unsigned char * data, size_t len) override
+    {
+        checkInterrupt();
+        this->data.append((const char *) data, len);
+    }
+};
+
+struct BrotliCmdSink : LambdaCompressionSink
+{
+    BrotliCmdSink(Sink& nextSink)
+        : LambdaCompressionSink(nextSink, [](const std::string& data) {
+            return runProgram(BROTLI, true, {}, data);
+        })
+    {
+    }
+};
+
+#if HAVE_BROTLI
+struct BrotliSink : CompressionSink
+{
+    Sink & nextSink;
+    uint8_t outbuf[BUFSIZ];
+    BrotliEncoderState *state;
+    bool finished = false;
 
     BrotliSink(Sink & nextSink) : nextSink(nextSink)
     {
+        state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+        if (!state)
+            throw CompressionError("unable to initialise brotli encoder");
     }
 
     ~BrotliSink()
     {
+        BrotliEncoderDestroyInstance(state);
     }
 
-    // FIXME: use libbrotli
-
     void finish() override
     {
         flush();
-        nextSink(runProgram(BRO, true, {}, data));
+        assert(!finished);
+
+        const uint8_t *next_in = nullptr;
+        size_t avail_in = 0;
+        uint8_t *next_out = outbuf;
+        size_t avail_out = sizeof(outbuf);
+        while (!finished) {
+            checkInterrupt();
+
+            if (!BrotliEncoderCompressStream(state,
+                        BROTLI_OPERATION_FINISH,
+                        &avail_in, &next_in,
+                        &avail_out, &next_out,
+                        nullptr))
+                throw CompressionError("error while finishing brotli file");
+
+            finished = BrotliEncoderIsFinished(state);
+            if (avail_out == 0 || finished) {
+                nextSink(outbuf, sizeof(outbuf) - avail_out);
+                next_out = outbuf;
+                avail_out = sizeof(outbuf);
+            }
+        }
     }
 
     void write(const unsigned char * data, size_t len) override
     {
-        checkInterrupt();
-        this->data.append((const char *) data, len);
+        assert(!finished);
+
+        // Don't feed brotli too much at once
+        const size_t CHUNK_SIZE = sizeof(outbuf) << 2;
+        while (len) {
+          size_t n = std::min(CHUNK_SIZE, len);
+          writeInternal(data, n);
+          data += n;
+          len -= n;
+        }
+    }
+  private:
+    void writeInternal(const unsigned char * data, size_t len)
+    {
+        assert(!finished);
+
+        const uint8_t *next_in = data;
+        size_t avail_in = len;
+        uint8_t *next_out = outbuf;
+        size_t avail_out = sizeof(outbuf);
+
+        while (avail_in > 0) {
+            checkInterrupt();
+
+            if (!BrotliEncoderCompressStream(state,
+                      BROTLI_OPERATION_PROCESS,
+                      &avail_in, &next_in,
+                      &avail_out, &next_out,
+                      nullptr))
+                throw CompressionError("error while compressing brotli file");
+
+            if (avail_out < sizeof(outbuf) || avail_in == 0) {
+                nextSink(outbuf, sizeof(outbuf) - avail_out);
+                next_out = outbuf;
+                avail_out = sizeof(outbuf);
+            }
+        }
     }
 };
+#endif // HAVE_BROTLI
 
 ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink)
 {
@@ -307,7 +458,11 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next
     else if (method == "bzip2")
         return make_ref<BzipSink>(nextSink);
     else if (method == "br")
+#if HAVE_BROTLI
         return make_ref<BrotliSink>(nextSink);
+#else
+        return make_ref<BrotliCmdSink>(nextSink);
+#endif
     else
         throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
 }