about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eelco Dolstra <eelco.dolstra@logicblox.com> 2018-01-02T10·31+0100
committer GitHub <noreply@github.com> 2018-01-02T10·31+0100
commit 9b67f234c9b6d7496a49dc4d93fd7a51a727d52d (patch)
tree f0c7d7798cbb677a86e3679c3d82def15111f07f
parent 099ba37820653763fa30b7856c85c4b8fed657e0 (diff)
parent 9dd2b8ac7b8d82df8c1f3f36efb683175fd6ecee (diff)
Merge pull request #1768 from dtzWill/feature/brotli
use libbrotli directly when available
-rw-r--r-- Makefile.config.in 4
-rw-r--r-- configure.ac 9
-rw-r--r-- src/libutil/compression.cc 171
-rw-r--r-- src/libutil/local.mk 4
-rw-r--r-- tests/brotli.sh 28
-rw-r--r-- tests/common.sh.in 1
-rw-r--r-- tests/local.mk 3
7 files changed, 207 insertions, 13 deletions
diff --git a/Makefile.config.in b/Makefile.config.in
index 45a70cd6dd1a..fab82194656e 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -6,6 +6,7 @@ CXXFLAGS = @CXXFLAGS@
 ENABLE_S3 = @ENABLE_S3@
 HAVE_SODIUM = @HAVE_SODIUM@
 HAVE_READLINE = @HAVE_READLINE@
+HAVE_BROTLI = @HAVE_BROTLI@
 LIBCURL_LIBS = @LIBCURL_LIBS@
 OPENSSL_LIBS = @OPENSSL_LIBS@
 PACKAGE_NAME = @PACKAGE_NAME@
@@ -13,9 +14,10 @@ PACKAGE_VERSION = @PACKAGE_VERSION@
 SODIUM_LIBS = @SODIUM_LIBS@
 LIBLZMA_LIBS = @LIBLZMA_LIBS@
 SQLITE3_LIBS = @SQLITE3_LIBS@
+LIBBROTLI_LIBS = @LIBBROTLI_LIBS@
 bash = @bash@
 bindir = @bindir@
-bro = @bro@
+brotli = @brotli@
 lsof = @lsof@
 datadir = @datadir@
 datarootdir = @datarootdir@
diff --git a/configure.ac b/configure.ac
index c395b8713f23..9db92ce91401 100644
--- a/configure.ac
+++ b/configure.ac
@@ -127,7 +127,7 @@ NEED_PROG(gzip, gzip)
 NEED_PROG(xz, xz)
 AC_PATH_PROG(dot, dot)
 AC_PATH_PROG(pv, pv, pv)
-AC_PATH_PROG(bro, bro, bro)
+AC_PATH_PROGS(brotli, brotli bro, bro)
 AC_PATH_PROG(lsof, lsof, lsof)
 
 
@@ -176,6 +176,13 @@ AC_SUBST(HAVE_SODIUM, [$have_sodium])
 PKG_CHECK_MODULES([LIBLZMA], [liblzma], [CXXFLAGS="$LIBLZMA_CFLAGS $CXXFLAGS"])
 
 
+# Look for libbrotli{enc,dec}. Optional: have_brotli stays empty when they are not found.
+PKG_CHECK_MODULES([LIBBROTLI], [libbrotlienc libbrotlidec],
+  [AC_DEFINE([HAVE_BROTLI], [1], [Whether to use libbrotli.])
+   CXXFLAGS="$LIBBROTLI_CFLAGS $CXXFLAGS"
+   have_brotli=1], [have_brotli=])
+AC_SUBST(HAVE_BROTLI, [$have_brotli])
+
 # Look for libseccomp, required for Linux sandboxing.
 if test "$sys_name" = linux; then
   PKG_CHECK_MODULES([LIBSECCOMP], [libseccomp],
diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc
index 2b3dff3a5ea1..5e2631ba3408 100644
--- a/src/libutil/compression.cc
+++ b/src/libutil/compression.cc
@@ -7,6 +7,11 @@
 #include <cstdio>
 #include <cstring>
 
+#if HAVE_BROTLI
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+#endif // HAVE_BROTLI
+
 #include <iostream>
 
 namespace nix {
@@ -94,8 +99,56 @@ static ref<std::string> decompressBzip2(const std::string & in)
 
 static ref<std::string> decompressBrotli(const std::string & in)
 {
-    // FIXME: use libbrotli
-    return make_ref<std::string>(runProgram(BRO, true, {"-d"}, {in}));
+#if !HAVE_BROTLI // built without libbrotli: run the external BROTLI program with "-d" instead
+    return make_ref<std::string>(runProgram(BROTLI, true, {"-d"}, {in}));
+#else
+    auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+    if (!s)
+        throw CompressionError("unable to initialize brotli decoder");
+
+    Finally free([s]() { BrotliDecoderDestroyInstance(s); });
+
+    uint8_t outbuf[BUFSIZ]; // scratch buffer the decoder writes into; copied into res after each call
+    ref<std::string> res = make_ref<std::string>();
+    const uint8_t *next_in = (uint8_t *)in.c_str();
+    size_t avail_in = in.size(); // the complete input is offered to the decoder up front
+    uint8_t *next_out = outbuf;
+    size_t avail_out = sizeof(outbuf);
+
+    while (true) {
+        checkInterrupt();
+
+        auto ret = BrotliDecoderDecompressStream(s,
+                &avail_in, &next_in,
+                &avail_out, &next_out,
+                nullptr);
+
+        switch (ret) {
+        case BROTLI_DECODER_RESULT_ERROR:
+            throw CompressionError("error while decompressing brotli file");
+        case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: // no further input exists (all of `in` was supplied), so treat as truncated/corrupt
+            throw CompressionError("incomplete or corrupt brotli file");
+        case BROTLI_DECODER_RESULT_SUCCESS:
+            if (avail_in != 0)
+                throw CompressionError("unexpected input after brotli decompression");
+            break;
+        case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT:
+            // The buffer is emptied after every call, so if the decoder asks for more room without having written anything, abort rather than retry with an identical buffer
+            if (avail_out == sizeof(outbuf))
+                throw CompressionError("brotli decompression requires larger buffer");
+            break;
+        }
+
+        // Move whatever was produced into res so that the next call starts with a completely empty buffer
+        if (avail_out < sizeof(outbuf)) {
+            res->append((char*)outbuf, sizeof(outbuf) - avail_out);
+            next_out = outbuf;
+            avail_out = sizeof(outbuf);
+        }
+
+        if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res;
+    }
+#endif // HAVE_BROTLI
 }
 
 ref<std::string> compress(const std::string & method, const std::string & in)
@@ -270,33 +323,131 @@ struct BzipSink : CompressionSink
     }
 };
 
-struct BrotliSink : CompressionSink
+struct LambdaCompressionSink : CompressionSink // buffers everything written, then compresses it with one compressFn call in finish()
 {
     Sink & nextSink;
     std::string data;
+    using CompressFnTy = std::function<std::string(const std::string&)>; // maps the whole uncompressed input to its compressed form
+    CompressFnTy compressFn;
+    LambdaCompressionSink(Sink& nextSink, CompressFnTy compressFn)
+        : nextSink(nextSink)
+        , compressFn(std::move(compressFn))
+    {
+    };
+
+    void finish() override
+    {
+        flush();
+        nextSink(compressFn(data)); // compress the accumulated input in one shot and forward the result
+    }
+
+    void write(const unsigned char * data, size_t len) override
+    {
+        checkInterrupt();
+        this->data.append((const char *) data, len); // only accumulate here; nothing reaches nextSink until finish()
+    }
+};
+
+struct BrotliCmdSink : LambdaCompressionSink // compression sink backed by the external program named by the BROTLI macro
+{
+    BrotliCmdSink(Sink& nextSink)
+        : LambdaCompressionSink(nextSink, [](const std::string& data) {
+            return runProgram(BROTLI, true, {}, data); // NOTE(review): presumably feeds `data` as the program's input and returns its output - confirm against runProgram
+        })
+    {
+    }
+};
+
+#if HAVE_BROTLI
+struct BrotliSink : CompressionSink // streaming compression sink that drives a libbrotli encoder directly
+{
+    Sink & nextSink;
+    uint8_t outbuf[BUFSIZ]; // staging buffer for encoder output before it is handed to nextSink
+    BrotliEncoderState *state; // created in the constructor, destroyed in the destructor
+    bool finished = false; // set in finish() once BrotliEncoderIsFinished() reports true
 
     BrotliSink(Sink & nextSink) : nextSink(nextSink)
     {
+        state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+        if (!state)
+            throw CompressionError("unable to initialise brotli encoder");
     }
 
     ~BrotliSink()
     {
+        BrotliEncoderDestroyInstance(state);
     }
 
-    // FIXME: use libbrotli
-
     void finish() override
     {
         flush();
-        nextSink(runProgram(BRO, true, {}, data));
+        assert(!finished);
+
+        const uint8_t *next_in = nullptr; // no new input is supplied while finishing
+        size_t avail_in = 0;
+        uint8_t *next_out = outbuf;
+        size_t avail_out = sizeof(outbuf);
+        while (!finished) {
+            checkInterrupt();
+
+            if (!BrotliEncoderCompressStream(state,
+                        BROTLI_OPERATION_FINISH,
+                        &avail_in, &next_in,
+                        &avail_out, &next_out,
+                        nullptr))
+                throw CompressionError("error while finishing brotli file");
+
+            finished = BrotliEncoderIsFinished(state);
+            if (avail_out == 0 || finished) { // forward when the buffer is full, and once more for the final partial buffer
+                nextSink(outbuf, sizeof(outbuf) - avail_out);
+                next_out = outbuf;
+                avail_out = sizeof(outbuf);
+            }
+        }
     }
 
     void write(const unsigned char * data, size_t len) override
     {
-        checkInterrupt();
-        this->data.append((const char *) data, len);
+        assert(!finished);
+
+        // Don't feed brotli too much at once: split the input into pieces of at most CHUNK_SIZE bytes
+        const size_t CHUNK_SIZE = sizeof(outbuf) << 2; // four times the size of outbuf
+        while (len) {
+          size_t n = std::min(CHUNK_SIZE, len);
+          writeInternal(data, n);
+          data += n;
+          len -= n;
+        }
+    }
+  private:
+    void writeInternal(const unsigned char * data, size_t len) // compresses one chunk and forwards any output to nextSink
+    {
+        assert(!finished);
+
+        const uint8_t *next_in = data;
+        size_t avail_in = len;
+        uint8_t *next_out = outbuf;
+        size_t avail_out = sizeof(outbuf);
+
+        while (avail_in > 0) {
+            checkInterrupt();
+
+            if (!BrotliEncoderCompressStream(state,
+                      BROTLI_OPERATION_PROCESS,
+                      &avail_in, &next_in,
+                      &avail_out, &next_out,
+                      nullptr))
+                throw CompressionError("error while compressing brotli file");
+
+            if (avail_out < sizeof(outbuf) || avail_in == 0) { // forward any output; also taken (possibly with zero bytes) once the chunk is consumed
+                nextSink(outbuf, sizeof(outbuf) - avail_out);
+                next_out = outbuf;
+                avail_out = sizeof(outbuf);
+            }
+        }
     }
 };
+#endif // HAVE_BROTLI
 
 ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink)
 {
@@ -307,7 +458,11 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next
     else if (method == "bzip2")
         return make_ref<BzipSink>(nextSink);
     else if (method == "br")
+#if HAVE_BROTLI
         return make_ref<BrotliSink>(nextSink);
+#else
+        return make_ref<BrotliCmdSink>(nextSink);
+#endif
     else
         throw UnknownCompressionMethod(format("unknown compression method '%s'") % method);
 }
diff --git a/src/libutil/local.mk b/src/libutil/local.mk
index 0721b21c2089..5fc2aab569da 100644
--- a/src/libutil/local.mk
+++ b/src/libutil/local.mk
@@ -6,8 +6,8 @@ libutil_DIR := $(d)
 
 libutil_SOURCES := $(wildcard $(d)/*.cc)
 
-libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS)
+libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS)
 
 libutil_LIBS = libformat
 
-libutil_CXXFLAGS = -DBRO=\"$(bro)\"
+libutil_CXXFLAGS = -DBROTLI=\"$(brotli)\"
diff --git a/tests/brotli.sh b/tests/brotli.sh
new file mode 100644
index 000000000000..645dd4214ec6
--- /dev/null
+++ b/tests/brotli.sh
@@ -0,0 +1,28 @@
+source common.sh
+
+# Round-trip a store path through a binary cache that uses compression=br.
+# Only test if we found brotli libraries
+# (CLI tool is likely unavailable if libraries are missing)
+if [ -n "$HAVE_BROTLI" ]; then
+
+clearStore
+clearCache
+
+cacheURI="file://$cacheDir?compression=br"
+
+outPath=$(nix-build dependencies.nix --no-out-link)
+
+nix copy --to "$cacheURI" "$outPath" # quoted: the URI contains '?', which is a glob character when unquoted
+
+HASH=$(nix hash-path "$outPath")
+
+clearStore
+clearCacheCache
+
+nix copy --from "$cacheURI" "$outPath" --no-check-sigs
+
+HASH2=$(nix hash-path "$outPath")
+
+[[ $HASH = "$HASH2" ]] # quoted right-hand side: compare literally, not as a pattern
+
+fi # HAVE_BROTLI
diff --git a/tests/common.sh.in b/tests/common.sh.in
index 09f2949141a4..83643d8b06bb 100644
--- a/tests/common.sh.in
+++ b/tests/common.sh.in
@@ -32,6 +32,7 @@ export xmllint="@xmllint@"
 export SHELL="@bash@"
 export PAGER=cat
 export HAVE_SODIUM="@HAVE_SODIUM@"
+export HAVE_BROTLI="@HAVE_BROTLI@"
 
 export version=@PACKAGE_VERSION@
 export system=@system@
diff --git a/tests/local.mk b/tests/local.mk
index baf74224bb12..83154228e999 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -19,7 +19,8 @@ nix_tests = \
   fetchGit.sh \
   fetchMercurial.sh \
   signing.sh \
-  run.sh
+  run.sh \
+  brotli.sh
   # parallel.sh
 
 install-tests += $(foreach x, $(nix_tests), tests/$(x))