diff options
author | Will Dietz <w@wdtz.org> | 2017-12-29T20·42-0600 |
---|---|---|
committer | Will Dietz <w@wdtz.org> | 2017-12-31T02·26-0600 |
commit | 9dd2b8ac7b8d82df8c1f3f36efb683175fd6ecee (patch) | |
tree | e05cbf82ccce203db9b52d25220e2838544034d8 /src | |
parent | 6a0dd635084213bf75c1f36bc9bc38d242096e65 (diff) |
use libbrotli directly when available
* Look for both 'brotli' and 'bro' as external command, since upstream has renamed it in newer versions. If neither are found, current runtime behavior is preserved: try to find 'bro' on PATH. * Limit amount handed to BrotliEncoderCompressStream to ensure interrupts are processed in a timely manner. Testing shows negligible performance impact. (Other compression sinks don't seem to require this)
Diffstat (limited to 'src')
-rw-r--r-- | src/libutil/compression.cc | 171 | ||||
-rw-r--r-- | src/libutil/local.mk | 4 |
2 files changed, 165 insertions, 10 deletions
diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc index 2b3dff3a5ea1..5e2631ba3408 100644 --- a/src/libutil/compression.cc +++ b/src/libutil/compression.cc @@ -7,6 +7,11 @@ #include <cstdio> #include <cstring> +#if HAVE_BROTLI +#include <brotli/decode.h> +#include <brotli/encode.h> +#endif // HAVE_BROTLI + #include <iostream> namespace nix { @@ -94,8 +99,56 @@ static ref<std::string> decompressBzip2(const std::string & in) static ref<std::string> decompressBrotli(const std::string & in) { - // FIXME: use libbrotli - return make_ref<std::string>(runProgram(BRO, true, {"-d"}, {in})); +#if !HAVE_BROTLI + return make_ref<std::string>(runProgram(BROTLI, true, {"-d"}, {in})); +#else + auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + if (!s) + throw CompressionError("unable to initialize brotli decoder"); + + Finally free([s]() { BrotliDecoderDestroyInstance(s); }); + + uint8_t outbuf[BUFSIZ]; + ref<std::string> res = make_ref<std::string>(); + const uint8_t *next_in = (uint8_t *)in.c_str(); + size_t avail_in = in.size(); + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (true) { + checkInterrupt(); + + auto ret = BrotliDecoderDecompressStream(s, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr); + + switch (ret) { + case BROTLI_DECODER_RESULT_ERROR: + throw CompressionError("error while decompressing brotli file"); + case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: + throw CompressionError("incomplete or corrupt brotli file"); + case BROTLI_DECODER_RESULT_SUCCESS: + if (avail_in != 0) + throw CompressionError("unexpected input after brotli decompression"); + break; + case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: + // I'm not sure if this can happen, but abort if this happens with empty buffer + if (avail_out == sizeof(outbuf)) + throw CompressionError("brotli decompression requires larger buffer"); + break; + } + + // Always ensure we have full buffer for next invocation + if (avail_out < sizeof(outbuf)) { + res->append((char*)outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + + if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res; + } +#endif // HAVE_BROTLI } ref<std::string> compress(const std::string & method, const std::string & in) @@ -270,33 +323,131 @@ struct BzipSink : CompressionSink } }; -struct BrotliSink : CompressionSink +struct LambdaCompressionSink : CompressionSink { Sink & nextSink; std::string data; + using CompressFnTy = std::function<std::string(const std::string&)>; + CompressFnTy compressFn; + LambdaCompressionSink(Sink& nextSink, CompressFnTy compressFn) + : nextSink(nextSink) + , compressFn(std::move(compressFn)) + { + }; + + void finish() override + { + flush(); + nextSink(compressFn(data)); + } + + void write(const unsigned char * data, size_t len) override + { + checkInterrupt(); + this->data.append((const char *) data, len); + } +}; + +struct BrotliCmdSink : LambdaCompressionSink +{ + BrotliCmdSink(Sink& nextSink) + : LambdaCompressionSink(nextSink, [](const std::string& data) { + return runProgram(BROTLI, true, {}, data); + }) + { + } +}; + +#if HAVE_BROTLI +struct BrotliSink : CompressionSink +{ + Sink & nextSink; + uint8_t outbuf[BUFSIZ]; + BrotliEncoderState *state; + bool finished = false; BrotliSink(Sink & nextSink) : nextSink(nextSink) { + state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); + if (!state) + throw CompressionError("unable to initialise brotli encoder"); } ~BrotliSink() { + BrotliEncoderDestroyInstance(state); } - // FIXME: use libbrotli - void finish() override { flush(); - nextSink(runProgram(BRO, true, {}, data)); + assert(!finished); + + const uint8_t *next_in = nullptr; + size_t avail_in = 0; + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + while (!finished) { + checkInterrupt(); + + if (!BrotliEncoderCompressStream(state, + BROTLI_OPERATION_FINISH, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while finishing brotli file"); + + finished = BrotliEncoderIsFinished(state); + if (avail_out == 0 || finished) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + } } void write(const unsigned char * data, size_t len) override { - checkInterrupt(); - this->data.append((const char *) data, len); + assert(!finished); + + // Don't feed brotli too much at once + const size_t CHUNK_SIZE = sizeof(outbuf) << 2; + while (len) { + size_t n = std::min(CHUNK_SIZE, len); + writeInternal(data, n); + data += n; + len -= n; + } + } + private: + void writeInternal(const unsigned char * data, size_t len) + { + assert(!finished); + + const uint8_t *next_in = data; + size_t avail_in = len; + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (avail_in > 0) { + checkInterrupt(); + + if (!BrotliEncoderCompressStream(state, + BROTLI_OPERATION_PROCESS, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while compressing brotli file"); + + if (avail_out < sizeof(outbuf) || avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + } } }; +#endif // HAVE_BROTLI ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & nextSink) { @@ -307,7 +458,11 @@ ref<CompressionSink> makeCompressionSink(const std::string & method, Sink & next else if (method == "bzip2") return make_ref<BzipSink>(nextSink); else if (method == "br") +#if HAVE_BROTLI return make_ref<BrotliSink>(nextSink); +#else + return make_ref<BrotliCmdSink>(nextSink); +#endif else throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); } diff --git a/src/libutil/local.mk b/src/libutil/local.mk index 0721b21c2089..5fc2aab569da 100644 --- a/src/libutil/local.mk +++ b/src/libutil/local.mk @@ -6,8 +6,8 @@ libutil_DIR := $(d) libutil_SOURCES := $(wildcard $(d)/*.cc) -libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) +libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) libutil_LIBS = libformat -libutil_CXXFLAGS = -DBRO=\"$(bro)\" +libutil_CXXFLAGS = -DBROTLI=\"$(brotli)\" |