From 9dd2b8ac7b8d82df8c1f3f36efb683175fd6ecee Mon Sep 17 00:00:00 2001 From: Will Dietz Date: Fri, 29 Dec 2017 14:42:14 -0600 Subject: use libbrotli directly when available * Look for both 'brotli' and 'bro' as external command, since upstream has renamed it in newer versions. If neither are found, current runtime behavior is preserved: try to find 'bro' on PATH. * Limit amount handed to BrotliEncoderCompressStream to ensure interrupts are processed in a timely manner. Testing shows negligible performance impact. (Other compression sinks don't seem to require this) --- src/libutil/compression.cc | 171 ++++++++++++++++++++++++++++++++++++++++++--- src/libutil/local.mk | 4 +- 2 files changed, 165 insertions(+), 10 deletions(-) (limited to 'src/libutil') diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc index 2b3dff3a5e..5e2631ba34 100644 --- a/src/libutil/compression.cc +++ b/src/libutil/compression.cc @@ -7,6 +7,11 @@ #include #include +#if HAVE_BROTLI +#include +#include +#endif // HAVE_BROTLI + #include namespace nix { @@ -94,8 +99,56 @@ static ref decompressBzip2(const std::string & in) static ref decompressBrotli(const std::string & in) { - // FIXME: use libbrotli - return make_ref(runProgram(BRO, true, {"-d"}, {in})); +#if !HAVE_BROTLI + return make_ref(runProgram(BROTLI, true, {"-d"}, {in})); +#else + auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + if (!s) + throw CompressionError("unable to initialize brotli decoder"); + + Finally free([s]() { BrotliDecoderDestroyInstance(s); }); + + uint8_t outbuf[BUFSIZ]; + ref res = make_ref(); + const uint8_t *next_in = (uint8_t *)in.c_str(); + size_t avail_in = in.size(); + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (true) { + checkInterrupt(); + + auto ret = BrotliDecoderDecompressStream(s, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr); + + switch (ret) { + case BROTLI_DECODER_RESULT_ERROR: + throw CompressionError("error while decompressing brotli file"); + case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: + throw CompressionError("incomplete or corrupt brotli file"); + case BROTLI_DECODER_RESULT_SUCCESS: + if (avail_in != 0) + throw CompressionError("unexpected input after brotli decompression"); + break; + case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: + // I'm not sure if this can happen, but abort if this happens with empty buffer + if (avail_out == sizeof(outbuf)) + throw CompressionError("brotli decompression requires larger buffer"); + break; + } + + // Always ensure we have full buffer for next invocation + if (avail_out < sizeof(outbuf)) { + res->append((char*)outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + + if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res; + } +#endif // HAVE_BROTLI } ref compress(const std::string & method, const std::string & in) @@ -270,33 +323,131 @@ struct BzipSink : CompressionSink } }; -struct BrotliSink : CompressionSink +struct LambdaCompressionSink : CompressionSink { Sink & nextSink; std::string data; + using CompressFnTy = std::function; + CompressFnTy compressFn; + LambdaCompressionSink(Sink& nextSink, CompressFnTy compressFn) + : nextSink(nextSink) + , compressFn(std::move(compressFn)) + { + }; + + void finish() override + { + flush(); + nextSink(compressFn(data)); + } + + void write(const unsigned char * data, size_t len) override + { + checkInterrupt(); + this->data.append((const char *) data, len); + } +}; + +struct BrotliCmdSink : LambdaCompressionSink +{ + BrotliCmdSink(Sink& nextSink) + : LambdaCompressionSink(nextSink, [](const std::string& data) { + return runProgram(BROTLI, true, {}, data); + }) + { + } +}; + +#if HAVE_BROTLI +struct BrotliSink : CompressionSink +{ + Sink & nextSink; + uint8_t outbuf[BUFSIZ]; + BrotliEncoderState *state; + bool finished = false; BrotliSink(Sink & nextSink) : nextSink(nextSink) { + state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); + if (!state) + throw CompressionError("unable to initialise brotli encoder"); } ~BrotliSink() { + BrotliEncoderDestroyInstance(state); } - // FIXME: use libbrotli - void finish() override { flush(); - nextSink(runProgram(BRO, true, {}, data)); + assert(!finished); + + const uint8_t *next_in = nullptr; + size_t avail_in = 0; + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + while (!finished) { + checkInterrupt(); + + if (!BrotliEncoderCompressStream(state, + BROTLI_OPERATION_FINISH, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while finishing brotli file"); + + finished = BrotliEncoderIsFinished(state); + if (avail_out == 0 || finished) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + } } void write(const unsigned char * data, size_t len) override { - checkInterrupt(); - this->data.append((const char *) data, len); + assert(!finished); + + // Don't feed brotli too much at once + const size_t CHUNK_SIZE = sizeof(outbuf) << 2; + while (len) { + size_t n = std::min(CHUNK_SIZE, len); + writeInternal(data, n); + data += n; + len -= n; + } + } + private: + void writeInternal(const unsigned char * data, size_t len) + { + assert(!finished); + + const uint8_t *next_in = data; + size_t avail_in = len; + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (avail_in > 0) { + checkInterrupt(); + + if (!BrotliEncoderCompressStream(state, + BROTLI_OPERATION_PROCESS, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while compressing brotli file"); + + if (avail_out < sizeof(outbuf) || avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + } } }; +#endif // HAVE_BROTLI ref makeCompressionSink(const std::string & method, Sink & nextSink) { @@ -307,7 +458,11 @@ ref makeCompressionSink(const std::string & method, Sink & next else if (method == "bzip2") return make_ref(nextSink); else if (method == "br") +#if HAVE_BROTLI return make_ref(nextSink); +#else + return make_ref(nextSink); +#endif else throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); } diff --git a/src/libutil/local.mk b/src/libutil/local.mk index 0721b21c20..5fc2aab569 100644 --- a/src/libutil/local.mk +++ b/src/libutil/local.mk @@ -6,8 +6,8 @@ libutil_DIR := $(d) libutil_SOURCES := $(wildcard $(d)/*.cc) -libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) +libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) libutil_LIBS = libformat -libutil_CXXFLAGS = -DBRO=\"$(bro)\" +libutil_CXXFLAGS = -DBROTLI=\"$(brotli)\" -- cgit 1.4.1