#include "libutil/compression.hh" #include <cstdio> #include <cstring> #include <iostream> #include <brotli/decode.h> #include <brotli/encode.h> #include <bzlib.h> #include <glog/logging.h> #include <lzma.h> #include "libutil/finally.hh" #include "libutil/util.hh" namespace nix { // Don't feed brotli too much at once. struct ChunkedCompressionSink : CompressionSink { uint8_t outbuf[32 * 1024]{}; void write(const unsigned char* data, size_t len) override { const size_t CHUNK_SIZE = sizeof(outbuf) << 2; while (len != 0u) { size_t n = std::min(CHUNK_SIZE, len); writeInternal(data, n); data += n; len -= n; } } virtual void writeInternal(const unsigned char* data, size_t len) = 0; }; struct NoneSink : CompressionSink { Sink& nextSink; explicit NoneSink(Sink& nextSink) : nextSink(nextSink) {} void finish() override { flush(); } void write(const unsigned char* data, size_t len) override { nextSink(data, len); } }; struct XzDecompressionSink : CompressionSink { Sink& nextSink; uint8_t outbuf[BUFSIZ]{}; lzma_stream strm = LZMA_STREAM_INIT; bool finished = false; explicit XzDecompressionSink(Sink& nextSink) : nextSink(nextSink) { lzma_ret ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); if (ret != LZMA_OK) { throw CompressionError("unable to initialise lzma decoder"); } strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } ~XzDecompressionSink() override { lzma_end(&strm); } void finish() override { CompressionSink::flush(); write(nullptr, 0); } void write(const unsigned char* data, size_t len) override { strm.next_in = data; strm.avail_in = len; while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) { checkInterrupt(); lzma_ret ret = lzma_code(&strm, data != nullptr ? LZMA_RUN : LZMA_FINISH); if (ret != LZMA_OK && ret != LZMA_STREAM_END) { throw CompressionError("error %d while decompressing xz file", ret); } finished = ret == LZMA_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } } } }; struct BzipDecompressionSink : ChunkedCompressionSink { Sink& nextSink; bz_stream strm{}; bool finished = false; explicit BzipDecompressionSink(Sink& nextSink) : nextSink(nextSink) { memset(&strm, 0, sizeof(strm)); int ret = BZ2_bzDecompressInit(&strm, 0, 0); if (ret != BZ_OK) { throw CompressionError("unable to initialise bzip2 decoder"); } strm.next_out = reinterpret_cast<char*>(outbuf); strm.avail_out = sizeof(outbuf); } ~BzipDecompressionSink() override { BZ2_bzDecompressEnd(&strm); } void finish() override { flush(); write(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); strm.next_in = (char*)data; strm.avail_in = len; while (strm.avail_in != 0u) { checkInterrupt(); int ret = BZ2_bzDecompress(&strm); if (ret != BZ_OK && ret != BZ_STREAM_END) { throw CompressionError("error while decompressing bzip2 file"); } finished = ret == BZ_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = reinterpret_cast<char*>(outbuf); strm.avail_out = sizeof(outbuf); } } } }; struct BrotliDecompressionSink : ChunkedCompressionSink { Sink& nextSink; BrotliDecoderState* state; bool finished = false; explicit BrotliDecompressionSink(Sink& nextSink) : nextSink(nextSink) { state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); if (state == nullptr) { throw CompressionError("unable to initialize brotli decoder"); } } ~BrotliDecompressionSink() override { BrotliDecoderDestroyInstance(state); } void finish() override { flush(); writeInternal(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { const uint8_t* next_in = data; size_t avail_in = len; uint8_t* next_out = outbuf; size_t avail_out = sizeof(outbuf); while (!finished && ((data == nullptr) || (avail_in != 0u))) { checkInterrupt(); if (BrotliDecoderDecompressStream(state, &avail_in, &next_in, &avail_out, &next_out, nullptr) == 0u) { throw CompressionError("error while decompressing brotli file"); } if (avail_out < sizeof(outbuf) || avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - avail_out); next_out = outbuf; avail_out = sizeof(outbuf); } finished = (BrotliDecoderIsFinished(state) != 0); } } }; ref<std::string> decompress(const std::string& method, const std::string& in) { StringSink ssink; auto sink = makeDecompressionSink(method, ssink); (*sink)(in); sink->finish(); return ssink.s; } ref<CompressionSink> makeDecompressionSink(const std::string& method, Sink& nextSink) { if (method == "none" || method.empty()) { return make_ref<NoneSink>(nextSink); } if (method == "xz") { return make_ref<XzDecompressionSink>(nextSink); } else if (method == "bzip2") { return make_ref<BzipDecompressionSink>(nextSink); } else if (method == "br") { return make_ref<BrotliDecompressionSink>(nextSink); } else { throw UnknownCompressionMethod("unknown compression method '%s'", method); } } struct XzCompressionSink : CompressionSink { Sink& nextSink; uint8_t outbuf[BUFSIZ]{}; lzma_stream strm = LZMA_STREAM_INIT; bool finished = false; XzCompressionSink(Sink& nextSink, bool parallel) : nextSink(nextSink) { lzma_ret ret{}; bool done = false; if (parallel) { lzma_mt mt_options = {}; mt_options.flags = 0; mt_options.timeout = 300; // Using the same setting as the xz cmd line mt_options.preset = LZMA_PRESET_DEFAULT; mt_options.filters = NULL; mt_options.check = LZMA_CHECK_CRC64; mt_options.threads = lzma_cputhreads(); mt_options.block_size = 0; if (mt_options.threads == 0) { mt_options.threads = 1; } // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the // number of threads. ret = lzma_stream_encoder_mt(&strm, &mt_options); done = true; } if (!done) { ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); } if (ret != LZMA_OK) { throw CompressionError("unable to initialise lzma encoder"); } // FIXME: apply the x86 BCJ filter? strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } ~XzCompressionSink() override { lzma_end(&strm); } void finish() override { CompressionSink::flush(); write(nullptr, 0); } void write(const unsigned char* data, size_t len) override { strm.next_in = data; strm.avail_in = len; while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) { checkInterrupt(); lzma_ret ret = lzma_code(&strm, data != nullptr ? LZMA_RUN : LZMA_FINISH); if (ret != LZMA_OK && ret != LZMA_STREAM_END) { throw CompressionError("error %d while compressing xz file", ret); } finished = ret == LZMA_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } } } }; struct BzipCompressionSink : ChunkedCompressionSink { Sink& nextSink; bz_stream strm{}; bool finished = false; explicit BzipCompressionSink(Sink& nextSink) : nextSink(nextSink) { memset(&strm, 0, sizeof(strm)); int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); if (ret != BZ_OK) { throw CompressionError("unable to initialise bzip2 encoder"); } strm.next_out = reinterpret_cast<char*>(outbuf); strm.avail_out = sizeof(outbuf); } ~BzipCompressionSink() override { BZ2_bzCompressEnd(&strm); } void finish() override { flush(); writeInternal(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); strm.next_in = (char*)data; strm.avail_in = len; while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) { checkInterrupt(); int ret = BZ2_bzCompress(&strm, data != nullptr ? BZ_RUN : BZ_FINISH); if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END) { throw CompressionError("error %d while compressing bzip2 file", ret); } finished = ret == BZ_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = reinterpret_cast<char*>(outbuf); strm.avail_out = sizeof(outbuf); } } } }; struct BrotliCompressionSink : ChunkedCompressionSink { Sink& nextSink; uint8_t outbuf[BUFSIZ]{}; BrotliEncoderState* state; bool finished = false; explicit BrotliCompressionSink(Sink& nextSink) : nextSink(nextSink) { state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); if (state == nullptr) { throw CompressionError("unable to initialise brotli encoder"); } } ~BrotliCompressionSink() override { BrotliEncoderDestroyInstance(state); } void finish() override { flush(); writeInternal(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { const uint8_t* next_in = data; size_t avail_in = len; uint8_t* next_out = outbuf; size_t avail_out = sizeof(outbuf); while (!finished && ((data == nullptr) || (avail_in != 0u))) { checkInterrupt(); if (BrotliEncoderCompressStream(state, data != nullptr ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH, &avail_in, &next_in, &avail_out, &next_out, nullptr) == 0) { throw CompressionError("error while compressing brotli compression"); } if (avail_out < sizeof(outbuf) || avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - avail_out); next_out = outbuf; avail_out = sizeof(outbuf); } finished = (BrotliEncoderIsFinished(state) != 0); } } }; ref<CompressionSink> makeCompressionSink(const std::string& method, Sink& nextSink, const bool parallel) { if (method == "none") { return make_ref<NoneSink>(nextSink); } if (method == "xz") { return make_ref<XzCompressionSink>(nextSink, parallel); } else if (method == "bzip2") { return make_ref<BzipCompressionSink>(nextSink); } else if (method == "br") { return make_ref<BrotliCompressionSink>(nextSink); } else { throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); } } ref<std::string> compress(const std::string& method, const std::string& in, const bool parallel) { StringSink ssink; auto sink = makeCompressionSink(method, ssink, parallel); (*sink)(in); sink->finish(); return ssink.s; } } // namespace nix