#include "compression.hh" #include <cstdio> #include <cstring> #include <iostream> #include <brotli/decode.h> #include <brotli/encode.h> #include <bzlib.h> #include <lzma.h> #include "finally.hh" #include "glog/logging.h" #include "util.hh" namespace nix { // Don't feed brotli too much at once. struct ChunkedCompressionSink : CompressionSink { uint8_t outbuf[32 * 1024]; void write(const unsigned char* data, size_t len) override { const size_t CHUNK_SIZE = sizeof(outbuf) << 2; while (len) { size_t n = std::min(CHUNK_SIZE, len); writeInternal(data, n); data += n; len -= n; } } virtual void writeInternal(const unsigned char* data, size_t len) = 0; }; struct NoneSink : CompressionSink { Sink& nextSink; NoneSink(Sink& nextSink) : nextSink(nextSink) {} void finish() override { flush(); } void write(const unsigned char* data, size_t len) override { nextSink(data, len); } }; struct XzDecompressionSink : CompressionSink { Sink& nextSink; uint8_t outbuf[BUFSIZ]; lzma_stream strm = LZMA_STREAM_INIT; bool finished = false; XzDecompressionSink(Sink& nextSink) : nextSink(nextSink) { lzma_ret ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); if (ret != LZMA_OK) throw CompressionError("unable to initialise lzma decoder"); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } ~XzDecompressionSink() { lzma_end(&strm); } void finish() override { CompressionSink::flush(); write(nullptr, 0); } void write(const unsigned char* data, size_t len) override { strm.next_in = data; strm.avail_in = len; while (!finished && (!data || strm.avail_in)) { checkInterrupt(); lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH); if (ret != LZMA_OK && ret != LZMA_STREAM_END) throw CompressionError("error %d while decompressing xz file", ret); finished = ret == LZMA_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } } } }; struct BzipDecompressionSink : ChunkedCompressionSink { Sink& nextSink; bz_stream strm; bool finished = false; BzipDecompressionSink(Sink& nextSink) : nextSink(nextSink) { memset(&strm, 0, sizeof(strm)); int ret = BZ2_bzDecompressInit(&strm, 0, 0); if (ret != BZ_OK) throw CompressionError("unable to initialise bzip2 decoder"); strm.next_out = (char*)outbuf; strm.avail_out = sizeof(outbuf); } ~BzipDecompressionSink() { BZ2_bzDecompressEnd(&strm); } void finish() override { flush(); write(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); strm.next_in = (char*)data; strm.avail_in = len; while (strm.avail_in) { checkInterrupt(); int ret = BZ2_bzDecompress(&strm); if (ret != BZ_OK && ret != BZ_STREAM_END) throw CompressionError("error while decompressing bzip2 file"); finished = ret == BZ_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = (char*)outbuf; strm.avail_out = sizeof(outbuf); } } } }; struct BrotliDecompressionSink : ChunkedCompressionSink { Sink& nextSink; BrotliDecoderState* state; bool finished = false; BrotliDecompressionSink(Sink& nextSink) : nextSink(nextSink) { state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); if (!state) { throw CompressionError("unable to initialize brotli decoder"); } } ~BrotliDecompressionSink() { BrotliDecoderDestroyInstance(state); } void finish() override { flush(); writeInternal(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { const uint8_t* next_in = data; size_t avail_in = len; uint8_t* next_out = outbuf; size_t avail_out = sizeof(outbuf); while (!finished && (!data || avail_in)) { checkInterrupt(); if (!BrotliDecoderDecompressStream(state, &avail_in, &next_in, &avail_out, &next_out, nullptr)) throw CompressionError("error while decompressing brotli file"); if (avail_out < sizeof(outbuf) || avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - avail_out); next_out = outbuf; avail_out = sizeof(outbuf); } finished = BrotliDecoderIsFinished(state); } } }; ref<std::string> decompress(const std::string& method, const std::string& in) { StringSink ssink; auto sink = makeDecompressionSink(method, ssink); (*sink)(in); sink->finish(); return ssink.s; } ref<CompressionSink> makeDecompressionSink(const std::string& method, Sink& nextSink) { if (method == "none" || method == "") return make_ref<NoneSink>(nextSink); else if (method == "xz") return make_ref<XzDecompressionSink>(nextSink); else if (method == "bzip2") return make_ref<BzipDecompressionSink>(nextSink); else if (method == "br") return make_ref<BrotliDecompressionSink>(nextSink); else throw UnknownCompressionMethod("unknown compression method '%s'", method); } struct XzCompressionSink : CompressionSink { Sink& nextSink; uint8_t outbuf[BUFSIZ]; lzma_stream strm = LZMA_STREAM_INIT; bool finished = false; XzCompressionSink(Sink& nextSink, bool parallel) : nextSink(nextSink) { lzma_ret ret; bool done = false; if (parallel) { #ifdef HAVE_LZMA_MT lzma_mt mt_options = {}; mt_options.flags = 0; mt_options.timeout = 300; // Using the same setting as the xz cmd line mt_options.preset = LZMA_PRESET_DEFAULT; mt_options.filters = NULL; mt_options.check = LZMA_CHECK_CRC64; mt_options.threads = lzma_cputhreads(); mt_options.block_size = 0; if (mt_options.threads == 0) { mt_options.threads = 1; } // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the // number of threads. ret = lzma_stream_encoder_mt(&strm, &mt_options); done = true; #else LOG(ERROR) << "parallel XZ compression requested but not supported, " << "falling back to single-threaded compression"; #endif } if (!done) { ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); } if (ret != LZMA_OK) { throw CompressionError("unable to initialise lzma encoder"); } // FIXME: apply the x86 BCJ filter? strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } ~XzCompressionSink() { lzma_end(&strm); } void finish() override { CompressionSink::flush(); write(nullptr, 0); } void write(const unsigned char* data, size_t len) override { strm.next_in = data; strm.avail_in = len; while (!finished && (!data || strm.avail_in)) { checkInterrupt(); lzma_ret ret = lzma_code(&strm, data ? LZMA_RUN : LZMA_FINISH); if (ret != LZMA_OK && ret != LZMA_STREAM_END) throw CompressionError("error %d while compressing xz file", ret); finished = ret == LZMA_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = outbuf; strm.avail_out = sizeof(outbuf); } } } }; struct BzipCompressionSink : ChunkedCompressionSink { Sink& nextSink; bz_stream strm; bool finished = false; BzipCompressionSink(Sink& nextSink) : nextSink(nextSink) { memset(&strm, 0, sizeof(strm)); int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); if (ret != BZ_OK) throw CompressionError("unable to initialise bzip2 encoder"); strm.next_out = (char*)outbuf; strm.avail_out = sizeof(outbuf); } ~BzipCompressionSink() { BZ2_bzCompressEnd(&strm); } void finish() override { flush(); writeInternal(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); strm.next_in = (char*)data; strm.avail_in = len; while (!finished && (!data || strm.avail_in)) { checkInterrupt(); int ret = BZ2_bzCompress(&strm, data ? BZ_RUN : BZ_FINISH); if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END) throw CompressionError("error %d while compressing bzip2 file", ret); finished = ret == BZ_STREAM_END; if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - strm.avail_out); strm.next_out = (char*)outbuf; strm.avail_out = sizeof(outbuf); } } } }; struct BrotliCompressionSink : ChunkedCompressionSink { Sink& nextSink; uint8_t outbuf[BUFSIZ]; BrotliEncoderState* state; bool finished = false; BrotliCompressionSink(Sink& nextSink) : nextSink(nextSink) { state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); if (!state) { throw CompressionError("unable to initialise brotli encoder"); } } ~BrotliCompressionSink() { BrotliEncoderDestroyInstance(state); } void finish() override { flush(); writeInternal(nullptr, 0); } void writeInternal(const unsigned char* data, size_t len) override { const uint8_t* next_in = data; size_t avail_in = len; uint8_t* next_out = outbuf; size_t avail_out = sizeof(outbuf); while (!finished && (!data || avail_in)) { checkInterrupt(); if (!BrotliEncoderCompressStream( state, data ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH, &avail_in, &next_in, &avail_out, &next_out, nullptr)) throw CompressionError("error while compressing brotli compression"); if (avail_out < sizeof(outbuf) || avail_in == 0) { nextSink(outbuf, sizeof(outbuf) - avail_out); next_out = outbuf; avail_out = sizeof(outbuf); } finished = BrotliEncoderIsFinished(state); } } }; ref<CompressionSink> makeCompressionSink(const std::string& method, Sink& nextSink, const bool parallel) { if (method == "none") return make_ref<NoneSink>(nextSink); else if (method == "xz") return make_ref<XzCompressionSink>(nextSink, parallel); else if (method == "bzip2") return make_ref<BzipCompressionSink>(nextSink); else if (method == "br") return make_ref<BrotliCompressionSink>(nextSink); else throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); } ref<std::string> compress(const std::string& method, const std::string& in, const bool parallel) { StringSink ssink; auto sink = makeCompressionSink(method, ssink, parallel); (*sink)(in); sink->finish(); return ssink.s; } } // namespace nix