about summary refs log tree commit diff
path: root/third_party/nix/src/libutil/compression.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/nix/src/libutil/compression.cc')
-rw-r--r--third_party/nix/src/libutil/compression.cc400
1 files changed, 400 insertions, 0 deletions
diff --git a/third_party/nix/src/libutil/compression.cc b/third_party/nix/src/libutil/compression.cc
new file mode 100644
index 000000000000..d0895ca5fda4
--- /dev/null
+++ b/third_party/nix/src/libutil/compression.cc
@@ -0,0 +1,400 @@
+#include "libutil/compression.hh"
+
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+
+#include <brotli/decode.h>
+#include <brotli/encode.h>
+#include <bzlib.h>
+#include <glog/logging.h>
+#include <lzma.h>
+
+#include "libutil/finally.hh"
+#include "libutil/util.hh"
+
+namespace nix {
+
+// Base for sinks that process their input in bounded pieces: write() slices
+// every incoming buffer into chunks of at most four times sizeof(outbuf) and
+// passes each chunk to writeInternal(). (Original rationale: don't feed
+// brotli too much at once.)
+struct ChunkedCompressionSink : CompressionSink {
+  // Scratch buffer that derived sinks use to stage their output.
+  uint8_t outbuf[32 * 1024];
+
+  // Splits [data, data + len) into chunks and forwards them in order. A
+  // zero-length write never reaches writeInternal().
+  void write(const unsigned char* data, size_t len) override {
+    const size_t maxChunk = 4 * sizeof(outbuf);
+    for (size_t remaining = len; remaining != 0u;) {
+      const size_t step = remaining < maxChunk ? remaining : maxChunk;
+      writeInternal(data, step);
+      data += step;
+      remaining -= step;
+    }
+  }
+
+  // Processes a single chunk; implemented by the concrete sink.
+  virtual void writeInternal(const unsigned char* data, size_t len) = 0;
+};
+
+// Pass-through sink selected for the "none" method: every byte written is
+// handed to the wrapped sink unchanged.
+struct NoneSink : CompressionSink {
+  Sink& nextSink;
+
+  explicit NoneSink(Sink& nextSink) : nextSink(nextSink) {}
+
+  // Forward the bytes verbatim to the downstream sink.
+  void write(const unsigned char* data, size_t len) override {
+    nextSink(data, len);
+  }
+
+  // Nothing to finalise beyond calling flush().
+  void finish() override { flush(); }
+};
+
+// Sink that decodes an xz/lzma stream with liblzma and forwards the decoded
+// bytes to `nextSink`.
+struct XzDecompressionSink : CompressionSink {
+  Sink& nextSink;
+  uint8_t outbuf[BUFSIZ];  // staging area for decoded bytes
+  lzma_stream strm = LZMA_STREAM_INIT;
+  bool finished = false;  // set once lzma_code() reports LZMA_STREAM_END
+
+  // Sets up the decoder (no memory limit, LZMA_CONCATENATED flag) and points
+  // its output at `outbuf`. Throws CompressionError if initialisation fails.
+  explicit XzDecompressionSink(Sink& nextSink) : nextSink(nextSink) {
+    if (lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED) != LZMA_OK) {
+      throw CompressionError("unable to initialise lzma decoder");
+    }
+
+    strm.avail_out = sizeof(outbuf);
+    strm.next_out = outbuf;
+  }
+
+  ~XzDecompressionSink() override { lzma_end(&strm); }
+
+  // Flushes the base sink, then runs the decoder in LZMA_FINISH mode by
+  // issuing a write with a null data pointer.
+  void finish() override {
+    CompressionSink::flush();
+    write(nullptr, 0);
+  }
+
+  // Decodes `len` bytes from `data`. A null `data` pointer means "no more
+  // input": the decoder is driven with LZMA_FINISH until the stream ends.
+  // Throws CompressionError on any liblzma status other than LZMA_OK or
+  // LZMA_STREAM_END.
+  void write(const unsigned char* data, size_t len) override {
+    const bool finishing = (data == nullptr);
+    const lzma_action action = finishing ? LZMA_FINISH : LZMA_RUN;
+
+    strm.next_in = data;
+    strm.avail_in = len;
+
+    while (!finished && (finishing || strm.avail_in != 0u)) {
+      checkInterrupt();
+
+      lzma_ret ret = lzma_code(&strm, action);
+      const bool acceptable = (ret == LZMA_OK) || (ret == LZMA_STREAM_END);
+      if (!acceptable) {
+        throw CompressionError("error %d while decompressing xz file", ret);
+      }
+
+      finished = (ret == LZMA_STREAM_END);
+
+      // Hand over whatever was decoded (possibly nothing once the input is
+      // used up) and reset the staging buffer.
+      const size_t produced = sizeof(outbuf) - strm.avail_out;
+      if (produced != 0 || strm.avail_in == 0) {
+        nextSink(outbuf, produced);
+        strm.next_out = outbuf;
+        strm.avail_out = sizeof(outbuf);
+      }
+    }
+  }
+};
+
+// Sink that decodes a bzip2 stream with libbz2 and forwards the decoded bytes
+// to `nextSink`. Output is staged in the `outbuf` inherited from
+// ChunkedCompressionSink.
+struct BzipDecompressionSink : ChunkedCompressionSink {
+  Sink& nextSink;
+  bz_stream strm;
+  // Records whether the last BZ2_bzDecompress() call returned BZ_STREAM_END.
+  // NOTE(review): nothing in this struct reads the flag.
+  bool finished = false;
+
+  // Zeroes the stream state, initialises the decoder and points its output
+  // at `outbuf`. Throws CompressionError if initialisation fails.
+  explicit BzipDecompressionSink(Sink& nextSink) : nextSink(nextSink) {
+    memset(&strm, 0, sizeof(strm));
+    if (BZ2_bzDecompressInit(&strm, 0, 0) != BZ_OK) {
+      throw CompressionError("unable to initialise bzip2 decoder");
+    }
+
+    strm.avail_out = sizeof(outbuf);
+    strm.next_out = reinterpret_cast<char*>(outbuf);
+  }
+
+  ~BzipDecompressionSink() override { BZ2_bzDecompressEnd(&strm); }
+
+  void finish() override {
+    flush();
+    // NOTE(review): ChunkedCompressionSink::write() does not invoke
+    // writeInternal() for a zero-length buffer, so this call does not reach
+    // the decoder.
+    write(nullptr, 0);
+  }
+
+  // Decodes one chunk. Runs until the decoder has consumed all of `data`;
+  // throws CompressionError on any status other than BZ_OK or BZ_STREAM_END.
+  void writeInternal(const unsigned char* data, size_t len) override {
+    assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
+
+    // libbz2 takes a non-const char* for its input pointer.
+    strm.next_in = const_cast<char*>(reinterpret_cast<const char*>(data));
+    strm.avail_in = len;
+
+    while (strm.avail_in > 0) {
+      checkInterrupt();
+
+      const int status = BZ2_bzDecompress(&strm);
+      const bool streamEnded = (status == BZ_STREAM_END);
+      if (status != BZ_OK && !streamEnded) {
+        throw CompressionError("error while decompressing bzip2 file");
+      }
+
+      finished = streamEnded;
+
+      // Pass decoded bytes downstream and rewind the staging buffer.
+      const size_t produced = sizeof(outbuf) - strm.avail_out;
+      if (produced > 0 || strm.avail_in == 0) {
+        nextSink(outbuf, produced);
+        strm.next_out = reinterpret_cast<char*>(outbuf);
+        strm.avail_out = sizeof(outbuf);
+      }
+    }
+  }
+};
+
+// Sink that decodes a brotli stream and forwards the decoded bytes to
+// `nextSink`. Output is staged in the `outbuf` inherited from
+// ChunkedCompressionSink.
+struct BrotliDecompressionSink : ChunkedCompressionSink {
+  Sink& nextSink;
+  BrotliDecoderState* state;
+  bool finished = false;  // true once BrotliDecoderIsFinished() reports so
+
+  // Creates the decoder instance; throws CompressionError if that fails.
+  explicit BrotliDecompressionSink(Sink& nextSink) : nextSink(nextSink) {
+    state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+    if (state == nullptr) {
+      throw CompressionError("unable to initialize brotli decoder");
+    }
+  }
+
+  ~BrotliDecompressionSink() override { BrotliDecoderDestroyInstance(state); }
+
+  // Flushes buffered input, then drains the decoder with a null-data call.
+  // Throws CompressionError if the stream is incomplete at that point.
+  void finish() override {
+    flush();
+    writeInternal(nullptr, 0);
+  }
+
+  // Decodes one chunk. A null `data` pointer means "no more input": the
+  // decoder is drained until it reports completion.
+  //
+  // Throws CompressionError if the decoder reports an error, or if it asks
+  // for more input while finishing (i.e. the stream is truncated or empty).
+  void writeInternal(const unsigned char* data, size_t len) override {
+    const bool finishing = (data == nullptr);
+
+    const uint8_t* next_in = data;
+    size_t avail_in = len;
+    uint8_t* next_out = outbuf;
+    size_t avail_out = sizeof(outbuf);
+
+    while (!finished && (finishing || (avail_in != 0u))) {
+      checkInterrupt();
+
+      const BrotliDecoderResult result = BrotliDecoderDecompressStream(
+          state, &avail_in, &next_in, &avail_out, &next_out, nullptr);
+      if (result == BROTLI_DECODER_RESULT_ERROR) {
+        throw CompressionError("error while decompressing brotli file");
+      }
+
+      if (avail_out < sizeof(outbuf) || avail_in == 0) {
+        nextSink(outbuf, sizeof(outbuf) - avail_out);
+        next_out = outbuf;
+        avail_out = sizeof(outbuf);
+      }
+
+      finished = (BrotliDecoderIsFinished(state) != 0);
+
+      // While finishing there is no further input to offer. If the decoder
+      // still wants some, the stream was cut short; without this check the
+      // loop would never terminate.
+      if (finishing && result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+        throw CompressionError("unexpected end of brotli stream");
+      }
+    }
+  }
+};
+
+// Decompresses the whole of `in` using `method` and returns the collected
+// output. The method name is resolved by makeDecompressionSink().
+ref<std::string> decompress(const std::string& method, const std::string& in) {
+  StringSink collected;
+  auto decoder = makeDecompressionSink(method, collected);
+  (*decoder)(in);
+  decoder->finish();
+  return collected.s;
+}
+
+// Builds the decompression sink for `method`, writing its output to
+// `nextSink`. Recognised names: "none" or the empty string (pass-through),
+// "xz", "bzip2" and "br". Any other name throws UnknownCompressionMethod.
+ref<CompressionSink> makeDecompressionSink(const std::string& method,
+                                           Sink& nextSink) {
+  if (method.empty() || method == "none") {
+    return make_ref<NoneSink>(nextSink);
+  }
+  if (method == "xz") {
+    return make_ref<XzDecompressionSink>(nextSink);
+  }
+  if (method == "bzip2") {
+    return make_ref<BzipDecompressionSink>(nextSink);
+  }
+  if (method == "br") {
+    return make_ref<BrotliDecompressionSink>(nextSink);
+  }
+  throw UnknownCompressionMethod("unknown compression method '%s'", method);
+}
+
+// Sink that encodes its input as an xz stream with liblzma and forwards the
+// encoded bytes to `nextSink`.
+struct XzCompressionSink : CompressionSink {
+  Sink& nextSink;
+  uint8_t outbuf[BUFSIZ];  // staging area for encoded bytes
+  lzma_stream strm = LZMA_STREAM_INIT;
+  bool finished = false;  // set once lzma_code() reports LZMA_STREAM_END
+
+  // Initialises either the multi-threaded encoder (`parallel` == true) or the
+  // single-threaded "easy" encoder at preset 6, both with CRC64 integrity
+  // checks. Throws CompressionError if initialisation fails.
+  XzCompressionSink(Sink& nextSink, bool parallel) : nextSink(nextSink) {
+    lzma_ret ret;
+
+    if (parallel) {
+      lzma_mt mt_options = {};
+      mt_options.flags = 0;
+      mt_options.block_size = 0;
+      mt_options.timeout = 300;  // Using the same setting as the xz cmd line
+      mt_options.preset = LZMA_PRESET_DEFAULT;
+      mt_options.filters = nullptr;
+      mt_options.check = LZMA_CHECK_CRC64;
+      // Fall back to a single thread when liblzma reports 0 CPU threads.
+      mt_options.threads = lzma_cputhreads();
+      if (mt_options.threads == 0) {
+        mt_options.threads = 1;
+      }
+      // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
+      // number of threads.
+      ret = lzma_stream_encoder_mt(&strm, &mt_options);
+    } else {
+      ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
+    }
+
+    if (ret != LZMA_OK) {
+      throw CompressionError("unable to initialise lzma encoder");
+    }
+
+    // FIXME: apply the x86 BCJ filter?
+
+    strm.avail_out = sizeof(outbuf);
+    strm.next_out = outbuf;
+  }
+
+  ~XzCompressionSink() override { lzma_end(&strm); }
+
+  // Flushes the base sink, then runs the encoder in LZMA_FINISH mode by
+  // issuing a write with a null data pointer.
+  void finish() override {
+    CompressionSink::flush();
+    write(nullptr, 0);
+  }
+
+  // Encodes `len` bytes from `data`. A null `data` pointer means "no more
+  // input": the encoder is driven with LZMA_FINISH until the stream ends.
+  // Throws CompressionError on any liblzma status other than LZMA_OK or
+  // LZMA_STREAM_END.
+  void write(const unsigned char* data, size_t len) override {
+    const bool finishing = (data == nullptr);
+    const lzma_action action = finishing ? LZMA_FINISH : LZMA_RUN;
+
+    strm.next_in = data;
+    strm.avail_in = len;
+
+    while (!finished && (finishing || strm.avail_in != 0u)) {
+      checkInterrupt();
+
+      lzma_ret ret = lzma_code(&strm, action);
+      const bool acceptable = (ret == LZMA_OK) || (ret == LZMA_STREAM_END);
+      if (!acceptable) {
+        throw CompressionError("error %d while compressing xz file", ret);
+      }
+
+      finished = (ret == LZMA_STREAM_END);
+
+      // Hand over whatever was encoded and reset the staging buffer.
+      const size_t produced = sizeof(outbuf) - strm.avail_out;
+      if (produced != 0 || strm.avail_in == 0) {
+        nextSink(outbuf, produced);
+        strm.next_out = outbuf;
+        strm.avail_out = sizeof(outbuf);
+      }
+    }
+  }
+};
+
+// Sink that encodes its input as a bzip2 stream with libbz2 and forwards the
+// encoded bytes to `nextSink`. Output is staged in the `outbuf` inherited
+// from ChunkedCompressionSink.
+struct BzipCompressionSink : ChunkedCompressionSink {
+  Sink& nextSink;
+  bz_stream strm;
+  bool finished = false;  // set once BZ2_bzCompress() returns BZ_STREAM_END
+
+  // Zeroes the stream state, initialises the encoder with the arguments
+  // (9, 0, 30) and points its output at `outbuf`. Throws CompressionError if
+  // initialisation fails.
+  explicit BzipCompressionSink(Sink& nextSink) : nextSink(nextSink) {
+    memset(&strm, 0, sizeof(strm));
+    if (BZ2_bzCompressInit(&strm, 9, 0, 30) != BZ_OK) {
+      throw CompressionError("unable to initialise bzip2 encoder");
+    }
+
+    strm.avail_out = sizeof(outbuf);
+    strm.next_out = reinterpret_cast<char*>(outbuf);
+  }
+
+  ~BzipCompressionSink() override { BZ2_bzCompressEnd(&strm); }
+
+  // Flushes buffered input, then drains the encoder with a null-data call.
+  void finish() override {
+    flush();
+    writeInternal(nullptr, 0);
+  }
+
+  // Encodes one chunk. A null `data` pointer means "no more input": the
+  // encoder is driven with BZ_FINISH until it reports BZ_STREAM_END. Throws
+  // CompressionError on any status other than BZ_RUN_OK, BZ_FINISH_OK or
+  // BZ_STREAM_END.
+  void writeInternal(const unsigned char* data, size_t len) override {
+    assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());
+
+    const bool finishing = (data == nullptr);
+    const int action = finishing ? BZ_FINISH : BZ_RUN;
+
+    // libbz2 takes a non-const char* for its input pointer.
+    strm.next_in = const_cast<char*>(reinterpret_cast<const char*>(data));
+    strm.avail_in = len;
+
+    while (!finished && (finishing || strm.avail_in != 0u)) {
+      checkInterrupt();
+
+      int ret = BZ2_bzCompress(&strm, action);
+      const bool acceptable =
+          (ret == BZ_RUN_OK) || (ret == BZ_FINISH_OK) || (ret == BZ_STREAM_END);
+      if (!acceptable) {
+        throw CompressionError("error %d while compressing bzip2 file", ret);
+      }
+
+      finished = (ret == BZ_STREAM_END);
+
+      // Pass encoded bytes downstream and rewind the staging buffer.
+      const size_t produced = sizeof(outbuf) - strm.avail_out;
+      if (produced != 0 || strm.avail_in == 0) {
+        nextSink(outbuf, produced);
+        strm.next_out = reinterpret_cast<char*>(outbuf);
+        strm.avail_out = sizeof(outbuf);
+      }
+    }
+  }
+};
+
+// Sink that encodes its input as a brotli stream and forwards the encoded
+// bytes to `nextSink`. Output is staged in the `outbuf` inherited from
+// ChunkedCompressionSink; no separate buffer is declared here, so the
+// inherited one is not shadowed.
+struct BrotliCompressionSink : ChunkedCompressionSink {
+  Sink& nextSink;
+  BrotliEncoderState* state;
+  bool finished = false;  // true once BrotliEncoderIsFinished() reports so
+
+  // Creates the encoder instance; throws CompressionError if that fails.
+  explicit BrotliCompressionSink(Sink& nextSink) : nextSink(nextSink) {
+    state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+    if (state == nullptr) {
+      throw CompressionError("unable to initialise brotli encoder");
+    }
+  }
+
+  ~BrotliCompressionSink() override { BrotliEncoderDestroyInstance(state); }
+
+  // Flushes buffered input, then drains the encoder with a null-data call.
+  void finish() override {
+    flush();
+    writeInternal(nullptr, 0);
+  }
+
+  // Encodes one chunk. A null `data` pointer means "no more input": the
+  // encoder is driven with BROTLI_OPERATION_FINISH until it reports that it
+  // has finished. Throws CompressionError if the encoder reports failure.
+  void writeInternal(const unsigned char* data, size_t len) override {
+    const BrotliEncoderOperation op =
+        data != nullptr ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH;
+
+    const uint8_t* next_in = data;
+    size_t avail_in = len;
+    uint8_t* next_out = outbuf;
+    size_t avail_out = sizeof(outbuf);
+
+    while (!finished && ((data == nullptr) || (avail_in != 0u))) {
+      checkInterrupt();
+
+      if (BrotliEncoderCompressStream(state, op, &avail_in, &next_in,
+                                      &avail_out, &next_out, nullptr) == 0) {
+        throw CompressionError("error while compressing brotli file");
+      }
+
+      // Pass encoded bytes downstream and rewind the staging buffer.
+      if (avail_out < sizeof(outbuf) || avail_in == 0) {
+        nextSink(outbuf, sizeof(outbuf) - avail_out);
+        next_out = outbuf;
+        avail_out = sizeof(outbuf);
+      }
+
+      finished = (BrotliEncoderIsFinished(state) != 0);
+    }
+  }
+};
+
+// Builds the compression sink for `method`, writing its output to
+// `nextSink`. Recognised names: "none" (pass-through), "xz", "bzip2" and
+// "br"; `parallel` is only passed on to the xz sink. Any other name throws
+// UnknownCompressionMethod.
+ref<CompressionSink> makeCompressionSink(const std::string& method,
+                                         Sink& nextSink, const bool parallel) {
+  if (method == "none") {
+    return make_ref<NoneSink>(nextSink);
+  }
+  if (method == "xz") {
+    return make_ref<XzCompressionSink>(nextSink, parallel);
+  }
+  if (method == "bzip2") {
+    return make_ref<BzipCompressionSink>(nextSink);
+  }
+  if (method == "br") {
+    return make_ref<BrotliCompressionSink>(nextSink);
+  }
+  throw UnknownCompressionMethod(format("unknown compression method '%s'") %
+                                 method);
+}
+
+// Compresses the whole of `in` using `method` and returns the collected
+// output. The method name and the `parallel` flag are resolved by
+// makeCompressionSink().
+ref<std::string> compress(const std::string& method, const std::string& in,
+                          const bool parallel) {
+  StringSink collected;
+  auto encoder = makeCompressionSink(method, collected, parallel);
+  (*encoder)(in);
+  encoder->finish();
+  return collected.s;
+}
+
+}  // namespace nix