about summary refs log blame commit diff
path: root/third_party/nix/src/libutil/compression.cc
blob: d0895ca5fda4a7a63ace864232354e24596e5e5d (plain) (tree)
1
2
3
4
5
6
7
8
9
                                 




                   

                          
                  
                         
                 
 

                             
 
               
 
                                      
                                                 
                            


                                                              
                       



                                           
     
   
 
                                                                        

  

                                   
                                                           



                                                              
  
 

                                              
                         

                                      
 
                                                                     
                                                                             
                         
                                                                  
     
 


                                    
 
                                                      
 



                             
 


                                                              
 
                                                                       
                       
 
                                                                                
                                                     
                                                                            
       
 
                                        
 




                                                                  
     
   
  
 

                                                       
                 
                        
 
                                                                       

                                                
                       
                                                                   
     
 
                                                    

                                    
 
                                                                   
 



                          
 

                                                                       
 

                               
 
                                 
                       
 
                                        
                                                 
                                                                       
       
 
                                      
 

                                                                  
                                                        

                                        
     
   
  
 



                                                         
 
                                                                         
                                                                   
                           

                                                                    
   
 
                                                                              











                                                                      
                                                                  

                       

                                                                               
                                                                        
       





                                                        
 
                                                       
     
   
  
 





                                                                               

 

                                                                     
                                           
                                        

                       
                                                   
                                 
                                                     
                              
                                                       
          
                                                                              
   

 

                                            
                         

                                      
 
                                                                         
                 
                      
 
                   







                                                                             


                                    



                                                                          
     
 


                                                          
 
                         
                                                                  
     
 
                                       
 


                                    
 
                                                    
 



                             
 


                                                              
 
                                                                       
                       
 
                                                                                
                                                     
                                                                          
       
 
                                        
 




                                                                  
     
   

  

                                                     
                 
                        
 
                                                                     

                                                  
                       
                                                                   
     
 
                                                    

                                    
 
                                                               
 



                              
 

                                                                       
 

                               
 
                                                                       
                       
 
                                                                            
                                                                            
                                                                             
       
 
                                      
 

                                                                  
                                                        

                                        
     
   

  

                                                       
                         


                            
                                                                       
                                                                   
                           

                                                                    

   
                                                                            











                                                                      
                                                                  

                       




                                                                                
                                                                             
       






                                                        
                                                       
     
   

  

                                                                               
                         
                                        

                       
                                                           
                                 
                                                   
                              
                                                     
          

                                                                              
   

 






                                                                           

 
                   
#include "libutil/compression.hh"

#include <cstdio>
#include <cstring>
#include <iostream>

#include <brotli/decode.h>
#include <brotli/encode.h>
#include <bzlib.h>
#include <glog/logging.h>
#include <lzma.h>

#include "libutil/finally.hh"
#include "libutil/util.hh"

namespace nix {

// Base class for sinks whose underlying codec should not be handed an
// arbitrarily large input in a single call (originally introduced for brotli).
// write() slices the caller's buffer into pieces of at most
// 4 * sizeof(outbuf) bytes and forwards each piece to writeInternal().
struct ChunkedCompressionSink : CompressionSink {
  // Scratch output buffer available to subclasses.
  uint8_t outbuf[32 * 1024];

  // Forwards `len` bytes starting at `data` to writeInternal() in bounded
  // pieces. A zero-length write results in no writeInternal() call at all.
  void write(const unsigned char* data, size_t len) override {
    const size_t CHUNK_SIZE = 4 * sizeof(outbuf);
    size_t offset = 0;
    while (offset < len) {
      const size_t step = std::min(CHUNK_SIZE, len - offset);
      writeInternal(data + offset, step);
      offset += step;
    }
  }

  // Processes one bounded piece of input; implemented by each codec.
  virtual void writeInternal(const unsigned char* data, size_t len) = 0;
};

// Pass-through sink used for the "none" method: bytes are handed to the
// downstream sink unchanged.
struct NoneSink : CompressionSink {
  Sink& nextSink;

  explicit NoneSink(Sink& nextSink) : nextSink(nextSink) {}

  // Hands the bytes straight to the downstream sink.
  void write(const unsigned char* data, size_t len) override {
    nextSink(data, len);
  }

  // Nothing codec-specific to finalise; only the inherited flush() is run.
  void finish() override {
    flush();
  }
};

struct XzDecompressionSink : CompressionSink {
  Sink& nextSink;
  uint8_t outbuf[BUFSIZ];
  lzma_stream strm = LZMA_STREAM_INIT;
  bool finished = false;

  explicit XzDecompressionSink(Sink& nextSink) : nextSink(nextSink) {
    lzma_ret ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
    if (ret != LZMA_OK) {
      throw CompressionError("unable to initialise lzma decoder");
    }

    strm.next_out = outbuf;
    strm.avail_out = sizeof(outbuf);
  }

  ~XzDecompressionSink() override { lzma_end(&strm); }

  void finish() override {
    CompressionSink::flush();
    write(nullptr, 0);
  }

  void write(const unsigned char* data, size_t len) override {
    strm.next_in = data;
    strm.avail_in = len;

    while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) {
      checkInterrupt();

      lzma_ret ret = lzma_code(&strm, data != nullptr ? LZMA_RUN : LZMA_FINISH);
      if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
        throw CompressionError("error %d while decompressing xz file", ret);
      }

      finished = ret == LZMA_STREAM_END;

      if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
        nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
        strm.next_out = outbuf;
        strm.avail_out = sizeof(outbuf);
      }
    }
  }
};

// Streaming bzip2 decoder. Input arrives in bounded pieces through
// writeInternal(); decoded bytes are forwarded to `nextSink` via the
// inherited `outbuf`.
struct BzipDecompressionSink : ChunkedCompressionSink {
  Sink& nextSink;
  bz_stream strm;
  bool finished = false;  // records whether the last call hit BZ_STREAM_END

  // Zeroes the stream state, initialises the decoder (verbosity 0, small 0)
  // and points the output at `outbuf`.
  // Throws CompressionError if libbz2 cannot initialise the decoder.
  explicit BzipDecompressionSink(Sink& nextSink) : nextSink(nextSink) {
    memset(&strm, 0, sizeof(strm));
    if (BZ2_bzDecompressInit(&strm, 0, 0) != BZ_OK) {
      throw CompressionError("unable to initialise bzip2 decoder");
    }

    strm.avail_out = sizeof(outbuf);
    strm.next_out = reinterpret_cast<char*>(outbuf);
  }

  ~BzipDecompressionSink() override { BZ2_bzDecompressEnd(&strm); }

  // Flushes the inherited buffering and then issues a zero-length write.
  // NOTE(review): with the chunking loop in ChunkedCompressionSink::write a
  // zero-length write never reaches writeInternal(), so no extra decoding
  // step happens here and a truncated stream is not reported — confirm this
  // is intended.
  void finish() override {
    flush();
    write(nullptr, 0);
  }

  // Decodes one bounded piece of compressed input.
  // Throws CompressionError on any result other than BZ_OK / BZ_STREAM_END.
  void writeInternal(const unsigned char* data, size_t len) override {
    assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());

    // bz_stream wants a mutable char*; the cast only adapts the pointer type.
    strm.next_in = const_cast<char*>(reinterpret_cast<const char*>(data));
    strm.avail_in = len;

    while (strm.avail_in != 0u) {
      checkInterrupt();

      const int rc = BZ2_bzDecompress(&strm);
      const bool ok = (rc == BZ_OK) || (rc == BZ_STREAM_END);
      if (!ok) {
        throw CompressionError("error while decompressing bzip2 file");
      }

      finished = (rc == BZ_STREAM_END);

      // Forward produced bytes (or a zero-length chunk once the input is
      // used up) and rewind the output buffer.
      const size_t produced = sizeof(outbuf) - strm.avail_out;
      if (produced != 0 || strm.avail_in == 0) {
        nextSink(outbuf, produced);
        strm.next_out = reinterpret_cast<char*>(outbuf);
        strm.avail_out = sizeof(outbuf);
      }
    }
  }
};

struct BrotliDecompressionSink : ChunkedCompressionSink {
  Sink& nextSink;
  BrotliDecoderState* state;
  bool finished = false;

  explicit BrotliDecompressionSink(Sink& nextSink) : nextSink(nextSink) {
    state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
    if (state == nullptr) {
      throw CompressionError("unable to initialize brotli decoder");
    }
  }

  ~BrotliDecompressionSink() override { BrotliDecoderDestroyInstance(state); }

  void finish() override {
    flush();
    writeInternal(nullptr, 0);
  }

  void writeInternal(const unsigned char* data, size_t len) override {
    const uint8_t* next_in = data;
    size_t avail_in = len;
    uint8_t* next_out = outbuf;
    size_t avail_out = sizeof(outbuf);

    while (!finished && ((data == nullptr) || (avail_in != 0u))) {
      checkInterrupt();

      if (BrotliDecoderDecompressStream(state, &avail_in, &next_in, &avail_out,
                                        &next_out, nullptr) == 0u) {
        throw CompressionError("error while decompressing brotli file");
      }

      if (avail_out < sizeof(outbuf) || avail_in == 0) {
        nextSink(outbuf, sizeof(outbuf) - avail_out);
        next_out = outbuf;
        avail_out = sizeof(outbuf);
      }

      finished = (BrotliDecoderIsFinished(state) != 0);
    }
  }
};

// Decompresses the whole string `in` with the codec named by `method` and
// returns the decoded bytes. Errors raised by makeDecompressionSink() or by
// the selected sink propagate to the caller.
ref<std::string> decompress(const std::string& method, const std::string& in) {
  StringSink collected;
  auto decoder = makeDecompressionSink(method, collected);
  (*decoder)(in);
  decoder->finish();
  return collected.s;
}

// Builds a sink that decompresses what is written to it and forwards the
// result to `nextSink`.
//
// Recognised methods: "none" or the empty string (pass-through), "xz",
// "bzip2" and "br". Any other name throws UnknownCompressionMethod.
ref<CompressionSink> makeDecompressionSink(const std::string& method,
                                           Sink& nextSink) {
  const bool passthrough = (method == "none") || method.empty();
  if (passthrough) {
    return make_ref<NoneSink>(nextSink);
  }
  if (method == "xz") {
    return make_ref<XzDecompressionSink>(nextSink);
  }
  if (method == "bzip2") {
    return make_ref<BzipDecompressionSink>(nextSink);
  }
  if (method == "br") {
    return make_ref<BrotliDecompressionSink>(nextSink);
  }
  throw UnknownCompressionMethod("unknown compression method '%s'", method);
}

struct XzCompressionSink : CompressionSink {
  Sink& nextSink;
  uint8_t outbuf[BUFSIZ];
  lzma_stream strm = LZMA_STREAM_INIT;
  bool finished = false;

  XzCompressionSink(Sink& nextSink, bool parallel) : nextSink(nextSink) {
    lzma_ret ret;
    bool done = false;

    if (parallel) {
      lzma_mt mt_options = {};
      mt_options.flags = 0;
      mt_options.timeout = 300;  // Using the same setting as the xz cmd line
      mt_options.preset = LZMA_PRESET_DEFAULT;
      mt_options.filters = NULL;
      mt_options.check = LZMA_CHECK_CRC64;
      mt_options.threads = lzma_cputhreads();
      mt_options.block_size = 0;
      if (mt_options.threads == 0) {
        mt_options.threads = 1;
      }
      // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the
      // number of threads.
      ret = lzma_stream_encoder_mt(&strm, &mt_options);
      done = true;
    }

    if (!done) {
      ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64);
    }

    if (ret != LZMA_OK) {
      throw CompressionError("unable to initialise lzma encoder");
    }

    // FIXME: apply the x86 BCJ filter?

    strm.next_out = outbuf;
    strm.avail_out = sizeof(outbuf);
  }

  ~XzCompressionSink() override { lzma_end(&strm); }

  void finish() override {
    CompressionSink::flush();
    write(nullptr, 0);
  }

  void write(const unsigned char* data, size_t len) override {
    strm.next_in = data;
    strm.avail_in = len;

    while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) {
      checkInterrupt();

      lzma_ret ret = lzma_code(&strm, data != nullptr ? LZMA_RUN : LZMA_FINISH);
      if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
        throw CompressionError("error %d while compressing xz file", ret);
      }

      finished = ret == LZMA_STREAM_END;

      if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
        nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
        strm.next_out = outbuf;
        strm.avail_out = sizeof(outbuf);
      }
    }
  }
};

struct BzipCompressionSink : ChunkedCompressionSink {
  Sink& nextSink;
  bz_stream strm;
  bool finished = false;

  explicit BzipCompressionSink(Sink& nextSink) : nextSink(nextSink) {
    memset(&strm, 0, sizeof(strm));
    int ret = BZ2_bzCompressInit(&strm, 9, 0, 30);
    if (ret != BZ_OK) {
      throw CompressionError("unable to initialise bzip2 encoder");
    }

    strm.next_out = reinterpret_cast<char*>(outbuf);
    strm.avail_out = sizeof(outbuf);
  }

  ~BzipCompressionSink() override { BZ2_bzCompressEnd(&strm); }

  void finish() override {
    flush();
    writeInternal(nullptr, 0);
  }

  void writeInternal(const unsigned char* data, size_t len) override {
    assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max());

    strm.next_in = (char*)data;
    strm.avail_in = len;

    while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) {
      checkInterrupt();

      int ret = BZ2_bzCompress(&strm, data != nullptr ? BZ_RUN : BZ_FINISH);
      if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END) {
        throw CompressionError("error %d while compressing bzip2 file", ret);
      }

      finished = ret == BZ_STREAM_END;

      if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) {
        nextSink(outbuf, sizeof(outbuf) - strm.avail_out);
        strm.next_out = reinterpret_cast<char*>(outbuf);
        strm.avail_out = sizeof(outbuf);
      }
    }
  }
};

struct BrotliCompressionSink : ChunkedCompressionSink {
  Sink& nextSink;
  uint8_t outbuf[BUFSIZ];
  BrotliEncoderState* state;
  bool finished = false;

  explicit BrotliCompressionSink(Sink& nextSink) : nextSink(nextSink) {
    state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
    if (state == nullptr) {
      throw CompressionError("unable to initialise brotli encoder");
    }
  }

  ~BrotliCompressionSink() override { BrotliEncoderDestroyInstance(state); }

  void finish() override {
    flush();
    writeInternal(nullptr, 0);
  }

  void writeInternal(const unsigned char* data, size_t len) override {
    const uint8_t* next_in = data;
    size_t avail_in = len;
    uint8_t* next_out = outbuf;
    size_t avail_out = sizeof(outbuf);

    while (!finished && ((data == nullptr) || (avail_in != 0u))) {
      checkInterrupt();

      if (BrotliEncoderCompressStream(state,
                                      data != nullptr ? BROTLI_OPERATION_PROCESS
                                                      : BROTLI_OPERATION_FINISH,
                                      &avail_in, &next_in, &avail_out,
                                      &next_out, nullptr) == 0) {
        throw CompressionError("error while compressing brotli compression");
      }

      if (avail_out < sizeof(outbuf) || avail_in == 0) {
        nextSink(outbuf, sizeof(outbuf) - avail_out);
        next_out = outbuf;
        avail_out = sizeof(outbuf);
      }

      finished = (BrotliEncoderIsFinished(state) != 0);
    }
  }
};

// Builds a sink that compresses what is written to it and forwards the
// result to `nextSink`.
//
// Recognised methods: "none" (pass-through), "xz", "bzip2" and "br".
// `parallel` is only passed on to the xz encoder. Any other method name
// throws UnknownCompressionMethod.
ref<CompressionSink> makeCompressionSink(const std::string& method,
                                         Sink& nextSink, const bool parallel) {
  if (method == "none") {
    return make_ref<NoneSink>(nextSink);
  }
  if (method == "xz") {
    return make_ref<XzCompressionSink>(nextSink, parallel);
  }
  if (method == "bzip2") {
    return make_ref<BzipCompressionSink>(nextSink);
  }
  if (method == "br") {
    return make_ref<BrotliCompressionSink>(nextSink);
  }
  throw UnknownCompressionMethod(format("unknown compression method '%s'") %
                                 method);
}

// Compresses the whole string `in` with the codec named by `method` and
// returns the encoded bytes. `parallel` is passed through to
// makeCompressionSink(). Errors raised by makeCompressionSink() or by the
// selected sink propagate to the caller.
ref<std::string> compress(const std::string& method, const std::string& in,
                          const bool parallel) {
  StringSink collected;
  auto encoder = makeCompressionSink(method, collected, parallel);
  (*encoder)(in);
  encoder->finish();
  return collected.s;
}

}  // namespace nix