about summary refs log tree commit diff
diff options
context:
space:
mode:
author Aspen Smith <root@gws.fyi> 2024-02-23T19·42-0500
committer aspen <root@gws.fyi> 2024-03-11T11·35+0000
commit 54609e8c17e60c1a3feaea5430887a6cc6bdce0f (patch)
tree 1ab4cad26f28e4656b0b461b8edce886e1b486d0
parent de727bccf99a1dcce2bb335e56af02f80e462dbc (diff)
feat(tvix/glue): Add AsyncRead wrapper to decompress streams r/7679
Add a new AsyncRead wrapper, DecompressedReader, that wraps an
underlying AsyncRead, but sniffs the magic bytes at the start of the
stream to determine which compression format is being used out of the
three that are supported by builtins.fetchTarball, and switches to the
correct decompression algorithm adapter dynamically.

This will be used in the implementation of builtins.fetchTarball

Change-Id: I892a4683d5c93e67d4c173f3d21199bdc6605922
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11019
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
-rw-r--r-- tvix/Cargo.lock 125
-rw-r--r-- tvix/Cargo.nix 424
-rw-r--r-- tvix/glue/Cargo.toml 7
-rw-r--r-- tvix/glue/src/decompression.rs 221
-rw-r--r-- tvix/glue/src/lib.rs 1
-rw-r--r-- tvix/glue/src/tests/blob.tar.bz2 bin 0 -> 116 bytes
-rw-r--r-- tvix/glue/src/tests/blob.tar.gz bin 0 -> 116 bytes
-rw-r--r-- tvix/glue/src/tests/blob.tar.xz bin 0 -> 172 bytes
8 files changed, 777 insertions, 1 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 4eda0fd016..750bb8364e 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -105,6 +105,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 
 [[package]]
+name = "async-compression"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c"
+dependencies = [
+ "bzip2",
+ "flate2",
+ "futures-core",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
+ "xz2",
+]
+
+[[package]]
 name = "async-recursion"
 version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -323,6 +338,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
 
 [[package]]
+name = "bzip2"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
+dependencies = [
+ "bzip2-sys",
+ "libc",
+]
+
+[[package]]
+name = "bzip2-sys"
+version = "0.1.11+1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
 name = "caps"
 version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -798,12 +834,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "27573eac26f4dd11e2b1916c3fe1baa56407c83c71a773a8ba17ec0bca03b6b7"
 
 [[package]]
+name = "filetime"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall 0.4.1",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
 name = "fixedbitset"
 version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
 
 [[package]]
+name = "flate2"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
 name = "fnv"
 version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1413,6 +1471,28 @@ dependencies = [
 ]
 
 [[package]]
+name = "magic"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a200ae03df8c3dce7a963f6eeaac8feb41bf9001cb7e5ab22e3205aec2f0373d"
+dependencies = [
+ "bitflags 2.4.2",
+ "libc",
+ "magic-sys",
+ "thiserror",
+]
+
+[[package]]
+name = "magic-sys"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eff86ae08895140d628119d407d568f3b657145ee8c265878064f717534bb3bc"
+dependencies = [
+ "libc",
+ "vcpkg",
+]
+
+[[package]]
 name = "matchit"
 version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2156,6 +2236,15 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
+dependencies = [
+ "bitflags 1.3.2",
+]
+
+[[package]]
+name = "redox_syscall"
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
@@ -3014,6 +3103,21 @@ dependencies = [
 ]
 
 [[package]]
+name = "tokio-tar"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75"
+dependencies = [
+ "filetime",
+ "futures-core",
+ "libc",
+ "redox_syscall 0.3.5",
+ "tokio",
+ "tokio-stream",
+ "xattr",
+]
+
+[[package]]
 name = "tokio-util"
 version = "0.7.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -3413,6 +3517,7 @@ dependencies = [
 name = "tvix-glue"
 version = "0.1.0"
 dependencies = [
+ "async-compression",
  "async-recursion",
  "bstr",
  "bytes",
@@ -3421,8 +3526,10 @@ dependencies = [
  "futures",
  "hex-literal",
  "lazy_static",
+ "magic",
  "nix 0.27.1",
  "nix-compat",
+ "pin-project",
  "pretty_assertions",
  "reqwest",
  "rstest",
@@ -3433,6 +3540,7 @@ dependencies = [
  "test-case",
  "thiserror",
  "tokio",
+ "tokio-tar",
  "tokio-util",
  "tracing",
  "tvix-build",
@@ -3594,6 +3702,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
 [[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
 name = "version_check"
 version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -3995,6 +4109,17 @@ version = "0.1.0"
 source = "git+https://github.com/tvlfyi/wu-manber.git#0d5b22bea136659f7de60b102a7030e0daaa503d"
 
 [[package]]
+name = "xattr"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
+dependencies = [
+ "libc",
+ "linux-raw-sys",
+ "rustix",
+]
+
+[[package]]
 name = "xml-rs"
 version = "0.8.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index bc01e1a76b..06a4b8a99c 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -386,6 +386,74 @@ rec {
           "zeroize" = [ "dep:zeroize" ];
         };
       };
+      "async-compression" = rec {
+        crateName = "async-compression";
+        version = "0.4.6";
+        edition = "2018";
+        sha256 = "0b6874q56g1cx8ivs9j89d757rsh9kyrrwlp1852094jjrmg85m1";
+        authors = [
+          "Wim Looman <wim@nemo157.com>"
+          "Allen Bui <fairingrey@gmail.com>"
+        ];
+        dependencies = [
+          {
+            name = "bzip2";
+            packageId = "bzip2";
+            optional = true;
+          }
+          {
+            name = "flate2";
+            packageId = "flate2";
+            optional = true;
+          }
+          {
+            name = "futures-core";
+            packageId = "futures-core";
+            usesDefaultFeatures = false;
+          }
+          {
+            name = "memchr";
+            packageId = "memchr";
+          }
+          {
+            name = "pin-project-lite";
+            packageId = "pin-project-lite";
+          }
+          {
+            name = "tokio";
+            packageId = "tokio";
+            optional = true;
+            usesDefaultFeatures = false;
+          }
+          {
+            name = "xz2";
+            packageId = "xz2";
+            optional = true;
+          }
+        ];
+        features = {
+          "all" = [ "all-implementations" "all-algorithms" ];
+          "all-algorithms" = [ "brotli" "bzip2" "deflate" "gzip" "lzma" "xz" "zlib" "zstd" "deflate64" ];
+          "all-implementations" = [ "futures-io" "tokio" ];
+          "brotli" = [ "dep:brotli" ];
+          "bzip2" = [ "dep:bzip2" ];
+          "deflate" = [ "flate2" ];
+          "deflate64" = [ "dep:deflate64" ];
+          "flate2" = [ "dep:flate2" ];
+          "futures-io" = [ "dep:futures-io" ];
+          "gzip" = [ "flate2" ];
+          "libzstd" = [ "dep:libzstd" ];
+          "lzma" = [ "xz2" ];
+          "tokio" = [ "dep:tokio" ];
+          "xz" = [ "xz2" ];
+          "xz2" = [ "dep:xz2" ];
+          "zlib" = [ "flate2" ];
+          "zstd" = [ "libzstd" "zstd-safe" ];
+          "zstd-safe" = [ "dep:zstd-safe" ];
+          "zstdmt" = [ "zstd" "zstd-safe/zstdmt" ];
+        };
+        resolvedDefaultFeatures = [ "bzip2" "flate2" "gzip" "tokio" "xz" "xz2" ];
+      };
       "async-recursion" = rec {
         crateName = "async-recursion";
         version = "1.0.5";
@@ -1045,6 +1113,60 @@ rec {
         };
         resolvedDefaultFeatures = [ "default" "std" ];
       };
+      "bzip2" = rec {
+        crateName = "bzip2";
+        version = "0.4.4";
+        edition = "2015";
+        sha256 = "1y27wgqkx3k2jmh4k26vra2kqjq1qc1asww8hac3cv1zxyk1dcdx";
+        authors = [
+          "Alex Crichton <alex@alexcrichton.com>"
+        ];
+        dependencies = [
+          {
+            name = "bzip2-sys";
+            packageId = "bzip2-sys";
+          }
+          {
+            name = "libc";
+            packageId = "libc";
+          }
+        ];
+        features = {
+          "futures" = [ "dep:futures" ];
+          "static" = [ "bzip2-sys/static" ];
+          "tokio" = [ "tokio-io" "futures" ];
+          "tokio-io" = [ "dep:tokio-io" ];
+        };
+      };
+      "bzip2-sys" = rec {
+        crateName = "bzip2-sys";
+        version = "0.1.11+1.0.8";
+        edition = "2015";
+        links = "bzip2";
+        sha256 = "1p2crnv8d8gpz5c2vlvzl0j55i3yqg5bi0kwsl1531x77xgraskk";
+        libName = "bzip2_sys";
+        libPath = "lib.rs";
+        authors = [
+          "Alex Crichton <alex@alexcrichton.com>"
+        ];
+        dependencies = [
+          {
+            name = "libc";
+            packageId = "libc";
+          }
+        ];
+        buildDependencies = [
+          {
+            name = "cc";
+            packageId = "cc";
+          }
+          {
+            name = "pkg-config";
+            packageId = "pkg-config";
+          }
+        ];
+        features = { };
+      };
       "caps" = rec {
         crateName = "caps";
         version = "0.5.5";
@@ -2339,6 +2461,38 @@ rec {
           "default" = [ "std" ];
         };
       };
+      "filetime" = rec {
+        crateName = "filetime";
+        version = "0.2.23";
+        edition = "2018";
+        sha256 = "1za0sbq7fqidk8aaq9v7m9ms0sv8mmi49g6p5cphpan819q4gr0y";
+        authors = [
+          "Alex Crichton <alex@alexcrichton.com>"
+        ];
+        dependencies = [
+          {
+            name = "cfg-if";
+            packageId = "cfg-if";
+          }
+          {
+            name = "libc";
+            packageId = "libc";
+            target = { target, features }: (target."unix" or false);
+          }
+          {
+            name = "redox_syscall";
+            packageId = "redox_syscall 0.4.1";
+            target = { target, features }: ("redox" == target."os" or null);
+          }
+          {
+            name = "windows-sys";
+            packageId = "windows-sys 0.52.0";
+            target = { target, features }: (target."windows" or false);
+            features = [ "Win32_Foundation" "Win32_Storage_FileSystem" ];
+          }
+        ];
+
+      };
       "fixedbitset" = rec {
         crateName = "fixedbitset";
         version = "0.4.2";
@@ -2352,6 +2506,52 @@ rec {
           "serde" = [ "dep:serde" ];
         };
       };
+      "flate2" = rec {
+        crateName = "flate2";
+        version = "1.0.28";
+        edition = "2018";
+        sha256 = "03llhsh4gqdirnfxxb9g2w9n0721dyn4yjir3pz7z4vjaxb3yc26";
+        authors = [
+          "Alex Crichton <alex@alexcrichton.com>"
+          "Josh Triplett <josh@joshtriplett.org>"
+        ];
+        dependencies = [
+          {
+            name = "crc32fast";
+            packageId = "crc32fast";
+          }
+          {
+            name = "miniz_oxide";
+            packageId = "miniz_oxide";
+            optional = true;
+            usesDefaultFeatures = false;
+            features = [ "with-alloc" ];
+          }
+          {
+            name = "miniz_oxide";
+            packageId = "miniz_oxide";
+            usesDefaultFeatures = false;
+            target = { target, features }: (("wasm32" == target."arch" or null) && (!("emscripten" == target."os" or null)));
+            features = [ "with-alloc" ];
+          }
+        ];
+        features = {
+          "any_zlib" = [ "any_impl" ];
+          "cloudflare-zlib-sys" = [ "dep:cloudflare-zlib-sys" ];
+          "cloudflare_zlib" = [ "any_zlib" "cloudflare-zlib-sys" ];
+          "default" = [ "rust_backend" ];
+          "libz-ng-sys" = [ "dep:libz-ng-sys" ];
+          "libz-sys" = [ "dep:libz-sys" ];
+          "miniz-sys" = [ "rust_backend" ];
+          "miniz_oxide" = [ "dep:miniz_oxide" ];
+          "rust_backend" = [ "miniz_oxide" "any_impl" ];
+          "zlib" = [ "any_zlib" "libz-sys" ];
+          "zlib-default" = [ "any_zlib" "libz-sys/default" ];
+          "zlib-ng" = [ "any_zlib" "libz-ng-sys" ];
+          "zlib-ng-compat" = [ "zlib" "libz-sys/zlib-ng" ];
+        };
+        resolvedDefaultFeatures = [ "any_impl" "default" "miniz_oxide" "rust_backend" ];
+      };
       "fnv" = rec {
         crateName = "fnv";
         version = "1.0.7";
@@ -4100,7 +4300,7 @@ rec {
           "default" = [ "std" "general" "errno" ];
           "rustc-dep-of-std" = [ "core" "compiler_builtins" "no_std" ];
         };
-        resolvedDefaultFeatures = [ "elf" "errno" "general" "ioctl" "no_std" ];
+        resolvedDefaultFeatures = [ "elf" "errno" "general" "ioctl" "no_std" "std" ];
       };
       "litrs" = rec {
         crateName = "litrs";
@@ -4192,6 +4392,80 @@ rec {
         ];
         features = { };
       };
+      "magic" = rec {
+        crateName = "magic";
+        version = "0.16.2";
+        edition = "2018";
+        sha256 = "0g9py31aw19j5sr5lznb068byhgbiynflvizjrxcwgccvw1sw052";
+        authors = [
+          "Daniel Micay <danielmicay@gmail.com>"
+          "Petar Radošević <petar@wunki.org>"
+          "lilydjwg <lilydjwg@gmail.com>"
+          "Jeff Belgum <belgum@bastille.io>"
+          "Onur Aslan <onur@onur.im>"
+          "robo9k <robo9k@symlink.io>"
+        ];
+        dependencies = [
+          {
+            name = "bitflags";
+            packageId = "bitflags 2.4.2";
+          }
+          {
+            name = "libc";
+            packageId = "libc";
+            usesDefaultFeatures = false;
+          }
+          {
+            name = "magic-sys";
+            packageId = "magic-sys";
+          }
+          {
+            name = "thiserror";
+            packageId = "thiserror";
+          }
+        ];
+
+      };
+      "magic-sys" = rec {
+        crateName = "magic-sys";
+        version = "0.3.0";
+        edition = "2015";
+        links = "magic";
+        sha256 = "1g5k9d9igxv4h23nbhp8bqa5gdpkd3ahgm0rh5i0s54mi3h6my7g";
+        authors = [
+          "robo9k <robo9k@symlink.io>"
+        ];
+        dependencies = [
+          {
+            name = "libc";
+            packageId = "libc";
+            usesDefaultFeatures = false;
+          }
+        ];
+        buildDependencies = [
+          {
+            name = "vcpkg";
+            packageId = "vcpkg";
+          }
+        ];
+        features = {
+          "default" = [ "v5-38" ];
+          "v5-05" = [ "v5-04" ];
+          "v5-10" = [ "v5-05" ];
+          "v5-13" = [ "v5-10" ];
+          "v5-20" = [ "v5-13" ];
+          "v5-21" = [ "v5-20" ];
+          "v5-22" = [ "v5-21" ];
+          "v5-23" = [ "v5-22" ];
+          "v5-25" = [ "v5-23" ];
+          "v5-27" = [ "v5-25" ];
+          "v5-32" = [ "v5-27" ];
+          "v5-35" = [ "v5-32" ];
+          "v5-38" = [ "v5-35" ];
+          "v5-40" = [ "v5-38" ];
+        };
+        resolvedDefaultFeatures = [ "default" "v5-04" "v5-05" "v5-10" "v5-13" "v5-20" "v5-21" "v5-22" "v5-23" "v5-25" "v5-27" "v5-32" "v5-35" "v5-38" ];
+      };
       "matchit" = rec {
         crateName = "matchit";
         version = "0.7.3";
@@ -4340,6 +4614,7 @@ rec {
           "simd" = [ "simd-adler32" ];
           "simd-adler32" = [ "dep:simd-adler32" ];
         };
+        resolvedDefaultFeatures = [ "with-alloc" ];
       };
       "mio" = rec {
         crateName = "mio";
@@ -6410,6 +6685,26 @@ rec {
         ];
 
       };
+      "redox_syscall 0.3.5" = rec {
+        crateName = "redox_syscall";
+        version = "0.3.5";
+        edition = "2018";
+        sha256 = "0acgiy2lc1m2vr8cr33l5s7k9wzby8dybyab1a9p753hcbr68xjn";
+        libName = "syscall";
+        authors = [
+          "Jeremy Soller <jackpot51@gmail.com>"
+        ];
+        dependencies = [
+          {
+            name = "bitflags";
+            packageId = "bitflags 1.3.2";
+          }
+        ];
+        features = {
+          "core" = [ "dep:core" ];
+          "rustc-dep-of-std" = [ "core" "bitflags/rustc-dep-of-std" ];
+        };
+      };
       "redox_syscall 0.4.1" = rec {
         crateName = "redox_syscall";
         version = "0.4.1";
@@ -9139,6 +9434,65 @@ rec {
         };
         resolvedDefaultFeatures = [ "default" "fs" "net" "time" ];
       };
+      "tokio-tar" = rec {
+        crateName = "tokio-tar";
+        version = "0.3.1";
+        edition = "2018";
+        sha256 = "0xffvap4g7hlswk5daklk3jaqha6s6wxw72c24kmqgna23018mwx";
+        authors = [
+          "Alex Crichton <alex@alexcrichton.com>"
+          "dignifiedquire <me@dignifiequire.com>"
+          "Artem Vorotnikov <artem@vorotnikov.me>"
+          "Aiden McClelland <me@drbonez.dev>"
+        ];
+        dependencies = [
+          {
+            name = "filetime";
+            packageId = "filetime";
+          }
+          {
+            name = "futures-core";
+            packageId = "futures-core";
+          }
+          {
+            name = "libc";
+            packageId = "libc";
+            target = { target, features }: (target."unix" or false);
+          }
+          {
+            name = "redox_syscall";
+            packageId = "redox_syscall 0.3.5";
+            target = { target, features }: ("redox" == target."os" or null);
+          }
+          {
+            name = "tokio";
+            packageId = "tokio";
+            features = [ "fs" "io-util" "rt" ];
+          }
+          {
+            name = "tokio-stream";
+            packageId = "tokio-stream";
+          }
+          {
+            name = "xattr";
+            packageId = "xattr";
+            optional = true;
+            target = { target, features }: (target."unix" or false);
+          }
+        ];
+        devDependencies = [
+          {
+            name = "tokio";
+            packageId = "tokio";
+            features = [ "full" ];
+          }
+        ];
+        features = {
+          "default" = [ "xattr" ];
+          "xattr" = [ "dep:xattr" ];
+        };
+        resolvedDefaultFeatures = [ "default" "xattr" ];
+      };
       "tokio-util" = rec {
         crateName = "tokio-util";
         version = "0.7.10";
@@ -10769,6 +11123,11 @@ rec {
           else ./glue;
         dependencies = [
           {
+            name = "async-compression";
+            packageId = "async-compression";
+            features = [ "tokio" "gzip" "bzip2" "xz" ];
+          }
+          {
             name = "async-recursion";
             packageId = "async-recursion";
           }
@@ -10789,10 +11148,18 @@ rec {
             packageId = "futures";
           }
           {
+            name = "magic";
+            packageId = "magic";
+          }
+          {
             name = "nix-compat";
             packageId = "nix-compat";
           }
           {
+            name = "pin-project";
+            packageId = "pin-project";
+          }
+          {
             name = "reqwest";
             packageId = "reqwest";
             usesDefaultFeatures = false;
@@ -10819,6 +11186,10 @@ rec {
             packageId = "tokio";
           }
           {
+            name = "tokio-tar";
+            packageId = "tokio-tar";
+          }
+          {
             name = "tokio-util";
             packageId = "tokio-util";
             features = [ "io" "io-util" "compat" ];
@@ -11358,6 +11729,16 @@ rec {
         };
         resolvedDefaultFeatures = [ "alloc" "std" ];
       };
+      "vcpkg" = rec {
+        crateName = "vcpkg";
+        version = "0.2.15";
+        edition = "2015";
+        sha256 = "09i4nf5y8lig6xgj3f7fyrvzd3nlaw4znrihw8psidvv5yk4xkdc";
+        authors = [
+          "Jim McGrath <jimmc2@gmail.com>"
+        ];
+
+      };
       "version_check" = rec {
         crateName = "version_check";
         version = "0.9.4";
@@ -13269,6 +13650,47 @@ rec {
         ];
 
       };
+      "xattr" = rec {
+        crateName = "xattr";
+        version = "1.3.1";
+        edition = "2021";
+        sha256 = "0kqxm36w89vc6qcpn6pizlhgjgzq138sx4hdhbv2g6wk4ld4za4d";
+        authors = [
+          "Steven Allen <steven@stebalien.com>"
+        ];
+        dependencies = [
+          {
+            name = "libc";
+            packageId = "libc";
+            target = { target, features }: (("freebsd" == target."os" or null) || ("netbsd" == target."os" or null));
+          }
+          {
+            name = "linux-raw-sys";
+            packageId = "linux-raw-sys";
+            usesDefaultFeatures = false;
+            target = { target, features }: ("linux" == target."os" or null);
+            features = [ "std" ];
+          }
+          {
+            name = "rustix";
+            packageId = "rustix";
+            usesDefaultFeatures = false;
+            features = [ "fs" "std" ];
+          }
+        ];
+        devDependencies = [
+          {
+            name = "rustix";
+            packageId = "rustix";
+            usesDefaultFeatures = false;
+            features = [ "net" ];
+          }
+        ];
+        features = {
+          "default" = [ "unsupported" ];
+        };
+        resolvedDefaultFeatures = [ "default" "unsupported" ];
+      };
       "xml-rs" = rec {
         crateName = "xml-rs";
         version = "0.8.19";
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml
index f4ebfe4906..1ec3832d91 100644
--- a/tvix/glue/Cargo.toml
+++ b/tvix/glue/Cargo.toml
@@ -9,7 +9,9 @@ bstr = "1.6.0"
 bytes = "1.4.0"
 data-encoding = "2.3.3"
 futures = "0.3.30"
+magic = "0.16.2"
 nix-compat = { path = "../nix-compat" }
+pin-project = "1.1"
 reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots"], default-features = false }
 tvix-build = { path = "../build", default-features = false, features = []}
 tvix-eval = { path = "../eval" }
@@ -17,6 +19,7 @@ tvix-castore = { path = "../castore" }
 tvix-store = { path = "../store", default-features = false, features = []}
 tracing = "0.1.37"
 tokio = "1.28.0"
+tokio-tar = "0.3.1"
 tokio-util = { version = "0.7.9", features = ["io", "io-util", "compat"] }
 thiserror = "1.0.38"
 serde = "1.0.195"
@@ -24,6 +27,10 @@ serde_json = "1.0"
 sha2 = "0.10.8"
 walkdir = "2.4.0"
 
+[dependencies.async-compression]
+version = "0.4.6"
+features = ["tokio", "gzip", "bzip2", "xz"]
+
 [dependencies.wu-manber]
 git = "https://github.com/tvlfyi/wu-manber.git"
 
diff --git a/tvix/glue/src/decompression.rs b/tvix/glue/src/decompression.rs
new file mode 100644
index 0000000000..7e526932e7
--- /dev/null
+++ b/tvix/glue/src/decompression.rs
@@ -0,0 +1,221 @@
+#![allow(dead_code)] // TODO
+
+use std::{
+    io, mem,
+    pin::Pin,
+    task::{Context, Poll},
+};
+
+use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder};
+use futures::ready;
+use pin_project::pin_project;
+use tokio::io::{AsyncBufRead, AsyncRead, BufReader, ReadBuf};
+
+const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];
+const BZIP2_MAGIC: [u8; 3] = *b"BZh";
+const XZ_MAGIC: [u8; 6] = [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00];
+const BYTES_NEEDED: usize = 6;
+
+#[derive(Debug, Clone, Copy)]
+enum Algorithm {
+    Gzip,
+    Bzip2,
+    Xz,
+}
+
+impl Algorithm {
+    fn from_magic(magic: &[u8]) -> Option<Self> {
+        if magic.starts_with(&GZIP_MAGIC) {
+            Some(Self::Gzip)
+        } else if magic.starts_with(&BZIP2_MAGIC) {
+            Some(Self::Bzip2)
+        } else if magic.starts_with(&XZ_MAGIC) {
+            Some(Self::Xz)
+        } else {
+            None
+        }
+    }
+}
+
+#[pin_project]
+struct WithPreexistingBuffer<R> {
+    buffer: Vec<u8>,
+    #[pin]
+    inner: R,
+}
+
+impl<R> AsyncRead for WithPreexistingBuffer<R>
+where
+    R: AsyncRead,
+{
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        let this = self.project();
+        if !this.buffer.is_empty() {
+            // TODO: check if the buffer fits first
+            buf.put_slice(this.buffer);
+            this.buffer.clear();
+        }
+        this.inner.poll_read(cx, buf)
+    }
+}
+
+#[pin_project(project = DecompressedReaderInnerProj)]
+enum DecompressedReaderInner<R> {
+    Unknown {
+        buffer: Vec<u8>,
+        #[pin]
+        inner: Option<R>,
+    },
+    Gzip(#[pin] GzipDecoder<BufReader<WithPreexistingBuffer<R>>>),
+    Bzip2(#[pin] BzDecoder<BufReader<WithPreexistingBuffer<R>>>),
+    Xz(#[pin] XzDecoder<BufReader<WithPreexistingBuffer<R>>>),
+}
+
+impl<R> DecompressedReaderInner<R>
+where
+    R: AsyncBufRead,
+{
+    fn switch_to(&mut self, algorithm: Algorithm) {
+        let (buffer, inner) = match self {
+            DecompressedReaderInner::Unknown { buffer, inner } => {
+                (mem::take(buffer), inner.take().unwrap())
+            }
+            DecompressedReaderInner::Gzip(_)
+            | DecompressedReaderInner::Bzip2(_)
+            | DecompressedReaderInner::Xz(_) => unreachable!(),
+        };
+        let inner = BufReader::new(WithPreexistingBuffer { buffer, inner });
+
+        *self = match algorithm {
+            Algorithm::Gzip => Self::Gzip(GzipDecoder::new(inner)),
+            Algorithm::Bzip2 => Self::Bzip2(BzDecoder::new(inner)),
+            Algorithm::Xz => Self::Xz(XzDecoder::new(inner)),
+        }
+    }
+}
+
+impl<R> AsyncRead for DecompressedReaderInner<R>
+where
+    R: AsyncBufRead,
+{
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        match self.project() {
+            DecompressedReaderInnerProj::Unknown { .. } => {
+                unreachable!("Can't call poll_read on Unknown")
+            }
+            DecompressedReaderInnerProj::Gzip(inner) => inner.poll_read(cx, buf),
+            DecompressedReaderInnerProj::Bzip2(inner) => inner.poll_read(cx, buf),
+            DecompressedReaderInnerProj::Xz(inner) => inner.poll_read(cx, buf),
+        }
+    }
+}
+
+#[pin_project]
+pub struct DecompressedReader<R> {
+    #[pin]
+    inner: DecompressedReaderInner<R>,
+    switch_to: Option<Algorithm>,
+}
+
+impl<R> DecompressedReader<R> {
+    pub fn new(inner: R) -> Self {
+        Self {
+            inner: DecompressedReaderInner::Unknown {
+                buffer: vec![0; BYTES_NEEDED],
+                inner: Some(inner),
+            },
+            switch_to: None,
+        }
+    }
+}
+
+impl<R> AsyncRead for DecompressedReader<R>
+where
+    R: AsyncBufRead + Unpin,
+{
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        let mut this = self.project();
+        let (buffer, inner) = match this.inner.as_mut().project() {
+            DecompressedReaderInnerProj::Gzip(inner) => return inner.poll_read(cx, buf),
+            DecompressedReaderInnerProj::Bzip2(inner) => return inner.poll_read(cx, buf),
+            DecompressedReaderInnerProj::Xz(inner) => return inner.poll_read(cx, buf),
+            DecompressedReaderInnerProj::Unknown { buffer, inner } => (buffer, inner),
+        };
+
+        let mut our_buf = ReadBuf::new(buffer);
+        if let Err(e) = ready!(inner.as_pin_mut().unwrap().poll_read(cx, &mut our_buf)) {
+            return Poll::Ready(Err(e));
+        }
+
+        let data = our_buf.filled();
+        if data.len() >= BYTES_NEEDED {
+            if let Some(algorithm) = Algorithm::from_magic(data) {
+                this.inner.as_mut().switch_to(algorithm);
+            } else {
+                return Poll::Ready(Err(io::Error::new(
+                    io::ErrorKind::InvalidData,
+                    "tar data not gz, bzip2, or xz compressed",
+                )));
+            }
+            this.inner.poll_read(cx, buf)
+        } else {
+            cx.waker().wake_by_ref();
+            Poll::Pending
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::Path;
+
+    use async_compression::tokio::bufread::GzipEncoder;
+    use futures::TryStreamExt;
+    use test_case::test_case;
+    use tokio::io::{AsyncReadExt, BufReader};
+    use tokio_tar::Archive;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn gzip() {
+        let data = b"abcdefghijk";
+        let mut enc = GzipEncoder::new(&data[..]);
+        let mut gzipped = vec![];
+        enc.read_to_end(&mut gzipped).await.unwrap();
+
+        let mut reader = DecompressedReader::new(BufReader::new(&gzipped[..]));
+        let mut round_tripped = vec![];
+        reader.read_to_end(&mut round_tripped).await.unwrap();
+
+        assert_eq!(data[..], round_tripped[..]);
+    }
+
+    #[test_case(include_bytes!("tests/blob.tar.gz"); "gzip")]
+    #[test_case(include_bytes!("tests/blob.tar.bz2"); "bzip2")]
+    #[test_case(include_bytes!("tests/blob.tar.xz"); "xz")]
+    #[tokio::test]
+    async fn compressed_tar(data: &[u8]) {
+        let reader = DecompressedReader::new(BufReader::new(data));
+        let mut archive = Archive::new(reader);
+        let mut entries: Vec<_> = archive.entries().unwrap().try_collect().await.unwrap();
+
+        assert_eq!(entries.len(), 1);
+        assert_eq!(entries[0].path().unwrap().as_ref(), Path::new("empty"));
+        let mut data = String::new();
+        entries[0].read_to_string(&mut data).await.unwrap();
+        assert_eq!(data, "");
+    }
+}
diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs
index b2f586ce52..f04d5ec3a0 100644
--- a/tvix/glue/src/lib.rs
+++ b/tvix/glue/src/lib.rs
@@ -5,6 +5,7 @@ pub mod tvix_build;
 pub mod tvix_io;
 pub mod tvix_store_io;
 
+mod decompression;
 #[cfg(test)]
 mod tests;
 
diff --git a/tvix/glue/src/tests/blob.tar.bz2 b/tvix/glue/src/tests/blob.tar.bz2
new file mode 100644
index 0000000000..d74b913912
--- /dev/null
+++ b/tvix/glue/src/tests/blob.tar.bz2
Binary files differ
diff --git a/tvix/glue/src/tests/blob.tar.gz b/tvix/glue/src/tests/blob.tar.gz
new file mode 100644
index 0000000000..c2bae55078
--- /dev/null
+++ b/tvix/glue/src/tests/blob.tar.gz
Binary files differ
diff --git a/tvix/glue/src/tests/blob.tar.xz b/tvix/glue/src/tests/blob.tar.xz
new file mode 100644
index 0000000000..324a99d895
--- /dev/null
+++ b/tvix/glue/src/tests/blob.tar.xz
Binary files differ