diff options
author | Aspen Smith <root@gws.fyi> | 2024-02-23T19·42-0500 |
---|---|---|
committer | aspen <root@gws.fyi> | 2024-03-11T11·35+0000 |
commit | 54609e8c17e60c1a3feaea5430887a6cc6bdce0f (patch) | |
tree | 1ab4cad26f28e4656b0b461b8edce886e1b486d0 /tvix | |
parent | de727bccf99a1dcce2bb335e56af02f80e462dbc (diff) |
feat(tvix/glue): Add AsyncRead wrapper to decompress streams r/7679
Add a new AsyncRead wrapper, DecompressedReader, that wraps an underlying AsyncRead, but sniffs the magic bytes at the start of the stream to determine which compression format is being used out of the three that are supported by builtins.fetchTarball, and switches to the correct decompression algorithm adapter dynamically. This will be used in the implementation of builtins.fetchTarball. Change-Id: I892a4683d5c93e67d4c173f3d21199bdc6605922 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11019 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix')
-rw-r--r-- | tvix/Cargo.lock | 125 | ||||
-rw-r--r-- | tvix/Cargo.nix | 424 | ||||
-rw-r--r-- | tvix/glue/Cargo.toml | 7 | ||||
-rw-r--r-- | tvix/glue/src/decompression.rs | 221 | ||||
-rw-r--r-- | tvix/glue/src/lib.rs | 1 | ||||
-rw-r--r-- | tvix/glue/src/tests/blob.tar.bz2 | bin | 0 -> 116 bytes | |||
-rw-r--r-- | tvix/glue/src/tests/blob.tar.gz | bin | 0 -> 116 bytes | |||
-rw-r--r-- | tvix/glue/src/tests/blob.tar.xz | bin | 0 -> 172 bytes |
8 files changed, 777 insertions, 1 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 4eda0fd0166e..750bb8364ef2 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -105,6 +105,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] +name = "async-compression" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "memchr", + "pin-project-lite", + "tokio", + "xz2", +] + +[[package]] name = "async-recursion" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -323,6 +338,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] name = "caps" version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -798,12 +834,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27573eac26f4dd11e2b1916c3fe1baa56407c83c71a773a8ba17ec0bca03b6b7" [[package]] +name = "filetime" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys 0.52.0", +] + +[[package]] name = 
"fixedbitset" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1413,6 +1471,28 @@ dependencies = [ ] [[package]] +name = "magic" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a200ae03df8c3dce7a963f6eeaac8feb41bf9001cb7e5ab22e3205aec2f0373d" +dependencies = [ + "bitflags 2.4.2", + "libc", + "magic-sys", + "thiserror", +] + +[[package]] +name = "magic-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff86ae08895140d628119d407d568f3b657145ee8c265878064f717534bb3bc" +dependencies = [ + "libc", + "vcpkg", +] + +[[package]] name = "matchit" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2156,6 +2236,15 @@ dependencies = [ [[package]] name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "redox_syscall" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" @@ -3014,6 +3103,21 @@ dependencies = [ ] [[package]] +name = "tokio-tar" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75" +dependencies = [ + "filetime", + "futures-core", + "libc", + "redox_syscall 
0.3.5", + "tokio", + "tokio-stream", + "xattr", +] + +[[package]] name = "tokio-util" version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -3413,6 +3517,7 @@ dependencies = [ name = "tvix-glue" version = "0.1.0" dependencies = [ + "async-compression", "async-recursion", "bstr", "bytes", @@ -3421,8 +3526,10 @@ dependencies = [ "futures", "hex-literal", "lazy_static", + "magic", "nix 0.27.1", "nix-compat", + "pin-project", "pretty_assertions", "reqwest", "rstest", @@ -3433,6 +3540,7 @@ dependencies = [ "test-case", "thiserror", "tokio", + "tokio-tar", "tokio-util", "tracing", "tvix-build", @@ -3594,6 +3702,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" [[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -3995,6 +4109,17 @@ version = "0.1.0" source = "git+https://github.com/tvlfyi/wu-manber.git#0d5b22bea136659f7de60b102a7030e0daaa503d" [[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + +[[package]] name = "xml-rs" version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index bc01e1a76b3b..06a4b8a99c73 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -386,6 +386,74 @@ rec { "zeroize" = [ "dep:zeroize" ]; }; }; + "async-compression" = rec { + crateName = "async-compression"; + version = "0.4.6"; + edition = "2018"; + sha256 = "0b6874q56g1cx8ivs9j89d757rsh9kyrrwlp1852094jjrmg85m1"; + authors = [ + "Wim 
Looman <wim@nemo157.com>" + "Allen Bui <fairingrey@gmail.com>" + ]; + dependencies = [ + { + name = "bzip2"; + packageId = "bzip2"; + optional = true; + } + { + name = "flate2"; + packageId = "flate2"; + optional = true; + } + { + name = "futures-core"; + packageId = "futures-core"; + usesDefaultFeatures = false; + } + { + name = "memchr"; + packageId = "memchr"; + } + { + name = "pin-project-lite"; + packageId = "pin-project-lite"; + } + { + name = "tokio"; + packageId = "tokio"; + optional = true; + usesDefaultFeatures = false; + } + { + name = "xz2"; + packageId = "xz2"; + optional = true; + } + ]; + features = { + "all" = [ "all-implementations" "all-algorithms" ]; + "all-algorithms" = [ "brotli" "bzip2" "deflate" "gzip" "lzma" "xz" "zlib" "zstd" "deflate64" ]; + "all-implementations" = [ "futures-io" "tokio" ]; + "brotli" = [ "dep:brotli" ]; + "bzip2" = [ "dep:bzip2" ]; + "deflate" = [ "flate2" ]; + "deflate64" = [ "dep:deflate64" ]; + "flate2" = [ "dep:flate2" ]; + "futures-io" = [ "dep:futures-io" ]; + "gzip" = [ "flate2" ]; + "libzstd" = [ "dep:libzstd" ]; + "lzma" = [ "xz2" ]; + "tokio" = [ "dep:tokio" ]; + "xz" = [ "xz2" ]; + "xz2" = [ "dep:xz2" ]; + "zlib" = [ "flate2" ]; + "zstd" = [ "libzstd" "zstd-safe" ]; + "zstd-safe" = [ "dep:zstd-safe" ]; + "zstdmt" = [ "zstd" "zstd-safe/zstdmt" ]; + }; + resolvedDefaultFeatures = [ "bzip2" "flate2" "gzip" "tokio" "xz" "xz2" ]; + }; "async-recursion" = rec { crateName = "async-recursion"; version = "1.0.5"; @@ -1045,6 +1113,60 @@ rec { }; resolvedDefaultFeatures = [ "default" "std" ]; }; + "bzip2" = rec { + crateName = "bzip2"; + version = "0.4.4"; + edition = "2015"; + sha256 = "1y27wgqkx3k2jmh4k26vra2kqjq1qc1asww8hac3cv1zxyk1dcdx"; + authors = [ + "Alex Crichton <alex@alexcrichton.com>" + ]; + dependencies = [ + { + name = "bzip2-sys"; + packageId = "bzip2-sys"; + } + { + name = "libc"; + packageId = "libc"; + } + ]; + features = { + "futures" = [ "dep:futures" ]; + "static" = [ "bzip2-sys/static" ]; + "tokio" = 
[ "tokio-io" "futures" ]; + "tokio-io" = [ "dep:tokio-io" ]; + }; + }; + "bzip2-sys" = rec { + crateName = "bzip2-sys"; + version = "0.1.11+1.0.8"; + edition = "2015"; + links = "bzip2"; + sha256 = "1p2crnv8d8gpz5c2vlvzl0j55i3yqg5bi0kwsl1531x77xgraskk"; + libName = "bzip2_sys"; + libPath = "lib.rs"; + authors = [ + "Alex Crichton <alex@alexcrichton.com>" + ]; + dependencies = [ + { + name = "libc"; + packageId = "libc"; + } + ]; + buildDependencies = [ + { + name = "cc"; + packageId = "cc"; + } + { + name = "pkg-config"; + packageId = "pkg-config"; + } + ]; + features = { }; + }; "caps" = rec { crateName = "caps"; version = "0.5.5"; @@ -2339,6 +2461,38 @@ rec { "default" = [ "std" ]; }; }; + "filetime" = rec { + crateName = "filetime"; + version = "0.2.23"; + edition = "2018"; + sha256 = "1za0sbq7fqidk8aaq9v7m9ms0sv8mmi49g6p5cphpan819q4gr0y"; + authors = [ + "Alex Crichton <alex@alexcrichton.com>" + ]; + dependencies = [ + { + name = "cfg-if"; + packageId = "cfg-if"; + } + { + name = "libc"; + packageId = "libc"; + target = { target, features }: (target."unix" or false); + } + { + name = "redox_syscall"; + packageId = "redox_syscall 0.4.1"; + target = { target, features }: ("redox" == target."os" or null); + } + { + name = "windows-sys"; + packageId = "windows-sys 0.52.0"; + target = { target, features }: (target."windows" or false); + features = [ "Win32_Foundation" "Win32_Storage_FileSystem" ]; + } + ]; + + }; "fixedbitset" = rec { crateName = "fixedbitset"; version = "0.4.2"; @@ -2352,6 +2506,52 @@ rec { "serde" = [ "dep:serde" ]; }; }; + "flate2" = rec { + crateName = "flate2"; + version = "1.0.28"; + edition = "2018"; + sha256 = "03llhsh4gqdirnfxxb9g2w9n0721dyn4yjir3pz7z4vjaxb3yc26"; + authors = [ + "Alex Crichton <alex@alexcrichton.com>" + "Josh Triplett <josh@joshtriplett.org>" + ]; + dependencies = [ + { + name = "crc32fast"; + packageId = "crc32fast"; + } + { + name = "miniz_oxide"; + packageId = "miniz_oxide"; + optional = true; + usesDefaultFeatures = 
false; + features = [ "with-alloc" ]; + } + { + name = "miniz_oxide"; + packageId = "miniz_oxide"; + usesDefaultFeatures = false; + target = { target, features }: (("wasm32" == target."arch" or null) && (!("emscripten" == target."os" or null))); + features = [ "with-alloc" ]; + } + ]; + features = { + "any_zlib" = [ "any_impl" ]; + "cloudflare-zlib-sys" = [ "dep:cloudflare-zlib-sys" ]; + "cloudflare_zlib" = [ "any_zlib" "cloudflare-zlib-sys" ]; + "default" = [ "rust_backend" ]; + "libz-ng-sys" = [ "dep:libz-ng-sys" ]; + "libz-sys" = [ "dep:libz-sys" ]; + "miniz-sys" = [ "rust_backend" ]; + "miniz_oxide" = [ "dep:miniz_oxide" ]; + "rust_backend" = [ "miniz_oxide" "any_impl" ]; + "zlib" = [ "any_zlib" "libz-sys" ]; + "zlib-default" = [ "any_zlib" "libz-sys/default" ]; + "zlib-ng" = [ "any_zlib" "libz-ng-sys" ]; + "zlib-ng-compat" = [ "zlib" "libz-sys/zlib-ng" ]; + }; + resolvedDefaultFeatures = [ "any_impl" "default" "miniz_oxide" "rust_backend" ]; + }; "fnv" = rec { crateName = "fnv"; version = "1.0.7"; @@ -4100,7 +4300,7 @@ rec { "default" = [ "std" "general" "errno" ]; "rustc-dep-of-std" = [ "core" "compiler_builtins" "no_std" ]; }; - resolvedDefaultFeatures = [ "elf" "errno" "general" "ioctl" "no_std" ]; + resolvedDefaultFeatures = [ "elf" "errno" "general" "ioctl" "no_std" "std" ]; }; "litrs" = rec { crateName = "litrs"; @@ -4192,6 +4392,80 @@ rec { ]; features = { }; }; + "magic" = rec { + crateName = "magic"; + version = "0.16.2"; + edition = "2018"; + sha256 = "0g9py31aw19j5sr5lznb068byhgbiynflvizjrxcwgccvw1sw052"; + authors = [ + "Daniel Micay <danielmicay@gmail.com>" + "Petar Radošević <petar@wunki.org>" + "lilydjwg <lilydjwg@gmail.com>" + "Jeff Belgum <belgum@bastille.io>" + "Onur Aslan <onur@onur.im>" + "robo9k <robo9k@symlink.io>" + ]; + dependencies = [ + { + name = "bitflags"; + packageId = "bitflags 2.4.2"; + } + { + name = "libc"; + packageId = "libc"; + usesDefaultFeatures = false; + } + { + name = "magic-sys"; + packageId = "magic-sys"; + } + { + 
name = "thiserror"; + packageId = "thiserror"; + } + ]; + + }; + "magic-sys" = rec { + crateName = "magic-sys"; + version = "0.3.0"; + edition = "2015"; + links = "magic"; + sha256 = "1g5k9d9igxv4h23nbhp8bqa5gdpkd3ahgm0rh5i0s54mi3h6my7g"; + authors = [ + "robo9k <robo9k@symlink.io>" + ]; + dependencies = [ + { + name = "libc"; + packageId = "libc"; + usesDefaultFeatures = false; + } + ]; + buildDependencies = [ + { + name = "vcpkg"; + packageId = "vcpkg"; + } + ]; + features = { + "default" = [ "v5-38" ]; + "v5-05" = [ "v5-04" ]; + "v5-10" = [ "v5-05" ]; + "v5-13" = [ "v5-10" ]; + "v5-20" = [ "v5-13" ]; + "v5-21" = [ "v5-20" ]; + "v5-22" = [ "v5-21" ]; + "v5-23" = [ "v5-22" ]; + "v5-25" = [ "v5-23" ]; + "v5-27" = [ "v5-25" ]; + "v5-32" = [ "v5-27" ]; + "v5-35" = [ "v5-32" ]; + "v5-38" = [ "v5-35" ]; + "v5-40" = [ "v5-38" ]; + }; + resolvedDefaultFeatures = [ "default" "v5-04" "v5-05" "v5-10" "v5-13" "v5-20" "v5-21" "v5-22" "v5-23" "v5-25" "v5-27" "v5-32" "v5-35" "v5-38" ]; + }; "matchit" = rec { crateName = "matchit"; version = "0.7.3"; @@ -4340,6 +4614,7 @@ rec { "simd" = [ "simd-adler32" ]; "simd-adler32" = [ "dep:simd-adler32" ]; }; + resolvedDefaultFeatures = [ "with-alloc" ]; }; "mio" = rec { crateName = "mio"; @@ -6410,6 +6685,26 @@ rec { ]; }; + "redox_syscall 0.3.5" = rec { + crateName = "redox_syscall"; + version = "0.3.5"; + edition = "2018"; + sha256 = "0acgiy2lc1m2vr8cr33l5s7k9wzby8dybyab1a9p753hcbr68xjn"; + libName = "syscall"; + authors = [ + "Jeremy Soller <jackpot51@gmail.com>" + ]; + dependencies = [ + { + name = "bitflags"; + packageId = "bitflags 1.3.2"; + } + ]; + features = { + "core" = [ "dep:core" ]; + "rustc-dep-of-std" = [ "core" "bitflags/rustc-dep-of-std" ]; + }; + }; "redox_syscall 0.4.1" = rec { crateName = "redox_syscall"; version = "0.4.1"; @@ -9139,6 +9434,65 @@ rec { }; resolvedDefaultFeatures = [ "default" "fs" "net" "time" ]; }; + "tokio-tar" = rec { + crateName = "tokio-tar"; + version = "0.3.1"; + edition = "2018"; + sha256 = 
"0xffvap4g7hlswk5daklk3jaqha6s6wxw72c24kmqgna23018mwx"; + authors = [ + "Alex Crichton <alex@alexcrichton.com>" + "dignifiedquire <me@dignifiequire.com>" + "Artem Vorotnikov <artem@vorotnikov.me>" + "Aiden McClelland <me@drbonez.dev>" + ]; + dependencies = [ + { + name = "filetime"; + packageId = "filetime"; + } + { + name = "futures-core"; + packageId = "futures-core"; + } + { + name = "libc"; + packageId = "libc"; + target = { target, features }: (target."unix" or false); + } + { + name = "redox_syscall"; + packageId = "redox_syscall 0.3.5"; + target = { target, features }: ("redox" == target."os" or null); + } + { + name = "tokio"; + packageId = "tokio"; + features = [ "fs" "io-util" "rt" ]; + } + { + name = "tokio-stream"; + packageId = "tokio-stream"; + } + { + name = "xattr"; + packageId = "xattr"; + optional = true; + target = { target, features }: (target."unix" or false); + } + ]; + devDependencies = [ + { + name = "tokio"; + packageId = "tokio"; + features = [ "full" ]; + } + ]; + features = { + "default" = [ "xattr" ]; + "xattr" = [ "dep:xattr" ]; + }; + resolvedDefaultFeatures = [ "default" "xattr" ]; + }; "tokio-util" = rec { crateName = "tokio-util"; version = "0.7.10"; @@ -10769,6 +11123,11 @@ rec { else ./glue; dependencies = [ { + name = "async-compression"; + packageId = "async-compression"; + features = [ "tokio" "gzip" "bzip2" "xz" ]; + } + { name = "async-recursion"; packageId = "async-recursion"; } @@ -10789,10 +11148,18 @@ rec { packageId = "futures"; } { + name = "magic"; + packageId = "magic"; + } + { name = "nix-compat"; packageId = "nix-compat"; } { + name = "pin-project"; + packageId = "pin-project"; + } + { name = "reqwest"; packageId = "reqwest"; usesDefaultFeatures = false; @@ -10819,6 +11186,10 @@ rec { packageId = "tokio"; } { + name = "tokio-tar"; + packageId = "tokio-tar"; + } + { name = "tokio-util"; packageId = "tokio-util"; features = [ "io" "io-util" "compat" ]; @@ -11358,6 +11729,16 @@ rec { }; resolvedDefaultFeatures = [ 
"alloc" "std" ]; }; + "vcpkg" = rec { + crateName = "vcpkg"; + version = "0.2.15"; + edition = "2015"; + sha256 = "09i4nf5y8lig6xgj3f7fyrvzd3nlaw4znrihw8psidvv5yk4xkdc"; + authors = [ + "Jim McGrath <jimmc2@gmail.com>" + ]; + + }; "version_check" = rec { crateName = "version_check"; version = "0.9.4"; @@ -13269,6 +13650,47 @@ rec { ]; }; + "xattr" = rec { + crateName = "xattr"; + version = "1.3.1"; + edition = "2021"; + sha256 = "0kqxm36w89vc6qcpn6pizlhgjgzq138sx4hdhbv2g6wk4ld4za4d"; + authors = [ + "Steven Allen <steven@stebalien.com>" + ]; + dependencies = [ + { + name = "libc"; + packageId = "libc"; + target = { target, features }: (("freebsd" == target."os" or null) || ("netbsd" == target."os" or null)); + } + { + name = "linux-raw-sys"; + packageId = "linux-raw-sys"; + usesDefaultFeatures = false; + target = { target, features }: ("linux" == target."os" or null); + features = [ "std" ]; + } + { + name = "rustix"; + packageId = "rustix"; + usesDefaultFeatures = false; + features = [ "fs" "std" ]; + } + ]; + devDependencies = [ + { + name = "rustix"; + packageId = "rustix"; + usesDefaultFeatures = false; + features = [ "net" ]; + } + ]; + features = { + "default" = [ "unsupported" ]; + }; + resolvedDefaultFeatures = [ "default" "unsupported" ]; + }; "xml-rs" = rec { crateName = "xml-rs"; version = "0.8.19"; diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml index f4ebfe490688..1ec3832d917f 100644 --- a/tvix/glue/Cargo.toml +++ b/tvix/glue/Cargo.toml @@ -9,7 +9,9 @@ bstr = "1.6.0" bytes = "1.4.0" data-encoding = "2.3.3" futures = "0.3.30" +magic = "0.16.2" nix-compat = { path = "../nix-compat" } +pin-project = "1.1" reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots"], default-features = false } tvix-build = { path = "../build", default-features = false, features = []} tvix-eval = { path = "../eval" } @@ -17,6 +19,7 @@ tvix-castore = { path = "../castore" } tvix-store = { path = "../store", default-features = false, features = []} tracing 
= "0.1.37" tokio = "1.28.0" +tokio-tar = "0.3.1" tokio-util = { version = "0.7.9", features = ["io", "io-util", "compat"] } thiserror = "1.0.38" serde = "1.0.195" @@ -24,6 +27,10 @@ serde_json = "1.0" sha2 = "0.10.8" walkdir = "2.4.0" +[dependencies.async-compression] +version = "0.4.6" +features = ["tokio", "gzip", "bzip2", "xz"] + [dependencies.wu-manber] git = "https://github.com/tvlfyi/wu-manber.git" diff --git a/tvix/glue/src/decompression.rs b/tvix/glue/src/decompression.rs new file mode 100644 index 000000000000..7e526932e717 --- /dev/null +++ b/tvix/glue/src/decompression.rs @@ -0,0 +1,221 @@ +#![allow(dead_code)] // TODO + +use std::{ + io, mem, + pin::Pin, + task::{Context, Poll}, +}; + +use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder}; +use futures::ready; +use pin_project::pin_project; +use tokio::io::{AsyncBufRead, AsyncRead, BufReader, ReadBuf}; + +const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b]; +const BZIP2_MAGIC: [u8; 3] = *b"BZh"; +const XZ_MAGIC: [u8; 6] = [0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00]; +const BYTES_NEEDED: usize = 6; + +#[derive(Debug, Clone, Copy)] +enum Algorithm { + Gzip, + Bzip2, + Xz, +} + +impl Algorithm { + fn from_magic(magic: &[u8]) -> Option<Self> { + if magic.starts_with(&GZIP_MAGIC) { + Some(Self::Gzip) + } else if magic.starts_with(&BZIP2_MAGIC) { + Some(Self::Bzip2) + } else if magic.starts_with(&XZ_MAGIC) { + Some(Self::Xz) + } else { + None + } + } +} + +#[pin_project] +struct WithPreexistingBuffer<R> { + buffer: Vec<u8>, + #[pin] + inner: R, +} + +impl<R> AsyncRead for WithPreexistingBuffer<R> +where + R: AsyncRead, +{ + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll<io::Result<()>> { + let this = self.project(); + if !this.buffer.is_empty() { + // TODO: check if the buffer fits first + buf.put_slice(this.buffer); + this.buffer.clear(); + } + this.inner.poll_read(cx, buf) + } +} + +#[pin_project(project = DecompressedReaderInnerProj)] +enum 
DecompressedReaderInner<R> { + Unknown { + buffer: Vec<u8>, + #[pin] + inner: Option<R>, + }, + Gzip(#[pin] GzipDecoder<BufReader<WithPreexistingBuffer<R>>>), + Bzip2(#[pin] BzDecoder<BufReader<WithPreexistingBuffer<R>>>), + Xz(#[pin] XzDecoder<BufReader<WithPreexistingBuffer<R>>>), +} + +impl<R> DecompressedReaderInner<R> +where + R: AsyncBufRead, +{ + fn switch_to(&mut self, algorithm: Algorithm) { + let (buffer, inner) = match self { + DecompressedReaderInner::Unknown { buffer, inner } => { + (mem::take(buffer), inner.take().unwrap()) + } + DecompressedReaderInner::Gzip(_) + | DecompressedReaderInner::Bzip2(_) + | DecompressedReaderInner::Xz(_) => unreachable!(), + }; + let inner = BufReader::new(WithPreexistingBuffer { buffer, inner }); + + *self = match algorithm { + Algorithm::Gzip => Self::Gzip(GzipDecoder::new(inner)), + Algorithm::Bzip2 => Self::Bzip2(BzDecoder::new(inner)), + Algorithm::Xz => Self::Xz(XzDecoder::new(inner)), + } + } +} + +impl<R> AsyncRead for DecompressedReaderInner<R> +where + R: AsyncBufRead, +{ + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll<io::Result<()>> { + match self.project() { + DecompressedReaderInnerProj::Unknown { .. 
} => { + unreachable!("Can't call poll_read on Unknown") + } + DecompressedReaderInnerProj::Gzip(inner) => inner.poll_read(cx, buf), + DecompressedReaderInnerProj::Bzip2(inner) => inner.poll_read(cx, buf), + DecompressedReaderInnerProj::Xz(inner) => inner.poll_read(cx, buf), + } + } +} + +#[pin_project] +pub struct DecompressedReader<R> { + #[pin] + inner: DecompressedReaderInner<R>, + switch_to: Option<Algorithm>, +} + +impl<R> DecompressedReader<R> { + pub fn new(inner: R) -> Self { + Self { + inner: DecompressedReaderInner::Unknown { + buffer: vec![0; BYTES_NEEDED], + inner: Some(inner), + }, + switch_to: None, + } + } +} + +impl<R> AsyncRead for DecompressedReader<R> +where + R: AsyncBufRead + Unpin, +{ + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll<io::Result<()>> { + let mut this = self.project(); + let (buffer, inner) = match this.inner.as_mut().project() { + DecompressedReaderInnerProj::Gzip(inner) => return inner.poll_read(cx, buf), + DecompressedReaderInnerProj::Bzip2(inner) => return inner.poll_read(cx, buf), + DecompressedReaderInnerProj::Xz(inner) => return inner.poll_read(cx, buf), + DecompressedReaderInnerProj::Unknown { buffer, inner } => (buffer, inner), + }; + + let mut our_buf = ReadBuf::new(buffer); + if let Err(e) = ready!(inner.as_pin_mut().unwrap().poll_read(cx, &mut our_buf)) { + return Poll::Ready(Err(e)); + } + + let data = our_buf.filled(); + if data.len() >= BYTES_NEEDED { + if let Some(algorithm) = Algorithm::from_magic(data) { + this.inner.as_mut().switch_to(algorithm); + } else { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "tar data not gz, bzip2, or xz compressed", + ))); + } + this.inner.poll_read(cx, buf) + } else { + cx.waker().wake_by_ref(); + Poll::Pending + } + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use async_compression::tokio::bufread::GzipEncoder; + use futures::TryStreamExt; + use test_case::test_case; + use 
tokio::io::{AsyncReadExt, BufReader}; + use tokio_tar::Archive; + + use super::*; + + #[tokio::test] + async fn gzip() { + let data = b"abcdefghijk"; + let mut enc = GzipEncoder::new(&data[..]); + let mut gzipped = vec![]; + enc.read_to_end(&mut gzipped).await.unwrap(); + + let mut reader = DecompressedReader::new(BufReader::new(&gzipped[..])); + let mut round_tripped = vec![]; + reader.read_to_end(&mut round_tripped).await.unwrap(); + + assert_eq!(data[..], round_tripped[..]); + } + + #[test_case(include_bytes!("tests/blob.tar.gz"); "gzip")] + #[test_case(include_bytes!("tests/blob.tar.bz2"); "bzip2")] + #[test_case(include_bytes!("tests/blob.tar.xz"); "xz")] + #[tokio::test] + async fn compressed_tar(data: &[u8]) { + let reader = DecompressedReader::new(BufReader::new(data)); + let mut archive = Archive::new(reader); + let mut entries: Vec<_> = archive.entries().unwrap().try_collect().await.unwrap(); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].path().unwrap().as_ref(), Path::new("empty")); + let mut data = String::new(); + entries[0].read_to_string(&mut data).await.unwrap(); + assert_eq!(data, ""); + } +} diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs index b2f586ce52d6..f04d5ec3a0f2 100644 --- a/tvix/glue/src/lib.rs +++ b/tvix/glue/src/lib.rs @@ -5,6 +5,7 @@ pub mod tvix_build; pub mod tvix_io; pub mod tvix_store_io; +mod decompression; #[cfg(test)] mod tests; diff --git a/tvix/glue/src/tests/blob.tar.bz2 b/tvix/glue/src/tests/blob.tar.bz2 new file mode 100644 index 000000000000..d74b9139127f --- /dev/null +++ b/tvix/glue/src/tests/blob.tar.bz2 Binary files differdiff --git a/tvix/glue/src/tests/blob.tar.gz b/tvix/glue/src/tests/blob.tar.gz new file mode 100644 index 000000000000..c2bae55078d7 --- /dev/null +++ b/tvix/glue/src/tests/blob.tar.gz Binary files differdiff --git a/tvix/glue/src/tests/blob.tar.xz b/tvix/glue/src/tests/blob.tar.xz new file mode 100644 index 000000000000..324a99d89549 --- /dev/null +++ 
b/tvix/glue/src/tests/blob.tar.xz Binary files differ |