From 73880786308d956061951114271f12a395f884b5 Mon Sep 17 00:00:00 2001 From: Ryan Lahfa Date: Wed, 17 Jan 2024 07:45:55 +0100 Subject: feat(tvix/eval): implement `builtins.filterSource` We add a new set of builtins called `import_builtins`, which will contain import-related builtins, such as `builtins.path` and `builtins.filterSource`. Both can import paths into the store, with various knobs to alter the result, e.g. filtering, renaming, expected hashes. We introduce `filtered_ingest` which will drive the filtered ingestion via the Nix function via the generator machinery, and then we register the root node to the path info service inside the store. `builtins.filterSource` is very simple, `builtins.path` is a more complicated model requiring the same logic albeit more sophisticated with name customization, file ingestion method and expected SHA-256. Change-Id: I1083f37808b35f7b37818c8ffb9543d9682b2de2 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10654 Autosubmit: raitobezarius Tested-by: BuildkiteCI Reviewed-by: flokli --- tvix/Cargo.lock | 53 +++++++----- tvix/Cargo.nix | 124 +++++++++++++++++++--------- tvix/cli/src/main.rs | 4 +- tvix/glue/Cargo.toml | 2 + tvix/glue/benches/eval.rs | 5 +- tvix/glue/src/builtins/import.rs | 148 ++++++++++++++++++++++++++++++++++ tvix/glue/src/builtins/mod.rs | 170 ++++++++++++++++++++++++++++++++++++++- tvix/glue/src/tests/mod.rs | 5 +- tvix/glue/src/tvix_store_io.rs | 79 +++++++++++++++++- 9 files changed, 521 insertions(+), 69 deletions(-) create mode 100644 tvix/glue/src/builtins/import.rs (limited to 'tvix') diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 26c0a47dddf9..585199440b1e 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -34,9 +34,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"628a8f9bd1e24b4e0db2b4bc2d000b001e7dd032d54afa60a68836aeec5aa54a" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -1001,9 +1001,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.23" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b553656127a00601c8ae5590fcfdc118e4083a7924b6cf4ffc1ea4b99dc429d7" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -1044,9 +1044,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" [[package]] name = "hex-literal" @@ -1174,9 +1174,9 @@ dependencies = [ [[package]] name = "imbl-sized-chunks" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6957ea0b2541c5ca561d3ef4538044af79f8a05a1eb3a3b148936aaceaa1076" +checksum = "144006fb58ed787dcae3f54575ff4349755b00ccc99f4b4873860b654be1ed63" dependencies = [ "bitmaps", ] @@ -1523,6 +1523,17 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.4.2", + "cfg-if", + "libc", +] + [[package]] name = "nix-compat" version = "0.1.0" @@ -1841,9 +1852,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "platforms" @@ -2104,9 +2115,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" dependencies = [ "either", "rayon-core", @@ -2114,9 +2125,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -2597,9 +2608,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "smol_str" @@ -3382,6 +3393,7 @@ dependencies = [ "futures", "hex-literal", "lazy_static", + "nix 0.27.1", "nix-compat", "pretty_assertions", "rstest", @@ -3397,6 +3409,7 @@ dependencies = [ "tvix-castore", "tvix-eval", "tvix-store", + "walkdir", "wu-manber", ] @@ -3469,9 +3482,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-bidi" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" @@ -3537,9 +3550,9 
@@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom", ] diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index f12d95506191..b0c36524e472 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -234,9 +234,9 @@ rec { }; "anstream" = rec { crateName = "anstream"; - version = "0.6.8"; + version = "0.6.11"; edition = "2021"; - sha256 = "0jm5bbnawdl8lrhgljnm6b87s7h01c02vg5ln86lwjz2s6dqz2k2"; + sha256 = "19dndamalavhjwp4i74k8hdijcixb7gsfa6ycwyc1r8xn6y1wbkf"; dependencies = [ { name = "anstyle"; @@ -260,7 +260,6 @@ rec { { name = "colorchoice"; packageId = "colorchoice"; - optional = true; } { name = "utf8parse"; @@ -268,7 +267,7 @@ rec { } ]; features = { - "auto" = [ "dep:anstyle-query" "dep:colorchoice" ]; + "auto" = [ "dep:anstyle-query" ]; "default" = [ "auto" "wincon" ]; "wincon" = [ "dep:anstyle-wincon" ]; }; @@ -2934,9 +2933,9 @@ rec { }; "h2" = rec { crateName = "h2"; - version = "0.3.23"; + version = "0.3.24"; edition = "2018"; - sha256 = "1mr9qjfvk90yzi7wzdi4g4x0ir0qq7yzr42mmv4021m04xhnalxm"; + sha256 = "1jf9488b66nayxzp3iw3b2rb64y49hdbbywnv9wfwrsv14i48b5v"; authors = [ "Carl Lerche " "Sean McArthur " @@ -3083,9 +3082,9 @@ rec { }; "hermit-abi" = rec { crateName = "hermit-abi"; - version = "0.3.3"; + version = "0.3.4"; edition = "2021"; - sha256 = "1dyc8qsjh876n74a3rcz8h43s27nj1sypdhsn2ms61bd3b47wzyp"; + sha256 = "07v5vbwb9kx0yxgdpx15h38ynpzhaqx5ncriryipypi5707hwgax"; authors = [ "Stefan Lankes" ]; @@ -3509,9 +3508,9 @@ rec { }; "imbl-sized-chunks" = rec { crateName = "imbl-sized-chunks"; - version = "0.1.1"; + version = "0.1.2"; edition = "2021"; - sha256 = "0xhhmb7aldl92hxkmsx10n59zxsa0hw4bvykc6jmq72lnah7x5g6"; + sha256 = 
"0qzdw55na2w6fd44p7y9rh05nxa98gzpaigmwg57sy7db3xhch0l"; authors = [ "Bodil Stokke " "Joe Neeman " @@ -4506,6 +4505,47 @@ rec { }; resolvedDefaultFeatures = [ "feature" "fs" "user" ]; }; + "nix 0.27.1" = rec { + crateName = "nix"; + version = "0.27.1"; + edition = "2021"; + sha256 = "0ly0kkmij5f0sqz35lx9czlbk6zpihb7yh1bsy4irzwfd2f4xc1f"; + authors = [ + "The nix-rust Project Developers" + ]; + dependencies = [ + { + name = "bitflags"; + packageId = "bitflags 2.4.2"; + } + { + name = "cfg-if"; + packageId = "cfg-if"; + } + { + name = "libc"; + packageId = "libc"; + features = [ "extra_traits" ]; + } + ]; + features = { + "aio" = [ "pin-utils" ]; + "dir" = [ "fs" ]; + "memoffset" = [ "dep:memoffset" ]; + "mount" = [ "uio" ]; + "mqueue" = [ "fs" ]; + "net" = [ "socket" ]; + "pin-utils" = [ "dep:pin-utils" ]; + "ptrace" = [ "process" ]; + "sched" = [ "process" ]; + "signal" = [ "process" ]; + "socket" = [ "memoffset" ]; + "ucontext" = [ "signal" ]; + "user" = [ "feature" ]; + "zerocopy" = [ "fs" "uio" ]; + }; + resolvedDefaultFeatures = [ "default" "fs" ]; + }; "nix-compat" = rec { crateName = "nix-compat"; version = "0.1.0"; @@ -5485,9 +5525,9 @@ rec { }; "pkg-config" = rec { crateName = "pkg-config"; - version = "0.3.28"; + version = "0.3.29"; edition = "2015"; - sha256 = "16kgffwncx5hsppsdf54z6jnjkhwywqy601cxk3rqncyi9zmilv9"; + sha256 = "1jy6158v1316khkpmq2sjj1vgbnbnw51wffx7p0k0l9h9vlys019"; authors = [ "Alex Crichton " ]; @@ -6260,9 +6300,9 @@ rec { }; "rayon" = rec { crateName = "rayon"; - version = "1.8.0"; + version = "1.8.1"; edition = "2021"; - sha256 = "1cfdnvchf7j4cpha5jkcrrsr61li9i9lp5ak7xdq6d3pvc1xn9ww"; + sha256 = "0lg0488xwpj5jsfz2gfczcrpclbjl8221mj5vdrhg8bp3883fwps"; authors = [ "Niko Matsakis " "Josh Stone " @@ -6278,14 +6318,16 @@ rec { packageId = "rayon-core"; } ]; - + features = { + "web_spin_lock" = [ "dep:wasm_sync" "rayon-core/web_spin_lock" ]; + }; }; "rayon-core" = rec { crateName = "rayon-core"; - version = "1.12.0"; + version = "1.12.1"; 
edition = "2021"; links = "rayon-core"; - sha256 = "1vaq0q71yfvcwlmia0iqf6ixj2fibjcf2xjy92n1m1izv1mgpqsw"; + sha256 = "1qpwim68ai5h0j7axa8ai8z0payaawv3id0lrgkqmapx7lx8fr8l"; authors = [ "Niko Matsakis " "Josh Stone " @@ -6300,7 +6342,9 @@ rec { packageId = "crossbeam-utils"; } ]; - + features = { + "web_spin_lock" = [ "dep:wasm_sync" ]; + }; }; "redox_syscall 0.2.16" = rec { crateName = "redox_syscall"; @@ -7903,9 +7947,9 @@ rec { }; "smallvec" = rec { crateName = "smallvec"; - version = "1.12.0"; + version = "1.13.1"; edition = "2018"; - sha256 = "17h0f8f02m6xnjzk82jbsdfypwncq9j3mllb3nbdzn7ah8gx74r5"; + sha256 = "1mzk9j117pn3k1gabys0b7nz8cdjsx5xc6q7fwnm8r0an62d7v76"; authors = [ "The Servo Project Developers" ]; @@ -10684,6 +10728,10 @@ rec { packageId = "tvix-store"; usesDefaultFeatures = false; } + { + name = "walkdir"; + packageId = "walkdir"; + } { name = "wu-manber"; packageId = "wu-manber"; @@ -10703,6 +10751,11 @@ rec { name = "lazy_static"; packageId = "lazy_static"; } + { + name = "nix"; + packageId = "nix 0.27.1"; + features = [ "fs" ]; + } { name = "pretty_assertions"; packageId = "pretty_assertions"; @@ -10985,9 +11038,9 @@ rec { }; "unicode-bidi" = rec { crateName = "unicode-bidi"; - version = "0.3.14"; + version = "0.3.15"; edition = "2018"; - sha256 = "05i4ps31vskq1wdp8yf315fxivyh1frijly9d4gb5clygbr2h9bg"; + sha256 = "0xcdxm7h0ydyprwpcbh436rbs6s6lph7f3gr527lzgv6lw053y88"; libName = "unicode_bidi"; authors = [ "The Servo Project Developers" @@ -11135,9 +11188,9 @@ rec { }; "uuid" = rec { crateName = "uuid"; - version = "1.6.1"; + version = "1.7.0"; edition = "2018"; - sha256 = "0q45jxahvysldn3iy04m8xmr8hgig80855y9gq9di8x72v7myfay"; + sha256 = "0aivp5ys7sg2izlj2sn6rr8p43vdcwg64naj8n0kqbd15iqcj37h"; authors = [ "Ashley Mannix" "Christopher Armstrong" @@ -11148,39 +11201,32 @@ rec { { name = "getrandom"; packageId = "getrandom"; - rename = "getrandom"; optional = true; } ]; features = { "arbitrary" = [ "dep:arbitrary" ]; "atomic" = [ "dep:atomic" ]; - 
"borsh" = [ "dep:borsh" ]; + "borsh" = [ "dep:borsh" "dep:borsh-derive" ]; "bytemuck" = [ "dep:bytemuck" ]; "default" = [ "std" ]; - "fast-rng" = [ "rng" "rand" ]; - "getrandom" = [ "dep:getrandom" ]; - "js" = [ "wasm-bindgen" "getrandom" "getrandom/js" ]; - "macro-diagnostics" = [ "uuid-macro-internal" ]; - "md-5" = [ "dep:md-5" ]; - "md5" = [ "md-5" ]; - "rand" = [ "dep:rand" ]; - "rng" = [ "getrandom" ]; + "fast-rng" = [ "rng" "dep:rand" ]; + "js" = [ "dep:wasm-bindgen" "getrandom?/js" ]; + "macro-diagnostics" = [ "dep:uuid-macro-internal" ]; + "md5" = [ "dep:md-5" ]; + "rng" = [ "dep:getrandom" ]; "serde" = [ "dep:serde" ]; - "sha1" = [ "sha1_smol" ]; - "sha1_smol" = [ "dep:sha1_smol" ]; + "sha1" = [ "dep:sha1_smol" ]; "slog" = [ "dep:slog" ]; - "uuid-macro-internal" = [ "dep:uuid-macro-internal" ]; "v1" = [ "atomic" ]; "v3" = [ "md5" ]; "v4" = [ "rng" ]; "v5" = [ "sha1" ]; "v6" = [ "atomic" ]; "v7" = [ "atomic" "rng" ]; - "wasm-bindgen" = [ "dep:wasm-bindgen" ]; "zerocopy" = [ "dep:zerocopy" ]; }; - resolvedDefaultFeatures = [ "default" "getrandom" "rng" "std" "v4" ]; + resolvedDefaultFeatures = [ "default" "rng" "std" "v4" ]; }; "valuable" = rec { crateName = "valuable"; diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs index b7394a984b3f..d8a51d28915c 100644 --- a/tvix/cli/src/main.rs +++ b/tvix/cli/src/main.rs @@ -10,6 +10,7 @@ use tvix_eval::builtins::impure_builtins; use tvix_eval::observer::{DisassemblingObserver, TracingObserver}; use tvix_eval::{EvalIO, Value}; use tvix_glue::builtins::add_fetcher_builtins; +use tvix_glue::builtins::add_import_builtins; use tvix_glue::tvix_io::TvixIO; use tvix_glue::tvix_store_io::TvixStoreIO; use tvix_glue::{builtins::add_derivation_builtins, configure_nix_path}; @@ -130,7 +131,8 @@ fn interpret(code: &str, path: Option, args: &Args, explain: bool) -> b eval.strict = args.strict; eval.builtins.extend(impure_builtins()); add_derivation_builtins(&mut eval, tvix_store_io.clone()); - add_fetcher_builtins(&mut eval, 
tvix_store_io); + add_fetcher_builtins(&mut eval, tvix_store_io.clone()); + add_import_builtins(&mut eval, tvix_store_io); configure_nix_path(&mut eval, &args.nix_search_path); let source_map = eval.source_map(); diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml index bce4556704b6..5a0296f97511 100644 --- a/tvix/glue/Cargo.toml +++ b/tvix/glue/Cargo.toml @@ -20,6 +20,7 @@ thiserror = "1.0.38" serde = "1.0.195" serde_json = "1.0" sha2 = "0.10.8" +walkdir = "2.4.0" [dependencies.wu-manber] git = "https://github.com/tvlfyi/wu-manber.git" @@ -28,6 +29,7 @@ git = "https://github.com/tvlfyi/wu-manber.git" criterion = { version = "0.5", features = ["html_reports"] } hex-literal = "0.4.1" lazy_static = "1.4.0" +nix = { version = "0.27.1", features = [ "fs" ] } pretty_assertions = "1.4.0" rstest = "0.18.2" tempfile = "3.8.1" diff --git a/tvix/glue/benches/eval.rs b/tvix/glue/benches/eval.rs index 2bbe10fe93b4..3e3dc533265f 100644 --- a/tvix/glue/benches/eval.rs +++ b/tvix/glue/benches/eval.rs @@ -8,7 +8,7 @@ use tvix_castore::{ }; use tvix_eval::{builtins::impure_builtins, EvalIO}; use tvix_glue::{ - builtins::{add_derivation_builtins, add_fetcher_builtins}, + builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}, configure_nix_path, tvix_io::TvixIO, tvix_store_io::TvixStoreIO, @@ -47,7 +47,8 @@ fn interpret(code: &str) { eval.builtins.extend(impure_builtins()); add_derivation_builtins(&mut eval, tvix_store_io.clone()); - add_fetcher_builtins(&mut eval, tvix_store_io); + add_fetcher_builtins(&mut eval, tvix_store_io.clone()); + add_import_builtins(&mut eval, tvix_store_io); configure_nix_path( &mut eval, // The benchmark requires TVIX_BENCH_NIX_PATH to be set, so barf out diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs new file mode 100644 index 000000000000..536585102d45 --- /dev/null +++ b/tvix/glue/src/builtins/import.rs @@ -0,0 +1,148 @@ +//! Implements builtins used to import paths in the store. 
+ +use futures::pin_mut; +use std::path::Path; +use tvix_eval::{ + builtin_macros::builtins, + generators::{self, GenCo}, + ErrorKind, Value, +}; + +use std::rc::Rc; + +async fn filtered_ingest( + state: Rc, + co: GenCo, + path: &Path, + filter: Option<&Value>, +) -> Result { + let mut entries_per_depths: Vec> = vec![Vec::new()]; + let mut it = walkdir::WalkDir::new(path) + .follow_links(false) + .follow_root_links(false) + .contents_first(false) + .sort_by_file_name() + .into_iter(); + + // Skip root node. + entries_per_depths[0].push( + it.next() + .ok_or_else(|| ErrorKind::IO { + path: Some(path.to_path_buf()), + error: std::io::Error::new(std::io::ErrorKind::NotFound, "No root node emitted") + .into(), + })? + .map_err(|err| ErrorKind::IO { + path: Some(path.to_path_buf()), + error: std::io::Error::from(err).into(), + })?, + ); + + while let Some(entry) = it.next() { + // Entry could be a NotFound, if the root path specified does not exist. + let entry = entry.map_err(|err| ErrorKind::IO { + path: err.path().map(|p| p.to_path_buf()), + error: std::io::Error::from(err).into(), + })?; + + // As per Nix documentation `:doc builtins.filterSource`. + let file_type = if entry.file_type().is_dir() { + "directory" + } else if entry.file_type().is_file() { + "regular" + } else if entry.file_type().is_symlink() { + "symlink" + } else { + "unknown" + }; + + let should_keep: bool = if let Some(filter) = filter { + generators::request_force( + &co, + generators::request_call_with( + &co, + filter.clone(), + [ + Value::String(Box::new(entry.path().as_os_str().as_encoded_bytes().into())), + Value::String(Box::new(file_type.into())), + ], + ) + .await, + ) + .await + .as_bool()? 
+ } else { + true + }; + + if !should_keep { + if file_type == "directory" { + it.skip_current_dir(); + } + continue; + } + + if entry.depth() >= entries_per_depths.len() { + debug_assert!( + entry.depth() == entries_per_depths.len(), + "Received unexpected entry with depth {} during descent, previously at {}", + entry.depth(), + entries_per_depths.len() + ); + + entries_per_depths.push(vec![entry]); + } else { + entries_per_depths[entry.depth()].push(entry); + } + + // FUTUREWORK: determine when it's the right moment to flush a level to the ingester. + } + + let entries_stream = tvix_castore::import::leveled_entries_to_stream(entries_per_depths); + + pin_mut!(entries_stream); + + state + .ingest_entries_sync(entries_stream) + .map_err(|err| ErrorKind::IO { + path: Some(path.to_path_buf()), + error: err.into(), + }) +} + +#[builtins(state = "Rc")] +mod import_builtins { + use std::rc::Rc; + + use super::*; + + use tvix_eval::generators::Gen; + use tvix_eval::{generators::GenCo, ErrorKind, Value}; + + use crate::tvix_store_io::TvixStoreIO; + + #[builtin("filterSource")] + async fn builtin_filter_source( + state: Rc, + co: GenCo, + #[lazy] filter: Value, + path: Value, + ) -> Result { + let p = path.to_path()?; + let root_node = filtered_ingest(state.clone(), co, &p, Some(&filter)).await?; + let name = tvix_store::import::path_to_name(&p)?; + + Ok(state + .register_node_in_path_info_service_sync(name, &p, root_node) + .map_err(|err| ErrorKind::IO { + path: Some(p.to_path_buf()), + error: err.into(), + })? 
+ .to_absolute_path() + .into()) + } +} + +pub use import_builtins::builtins as import_builtins; + +use crate::tvix_store_io::TvixStoreIO; diff --git a/tvix/glue/src/builtins/mod.rs b/tvix/glue/src/builtins/mod.rs index 8fc3e0ada7bc..78e5fe87ec4c 100644 --- a/tvix/glue/src/builtins/mod.rs +++ b/tvix/glue/src/builtins/mod.rs @@ -8,6 +8,8 @@ mod derivation; mod derivation_error; mod fetchers; +mod import; + pub use derivation_error::Error as DerivationError; /// Adds derivation-related builtins to the passed [tvix_eval::Evaluation]. @@ -35,14 +37,26 @@ pub fn add_fetcher_builtins(eval: &mut tvix_eval::Evaluation, io: Rc(eval: &mut tvix_eval::Evaluation, io: Rc) { + eval.builtins.extend(import::import_builtins(io)); + + // TODO(raitobezarius): evaluate expressing filterSource as Nix code using path (b/372) +} + #[cfg(test)] mod tests { - use std::{rc::Rc, sync::Arc}; + use std::{fs, rc::Rc, sync::Arc}; use crate::tvix_store_io::TvixStoreIO; - use super::{add_derivation_builtins, add_fetcher_builtins}; + use super::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}; use nix_compat::store_path::hash_placeholder; + use tempfile::TempDir; use test_case::test_case; use tvix_build::buildservice::DummyBuildService; use tvix_eval::{EvalIO, EvaluationResult}; @@ -69,7 +83,8 @@ mod tests { let mut eval = tvix_eval::Evaluation::new(io.clone() as Rc, false); add_derivation_builtins(&mut eval, io.clone()); - add_fetcher_builtins(&mut eval, io); + add_fetcher_builtins(&mut eval, io.clone()); + add_import_builtins(&mut eval, io); // run the evaluation itself. eval.evaluate(str, None) @@ -333,4 +348,153 @@ mod tests { "warnings should not be empty" ); } + + /// Invokes `builtins.filterSource` on various carefully-crafted subdirs, and + /// ensures the resulting store paths match what Nix produces. + /// @fixtures is replaced with the fixtures directory. 
+ #[cfg(target_family = "unix")] + #[test_case(r#"(builtins.filterSource (p: t: true) @fixtures)"#, "/nix/store/bqh6kd0x3vps2rzagzpl7qmbbgnx19cp-import_fixtures"; "complicated directory: filter nothing")] + #[test_case(r#"(builtins.filterSource (p: t: false) @fixtures)"#, "/nix/store/giq6czz24lpjg97xxcxk6rg950lcpib1-import_fixtures"; "complicated directory: filter everything")] + #[test_case(r#"(builtins.filterSource (p: t: t != "directory") @fixtures/a_dir)"#, "/nix/store/8vbqaxapywkvv1hacdja3pi075r14d43-a_dir"; "simple directory with one file: filter directories")] + #[test_case(r#"(builtins.filterSource (p: t: t != "regular") @fixtures/a_dir)"#, "/nix/store/zphlqc93s2iq4xm393l06hzf8hp85r4z-a_dir"; "simple directory with one file: filter files")] + #[test_case(r#"(builtins.filterSource (p: t: t != "symlink") @fixtures/a_dir)"#, "/nix/store/8vbqaxapywkvv1hacdja3pi075r14d43-a_dir"; "simple directory with one file: filter symlinks")] + #[test_case(r#"(builtins.filterSource (p: t: true) @fixtures/a_dir)"#, "/nix/store/8vbqaxapywkvv1hacdja3pi075r14d43-a_dir"; "simple directory with one file: filter nothing")] + #[test_case(r#"(builtins.filterSource (p: t: false) @fixtures/a_dir)"#, "/nix/store/zphlqc93s2iq4xm393l06hzf8hp85r4z-a_dir"; "simple directory with one file: filter everything")] + #[test_case(r#"builtins.filterSource (p: t: t != "directory") @fixtures/b_dir"#, "/nix/store/xzsfzdgrxg93icaamjm8zq1jq6xvf2fz-b_dir"; "simple directory with one directory: filter directories")] + #[test_case(r#"builtins.filterSource (p: t: t != "regular") @fixtures/b_dir"#, "/nix/store/8rjx64mm7173xp60rahv7cl3ixfkv3rf-b_dir"; "simple directory with one directory: filter files")] + #[test_case(r#"builtins.filterSource (p: t: t != "symlink") @fixtures/b_dir"#, "/nix/store/8rjx64mm7173xp60rahv7cl3ixfkv3rf-b_dir"; "simple directory with one directory: filter symlinks")] + #[test_case(r#"builtins.filterSource (p: t: true) @fixtures/b_dir"#, 
"/nix/store/8rjx64mm7173xp60rahv7cl3ixfkv3rf-b_dir"; "simple directory with one directory: filter nothing")] + #[test_case(r#"builtins.filterSource (p: t: false) @fixtures/b_dir"#, "/nix/store/xzsfzdgrxg93icaamjm8zq1jq6xvf2fz-b_dir"; "simple directory with one directory: filter everything")] + #[test_case(r#"builtins.filterSource (p: t: t != "directory") @fixtures/c_dir"#, "/nix/store/riigfmmzzrq65zqiffcjk5sbqr9c9h09-c_dir"; "simple directory with one symlink to a file: filter directory")] + #[test_case(r#"builtins.filterSource (p: t: t != "regular") @fixtures/c_dir"#, "/nix/store/riigfmmzzrq65zqiffcjk5sbqr9c9h09-c_dir"; "simple directory with one symlink to a file: filter files")] + #[test_case(r#"builtins.filterSource (p: t: t != "symlink") @fixtures/c_dir"#, "/nix/store/y5g1fz04vzjvf422q92qmv532axj5q26-c_dir"; "simple directory with one symlink to a file: filter symlinks")] + #[test_case(r#"builtins.filterSource (p: t: true) @fixtures/c_dir"#, "/nix/store/riigfmmzzrq65zqiffcjk5sbqr9c9h09-c_dir"; "simple directory with one symlink to a file: filter nothing")] + #[test_case(r#"builtins.filterSource (p: t: false) @fixtures/c_dir"#, "/nix/store/y5g1fz04vzjvf422q92qmv532axj5q26-c_dir"; "simple directory with one symlink to a file: filter everything")] + #[test_case(r#"builtins.filterSource (p: t: t != "directory") @fixtures/d_dir"#, "/nix/store/f2d1aixwiqy4lbzrd040ala2s4m2z199-d_dir"; "simple directory with dangling symlink: filter directory")] + #[test_case(r#"builtins.filterSource (p: t: t != "regular") @fixtures/d_dir"#, "/nix/store/f2d1aixwiqy4lbzrd040ala2s4m2z199-d_dir"; "simple directory with dangling symlink: filter file")] + #[test_case(r#"builtins.filterSource (p: t: t != "symlink") @fixtures/d_dir"#, "/nix/store/7l371xax8kknhpska4wrmyll1mzlhzvl-d_dir"; "simple directory with dangling symlink: filter symlinks")] + #[test_case(r#"builtins.filterSource (p: t: true) @fixtures/d_dir"#, "/nix/store/f2d1aixwiqy4lbzrd040ala2s4m2z199-d_dir"; "simple directory with 
dangling symlink: filter nothing")] + #[test_case(r#"builtins.filterSource (p: t: false) @fixtures/d_dir"#, "/nix/store/7l371xax8kknhpska4wrmyll1mzlhzvl-d_dir"; "simple directory with dangling symlink: filter everything")] + #[test_case(r#"builtins.filterSource (p: t: t != "directory") @fixtures/symlink_to_a_dir"#, "/nix/store/apmdprm8fwl2zrjpbyfcd99zrnhvf47q-symlink_to_a_dir"; "simple symlinked directory with one file: filter directories")] + #[test_case(r#"builtins.filterSource (p: t: t != "regular") @fixtures/symlink_to_a_dir"#, "/nix/store/apmdprm8fwl2zrjpbyfcd99zrnhvf47q-symlink_to_a_dir"; "simple symlinked directory with one file: filter file")] + #[test_case(r#"builtins.filterSource (p: t: t != "symlink") @fixtures/symlink_to_a_dir"#, "/nix/store/apmdprm8fwl2zrjpbyfcd99zrnhvf47q-symlink_to_a_dir"; "simple symlinked directory with one file: filter symlinks")] + #[test_case(r#"builtins.filterSource (p: t: true) @fixtures/symlink_to_a_dir"#, "/nix/store/apmdprm8fwl2zrjpbyfcd99zrnhvf47q-symlink_to_a_dir"; "simple symlinked directory with one file: filter nothing")] + #[test_case(r#"builtins.filterSource (p: t: false) @fixtures/symlink_to_a_dir"#, "/nix/store/apmdprm8fwl2zrjpbyfcd99zrnhvf47q-symlink_to_a_dir"; "simple symlinked directory with one file: filter everything")] + fn builtins_filter_source_succeed(code: &str, expected_outpath: &str) { + // populate the fixtures dir + let temp = TempDir::new().expect("create temporary directory"); + let p = temp.path().join("import_fixtures"); + + // create the fixtures directory. + // We produce them at runtime rather than shipping it inside the source + // tree, as git can't model certain things - like directories without any + // items. 
+ { + fs::create_dir(&p).expect("creating import_fixtures"); + + // `/a_dir` contains an empty `a_file` file + fs::create_dir(p.join("a_dir")).expect("creating /a_dir"); + fs::write(p.join("a_dir").join("a_file"), "").expect("creating /a_dir/a_file"); + + // `/a_file` is an empty file + fs::write(p.join("a_file"), "").expect("creating /a_file"); + + // `/b_dir` contains an empty "a_dir" directory + fs::create_dir_all(p.join("b_dir").join("a_dir")).expect("creating /b_dir/a_dir"); + + // `/c_dir` contains a `symlink_to_a_file` symlink, pointing to `../a_dir/a_file`. + fs::create_dir(p.join("c_dir")).expect("creating /c_dir"); + std::os::unix::fs::symlink( + "../a_dir/a_file", + p.join("c_dir").join("symlink_to_a_file"), + ) + .expect("creating /c_dir/symlink_to_a_file"); + + // `/d_dir` contains a `dangling_symlink`, pointing to `a_dir/a_file`, + // which does not exist. + fs::create_dir(p.join("d_dir")).expect("creating /d_dir"); + std::os::unix::fs::symlink("a_dir/a_file", p.join("d_dir").join("dangling_symlink")) + .expect("creating /d_dir/dangling_symlink"); + + // `/symlink_to_a_dir` is a symlink to `a_dir`, which exists. + std::os::unix::fs::symlink("a_dir", p.join("symlink_to_a_dir")) + .expect("creating /symlink_to_a_dir"); + } + + // replace @fixtures with the temporary path containing the fixtures + let code_replaced = code.replace("@fixtures", &p.to_string_lossy()); + + let eval_result = eval(&code_replaced); + + let value = eval_result.value.expect("must succeed"); + + match value { + tvix_eval::Value::String(s) => { + assert_eq!(expected_outpath, s.as_bstr()); + } + _ => panic!("unexpected value type: {:?}", value), + } + + assert!(eval_result.errors.is_empty(), "errors should be empty"); + } + + // All tests filter out some unsupported (not representable in castore) nodes, confirming + // invalid, but filtered-out nodes don't prevent ingestion of a path. + #[cfg(target_family = "unix")] + // There is a set of invalid filetypes. 
+ // We write a filter function for most subsets, excluding one that filters all of them. + // We expect these cases to make the evaluation fail as there are still invalid files present + // after the filtering. + #[test_case(r#"(builtins.filterSource (p: t: t == "unknown") @fixtures)"#, false; "complicated directory: filter unsupported types")] + #[test_case(r#"(builtins.filterSource (p: t: (builtins.baseNameOf p) != "a_charnode") @fixtures)"#, false; "complicated directory: filter character device nodes")] + #[test_case(r#"(builtins.filterSource (p: t: (builtins.baseNameOf p) != "a_socket") @fixtures)"#, false; "complicated directory: filter sockets")] + #[test_case(r#"(builtins.filterSource (p: t: (builtins.baseNameOf p) != "a_fifo") @fixtures)"#, false; "complicated directory: filter FIFOs")] + // We filter all invalid filetypes, so the evaluation has to succeed. + #[test_case(r#"(builtins.filterSource (p: t: t != "unknown") @fixtures)"#, true; "complicated directory: filter out unsupported types")] + fn builtins_filter_source_unsupported_files(code: &str, success: bool) { + use nix::sys::stat; + use nix::unistd; + use std::os::unix::net::UnixListener; + use tempfile::TempDir; + + // We prepare a directory containing some unsupported file nodes: + // - character device + // - socket + // - FIFO + // and we run the evaluation inside that CWD. + // + // block devices cannot be tested because we don't have the right permissions. + let temp = TempDir::with_prefix("foo").expect("Failed to create a temporary directory"); + + // read, write, execute to the owner. 
+ unistd::mkfifo(&temp.path().join("a_fifo"), stat::Mode::S_IRWXU) + .expect("Failed to create the FIFO"); + + UnixListener::bind(temp.path().join("a_socket")).expect("Failed to create the socket"); + + stat::mknod( + &temp.path().join("a_charnode"), + stat::SFlag::S_IFCHR, + stat::Mode::S_IRWXU, + 0, + ) + .expect("Failed to create a character device node"); + + let code_replaced = code.replace("@fixtures", &temp.path().to_string_lossy()); + let eval_result = eval(&code_replaced); + + if success { + assert!( + eval_result.value.is_some(), + "unexpected failure on a directory of unsupported file types but all filtered: {:?}", + eval_result.errors + ); + } else { + assert!( + eval_result.value.is_none(), + "unexpected success on unsupported file type ingestion: {:?}", + eval_result.value + ); + } + } } diff --git a/tvix/glue/src/tests/mod.rs b/tvix/glue/src/tests/mod.rs index ad6571e4d212..469ed17a5135 100644 --- a/tvix/glue/src/tests/mod.rs +++ b/tvix/glue/src/tests/mod.rs @@ -13,7 +13,7 @@ use tvix_store::pathinfoservice::{MemoryPathInfoService, PathInfoService}; use rstest::rstest; use crate::{ - builtins::{add_derivation_builtins, add_fetcher_builtins}, + builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}, tvix_store_io::TvixStoreIO, }; @@ -54,7 +54,8 @@ fn eval_test(code_path: PathBuf, expect_success: bool) { eval.strict = true; add_derivation_builtins(&mut eval, tvix_store_io.clone()); - add_fetcher_builtins(&mut eval, tvix_store_io); + add_fetcher_builtins(&mut eval, tvix_store_io.clone()); + add_import_builtins(&mut eval, tvix_store_io.clone()); let result = eval.evaluate(code, Some(code_path.clone())); let failed = match result.value { diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 025a8a4bf01c..375501b65a5a 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -2,6 +2,7 @@ use async_recursion::async_recursion; use bytes::Bytes; +use futures::Stream; use 
futures::{StreamExt, TryStreamExt}; use nix_compat::{ nixhash::CAHash, @@ -18,6 +19,7 @@ use tokio::io::AsyncReadExt; use tracing::{error, instrument, warn, Level}; use tvix_build::buildservice::BuildService; use tvix_eval::{EvalIO, FileType, StdIO}; +use walkdir::DirEntry; use tvix_castore::{ blobservice::BlobService, @@ -282,6 +284,79 @@ impl TvixStoreIO { self.tokio_handle .block_on(async { self.store_path_to_node(store_path, sub_path).await }) } + + /// This forwards the ingestion to the [`tvix_castore::import::ingest_entries`] + /// with a [`tokio::runtime::Handle::block_on`] call for synchronicity. + pub(crate) fn ingest_entries_sync(&self, entries_stream: S) -> io::Result + where + S: Stream + std::marker::Unpin, + { + self.tokio_handle.block_on(async move { + tvix_castore::import::ingest_entries( + &self.blob_service, + &self.directory_service, + entries_stream, + ) + .await + .map_err(|err| std::io::Error::new(io::ErrorKind::Other, err)) + }) + } + + pub(crate) async fn node_to_path_info( + &self, + name: &str, + path: &Path, + root_node: Node, + ) -> io::Result<(PathInfo, StorePath)> { + // Ask the PathInfoService for the NAR size and sha256 + let (nar_size, nar_sha256) = self + .path_info_service + .as_ref() + .calculate_nar(&root_node) + .await?; + + // Calculate the output path. This might still fail, as some names are illegal. + let output_path = nix_compat::store_path::build_nar_based_store_path(&nar_sha256, name) + .map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("invalid name: {}", name), + ) + })?; + + // assemble a new root_node with a name that is derived from the nar hash. 
+ let root_node = root_node.rename(output_path.to_string().into_bytes().into()); + tvix_store::import::log_node(&root_node, path); + + let path_info = + tvix_store::import::derive_nar_ca_path_info(nar_size, nar_sha256, root_node); + + Ok((path_info, output_path.to_owned())) + } + + pub(crate) async fn register_node_in_path_info_service( + &self, + name: &str, + path: &Path, + root_node: Node, + ) -> io::Result { + let (path_info, output_path) = self.node_to_path_info(name, path, root_node).await?; + let _path_info = self.path_info_service.as_ref().put(path_info).await?; + + Ok(output_path) + } + + pub(crate) fn register_node_in_path_info_service_sync( + &self, + name: &str, + path: &Path, + root_node: Node, + ) -> io::Result { + self.tokio_handle.block_on(async { + self.register_node_in_path_info_service(name, path, root_node) + .await + }) + } } impl EvalIO for TvixStoreIO { @@ -475,9 +550,8 @@ mod tests { use tvix_eval::{EvalIO, EvaluationResult}; use tvix_store::pathinfoservice::MemoryPathInfoService; - use crate::builtins::{add_derivation_builtins, add_fetcher_builtins}; - use super::TvixStoreIO; + use crate::builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}; /// evaluates a given nix expression and returns the result. /// Takes care of setting up the evaluator so it knows about the @@ -504,6 +578,7 @@ mod tests { add_derivation_builtins(&mut eval, io.clone()); add_fetcher_builtins(&mut eval, io.clone()); + add_import_builtins(&mut eval, io); // run the evaluation itself. eval.evaluate(str, None) -- cgit 1.4.1