Diffstat (limited to 'tvix/glue')
22 files changed, 766 insertions, 852 deletions
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml index 6968210c5efb..5fac947125af 100644 --- a/tvix/glue/Cargo.toml +++ b/tvix/glue/Cargo.toml @@ -4,48 +4,43 @@ version = "0.1.0" edition = "2021" [dependencies] -async-compression = { version = "0.4.9", features = ["tokio", "gzip", "bzip2", "xz"]} -bstr = "1.6.0" -bytes = "1.4.0" -data-encoding = "2.3.3" -futures = "0.3.30" -magic = "0.16.2" +async-compression = { workspace = true, features = ["tokio", "gzip", "bzip2", "xz"] } +bstr = { workspace = true } +bytes = { workspace = true } +data-encoding = { workspace = true } +futures = { workspace = true } +magic = { workspace = true } nix-compat = { path = "../nix-compat" } -pin-project = "1.1" -reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots"], default-features = false } +pin-project = { workspace = true } +reqwest = { workspace = true, features = ["rustls-tls-native-roots"] } tvix-build = { path = "../build", default-features = false, features = []} tvix-eval = { path = "../eval" } tvix-castore = { path = "../castore" } tvix-store = { path = "../store", default-features = false, features = []} tvix-tracing = { path = "../tracing" } -tracing = "0.1.37" -tracing-indicatif = "0.3.6" -tokio = "1.28.0" -tokio-tar = "0.3.1" -tokio-util = { version = "0.7.9", features = ["io", "io-util", "compat"] } -thiserror = "1.0.38" -serde = "1.0.195" -serde_json = "1.0" -sha2 = "0.10.8" -sha1 = "0.10.6" -md-5 = "0.10.6" -url = "2.4.0" -walkdir = "2.4.0" - -[dependencies.wu-manber] -git = "https://github.com/tvlfyi/wu-manber.git" - -[target.'cfg(not(target_env = "msvc"))'.dependencies] -tikv-jemallocator = "0.5" +tracing = { workspace = true } +tracing-indicatif = { workspace = true } +tokio = { workspace = true } +tokio-tar = { workspace = true } +tokio-util = { workspace = true, features = ["io", "io-util", "compat"] } +thiserror = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +sha2 = { workspace = true } +sha1 = { workspace = true } +md-5 = { workspace = true } +url = { workspace = true } +walkdir = { workspace = true } +clap = { workspace = true } [dev-dependencies] -criterion = { version = "0.5", features = ["html_reports"] } -hex-literal = "0.4.1" -lazy_static = "1.4.0" -nix = { version = "0.27.1", features = [ "fs" ] } -pretty_assertions = "1.4.0" -rstest = "0.19.0" -tempfile = "3.8.1" +criterion = { workspace = true, features = ["html_reports"] } +hex-literal = { workspace = true } +mimalloc = { workspace = true } +nix = { workspace = true, features = ["fs"] } +pretty_assertions = { workspace = true } +rstest = { workspace = true } +tempfile = { workspace = true } [features] default = ["nix_tests"] diff --git a/tvix/glue/benches/eval.rs b/tvix/glue/benches/eval.rs index 9e0154cad787..ce14bdcc13ec 100644 --- a/tvix/glue/benches/eval.rs +++ b/tvix/glue/benches/eval.rs @@ -1,8 +1,8 @@ +use clap::Parser; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use lazy_static::lazy_static; +use mimalloc::MiMalloc; +use std::sync::LazyLock; use std::{env, rc::Rc, sync::Arc, time::Duration}; -#[cfg(not(target_env = "msvc"))] -use tikv_jemallocator::Jemalloc; use tvix_build::buildservice::DummyBuildService; use tvix_eval::{builtins::impure_builtins, EvalIO}; use tvix_glue::{ @@ -11,15 +11,13 @@ use tvix_glue::{ tvix_io::TvixIO, tvix_store_io::TvixStoreIO, }; -use tvix_store::utils::construct_services; +use tvix_store::utils::{construct_services, ServiceUrlsMemory}; -#[cfg(not(target_env = "msvc"))] #[global_allocator] 
-static GLOBAL: Jemalloc = Jemalloc; +static GLOBAL: MiMalloc = MiMalloc; -lazy_static! { - static ref TOKIO_RUNTIME: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap(); -} +static TOKIO_RUNTIME: LazyLock<tokio::runtime::Runtime> = + LazyLock::new(|| tokio::runtime::Runtime::new().unwrap()); fn interpret(code: &str) { // TODO: this is a bit annoying. @@ -27,38 +25,41 @@ fn interpret(code: &str) { // piece of code. b/262 let (blob_service, directory_service, path_info_service, nar_calculation_service) = TOKIO_RUNTIME - .block_on(async { construct_services("memory://", "memory://", "memory://").await }) + .block_on(async { + construct_services(ServiceUrlsMemory::parse_from(std::iter::empty::<&str>())).await + }) .unwrap(); // We assemble a complete store in memory. let tvix_store_io = Rc::new(TvixStoreIO::new( blob_service, directory_service, - path_info_service.into(), + path_info_service, nar_calculation_service.into(), Arc::<DummyBuildService>::default(), TOKIO_RUNTIME.handle().clone(), )); - let mut eval = tvix_eval::Evaluation::new( - Box::new(TvixIO::new(tvix_store_io.clone() as Rc<dyn EvalIO>)) as Box<dyn EvalIO>, - true, - ); + let mut eval_builder = tvix_eval::Evaluation::builder(Box::new(TvixIO::new( + tvix_store_io.clone() as Rc<dyn EvalIO>, + )) as Box<dyn EvalIO>) + .enable_import() + .add_builtins(impure_builtins()); - eval.builtins.extend(impure_builtins()); - add_derivation_builtins(&mut eval, Rc::clone(&tvix_store_io)); - add_fetcher_builtins(&mut eval, Rc::clone(&tvix_store_io)); - add_import_builtins(&mut eval, tvix_store_io); - configure_nix_path( - &mut eval, + eval_builder = add_derivation_builtins(eval_builder, Rc::clone(&tvix_store_io)); + eval_builder = add_fetcher_builtins(eval_builder, Rc::clone(&tvix_store_io)); + eval_builder = add_import_builtins(eval_builder, tvix_store_io); + eval_builder = configure_nix_path( + eval_builder, // The benchmark requires TVIX_BENCH_NIX_PATH to be set, so barf out // early, rather than benchmarking tvix returning an error. 
&Some(env::var("TVIX_BENCH_NIX_PATH").expect("TVIX_BENCH_NIX_PATH must be set")), ); + let eval = eval_builder.build(); let result = eval.evaluate(code, None); - assert!(result.errors.is_empty()); + assert!(result.errors.is_empty(), "{:#?}", result.errors); } fn eval_nixpkgs(c: &mut Criterion) { @@ -67,6 +68,12 @@ fn eval_nixpkgs(c: &mut Criterion) { interpret(black_box("(import <nixpkgs> {}).hello.outPath")); }) }); + + c.bench_function("firefox outpath", |b| { + b.iter(|| { + interpret(black_box("(import <nixpkgs> {}).firefox.outPath")); + }) + }); } criterion_group!( diff --git a/tvix/glue/default.nix b/tvix/glue/default.nix index 14c7e214f25b..0ead94a504c3 100644 --- a/tvix/glue/default.nix +++ b/tvix/glue/default.nix @@ -3,15 +3,15 @@ (depot.tvix.crates.workspaceMembers.tvix-glue.build.override { runTests = true; testPreRun = '' - export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt; + export SSL_CERT_FILE=/dev/null ''; }).overrideAttrs (old: rec { meta.ci.targets = lib.filter (x: lib.hasPrefix "with-features" x || x == "no-features") (lib.attrNames passthru); - passthru = depot.tvix.utils.mkFeaturePowerset { + passthru = old.passthru // (depot.tvix.utils.mkFeaturePowerset { inherit (old) crateName; features = [ "nix_tests" ]; override.testPreRun = '' - export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt; + export SSL_CERT_FILE=/dev/null ''; - }; + }); }) diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 473aa9d5e315..3048fd8390c2 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -179,12 +179,10 @@ pub(crate) mod derivation_builtins { use nix_compat::nixhash::CAHash; use nix_compat::store_path::{build_ca_path, hash_placeholder}; use sha2::Sha256; - use tvix_castore::proto as castorepb; - use tvix_castore::proto::node::Node; - use tvix_castore::proto::FileNode; + use tvix_castore::Node; use tvix_eval::generators::Gen; use tvix_eval::{NixContext, NixContextElement, NixString}; - use tvix_store::proto::{NarInfo, PathInfo}; + use tvix_store::pathinfoservice::PathInfo; #[builtin("placeholder")] async fn builtin_placeholder(co: GenCo, input: Value) -> Result<Value, ErrorKind> { @@ -302,7 +300,7 @@ pub(crate) mod derivation_builtins { // Remove the original default `out` output. drv.outputs.clear(); - let mut output_names = vec![]; + let mut output_names = Vec::with_capacity(outputs.len()); for output in outputs { let output_name = generators::request_force(&co, output) @@ -347,9 +345,9 @@ pub(crate) mod derivation_builtins { input_context.mimic(&val_str); if arg_name == "builder" { - drv.builder = val_str.to_str()?.to_owned(); + val_str.to_str()?.clone_into(&mut drv.builder); } else { - drv.system = val_str.to_str()?.to_owned(); + val_str.to_str()?.clone_into(&mut drv.system); } // Either populate drv.environment or structured_attrs. @@ -381,11 +379,7 @@ pub(crate) mod derivation_builtins { return Ok(val); } - let (val_json, context) = match val.into_contextful_json(&co).await? 
{ - Ok(v) => v, - Err(cek) => return Ok(Value::from(cek)), - }; - + let (val_json, context) = val.into_contextful_json(&co).await?; input_context.extend(context.into_iter()); // No need to check for dups, we only iterate over every attribute name once @@ -570,21 +564,11 @@ pub(crate) mod derivation_builtins { let blob_digest = blob_writer.close().await?; let ca_hash = CAHash::Text(Sha256::digest(&content).into()); - let store_path = - build_ca_path(name.to_str()?, &ca_hash, content.iter_ctx_plain(), false) - .map_err(|_e| { - nix_compat::derivation::DerivationError::InvalidOutputName( - name.to_str_lossy().into_owned(), - ) - }) - .map_err(DerivationError::InvalidDerivation)?; - - let root_node = Node::File(FileNode { - name: store_path.to_string().into(), - digest: blob_digest.into(), + let root_node = Node::File { + digest: blob_digest, size: blob_size, executable: false, - }); + }; // calculate the nar hash let (nar_size, nar_sha256) = state @@ -593,40 +577,38 @@ pub(crate) mod derivation_builtins { .await .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; - // assemble references from plain context. - let reference_paths: Vec<StorePathRef> = content - .iter_ctx_plain() - .map(|elem| StorePathRef::from_absolute_path(elem.as_bytes())) - .collect::<Result<_, _>>() - .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; - // persist via pathinfo service. state .path_info_service .put(PathInfo { - node: Some(castorepb::Node { - node: Some(root_node), - }), - references: reference_paths - .iter() - .map(|x| bytes::Bytes::copy_from_slice(x.digest())) - .collect(), - narinfo: Some(NarInfo { - nar_size, - nar_sha256: nar_sha256.to_vec().into(), - signatures: vec![], - reference_names: reference_paths - .into_iter() - .map(|x| x.to_string()) - .collect(), - deriver: None, - ca: Some(ca_hash.into()), - }), + store_path: build_ca_path( + name.to_str()?, + &ca_hash, + content.iter_ctx_plain(), + false, + ) + .map_err(|_e| { + nix_compat::derivation::DerivationError::InvalidOutputName( + name.to_str_lossy().into_owned(), + ) + }) + .map_err(DerivationError::InvalidDerivation)?, + node: root_node, + // assemble references from plain context. 
+ references: content + .iter_ctx_plain() + .map(|elem| StorePath::from_absolute_path(elem.as_bytes())) + .collect::<Result<_, _>>() + .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?, + nar_size, + nar_sha256, + signatures: vec![], + deriver: None, + ca: Some(ca_hash), }) .await - .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; - - Ok::<_, ErrorKind>(store_path) + .map_err(|e| ErrorKind::TvixError(Rc::new(e))) + .map(|path_info| path_info.store_path) })?; let abs_path = store_path.to_absolute_path(); diff --git a/tvix/glue/src/builtins/errors.rs b/tvix/glue/src/builtins/errors.rs index af8a24e6abb8..ec85942bb1ee 100644 --- a/tvix/glue/src/builtins/errors.rs +++ b/tvix/glue/src/builtins/errors.rs @@ -64,13 +64,13 @@ pub enum FetcherError { #[derive(Debug, Error)] pub enum ImportError { #[error("non-file '{0}' cannot be imported in 'flat' mode")] - FlatImportOfNonFile(String), + FlatImportOfNonFile(PathBuf), #[error("hash mismatch at ingestion of '{0}', expected: '{1}', got: '{2}'")] - HashMismatch(String, NixHash, NixHash), + HashMismatch(PathBuf, NixHash, NixHash), - #[error("path '{}' is not in the Nix store", .0.display())] - PathNotInStore(PathBuf), + #[error("path '{}' is not absolute or invalid", .0.display())] + PathNotAbsoluteOrInvalid(PathBuf), } impl From<ImportError> for tvix_eval::ErrorKind { diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs index 1ad43b383353..2d9b30586703 100644 --- a/tvix/glue/src/builtins/fetchers.rs +++ b/tvix/glue/src/builtins/fetchers.rs @@ -73,8 +73,7 @@ async fn extract_fetch_args( let sha256 = match sha256_str { Some(sha256_str) => { let nixhash = nixhash::from_str(&sha256_str, Some("sha256")) - // TODO: DerivationError::InvalidOutputHash should be moved to ErrorKind::InvalidHash and used here instead - .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; + .map_err(|e| ErrorKind::InvalidHash(e.to_string()))?; Some(nixhash.digest_as_bytes().try_into().expect("is sha256")) } diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs index a3273eca15dc..83b91165c09e 100644 --- a/tvix/glue/src/builtins/import.rs +++ b/tvix/glue/src/builtins/import.rs @@ -1,8 +1,9 @@ //! Implements builtins used to import paths in the store. 
-use crate::builtins::errors::ImportError; +use crate::tvix_store_io::TvixStoreIO; use std::path::Path; use tvix_castore::import::ingest_entries; +use tvix_castore::Node; use tvix_eval::{ builtin_macros::builtins, generators::{self, GenCo}, @@ -16,7 +17,7 @@ async fn filtered_ingest( co: GenCo, path: &Path, filter: Option<&Value>, -) -> Result<tvix_castore::proto::node::Node, ErrorKind> { +) -> Result<Node, ErrorKind> { let mut entries: Vec<walkdir::DirEntry> = vec![]; let mut it = walkdir::WalkDir::new(path) .follow_links(false) @@ -88,10 +89,11 @@ async fn filtered_ingest( let dir_entries = entries.into_iter().rev().map(Ok); state.tokio_handle.block_on(async { - let entries = tvix_castore::import::fs::dir_entries_to_ingestion_stream( + let entries = tvix_castore::import::fs::dir_entries_to_ingestion_stream::<'_, _, _, &[u8]>( &state.blob_service, dir_entries, path, + None, // TODO re-scan ); ingest_entries(&state.directory_service, entries) .await @@ -104,169 +106,134 @@ async fn filtered_ingest( #[builtins(state = "Rc<TvixStoreIO>")] mod import_builtins { - use std::os::unix::ffi::OsStrExt; - use std::rc::Rc; - use super::*; + use crate::builtins::ImportError; use crate::tvix_store_io::TvixStoreIO; + use bstr::ByteSlice; use nix_compat::nixhash::{CAHash, NixHash}; - use nix_compat::store_path::StorePath; + use nix_compat::store_path::{build_ca_path, StorePathRef}; use sha2::Digest; + use std::rc::Rc; use tokio::io::AsyncWriteExt; - use tvix_castore::proto::node::Node; - use tvix_castore::proto::FileNode; + use tvix_eval::builtins::coerce_value_to_path; use tvix_eval::generators::Gen; use tvix_eval::{generators::GenCo, ErrorKind, Value}; use tvix_eval::{FileType, NixContextElement, NixString}; + use tvix_store::path_info::PathInfo; - #[builtin("path")] - async fn builtin_path( + // This is a helper used by both builtins.path and builtins.filterSource. + async fn import_helper( state: Rc<TvixStoreIO>, co: GenCo, - args: Value, + path: std::path::PathBuf, + name: Option<&Value>, + filter: Option<&Value>, + recursive_ingestion: bool, + expected_sha256: Option<[u8; 32]>, ) -> Result<Value, ErrorKind> { - let args = args.to_attrs()?; - let path = args.select_required("path")?; - let path = generators::request_force(&co, path.clone()) - .await - .to_path()?; - let name: String = if let Some(name) = args.select("name") { - generators::request_force(&co, name.clone()) + let name: String = match name { + Some(name) => generators::request_force(&co, name.clone()) .await .to_str()? .as_bstr() - .to_string() - } else { - tvix_store::import::path_to_name(&path) + .to_string(), + None => tvix_store::import::path_to_name(&path) .expect("Failed to derive the default name out of the path") - .to_string() + .to_string(), }; - let filter = args.select("filter"); - let recursive_ingestion = args - .select("recursive") - .map(|r| r.as_bool()) - .transpose()? - .unwrap_or(true); // Yes, yes, Nix, by default, puts `recursive = true;`. - let expected_sha256 = args - .select("sha256") - .map(|h| { - h.to_str().and_then(|expected| { - let expected = expected.into_bstring().to_string(); - // TODO: ensure that we fail if this is not a valid str. - nix_compat::nixhash::from_str(&expected, None).map_err(|_err| { - // TODO: a better error would be nice, we use - // DerivationError::InvalidOutputHash usually for derivation construction. - // This is not a derivation construction, should we move it outside and - // generalize? 
- ErrorKind::TypeError { - expected: "sha256", - actual: "not a sha256", - } - }) - }) - }) - .transpose()?; - - // Check if the path points to a regular file. - // If it does, the filter function is never executed. - // TODO: follow symlinks and check their type instead - let (root_node, ca_hash) = match state.file_type(path.as_ref())? { + // As a first step, we ingest the contents, and get back a root node, + // and optionally the sha256 a flat file. + let (root_node, ca) = match std::fs::metadata(&path)?.file_type().into() { + // Check if the path points to a regular file. + // If it does, the filter function is never executed, and we copy to the blobservice directly. + // If recursive is false, we need to calculate the sha256 digest of the raw contents, + // as that affects the output path calculation. FileType::Regular => { - let mut file = state.open(path.as_ref())?; - // This is a single file, copy it to the blobservice directly. - let mut hash = sha2::Sha256::new(); + let mut file = state.open(&path)?; + + let mut flat_sha256 = (!recursive_ingestion).then(sha2::Sha256::new); let mut blob_size = 0; + let mut blob_writer = state .tokio_handle .block_on(async { state.blob_service.open_write().await }); - let mut buf = [0u8; 4096]; + // read piece by piece and write to blob_writer. + // This is a bit manual due to EvalIO being sync, while everything else async. + { + let mut buf = [0u8; 4096]; - loop { - // read bytes into buffer, break out if EOF - let len = file.read(&mut buf)?; - if len == 0 { - break; - } - blob_size += len as u64; + loop { + // read bytes into buffer, break out if EOF + let len = file.read(&mut buf)?; + if len == 0 { + break; + } + blob_size += len as u64; - let data = &buf[0..len]; + let data = &buf[0..len]; - // add to blobwriter - state - .tokio_handle - .block_on(async { blob_writer.write_all(data).await })?; + // add to blobwriter + state + .tokio_handle + .block_on(async { blob_writer.write_all(data).await })?; - // update the sha256 hash function. We can skip that if we're not using it. - if !recursive_ingestion { - hash.update(data); + // update blob_sha256 if needed. + if let Some(h) = flat_sha256.as_mut() { + h.update(data) + } } } - // close the blob writer, get back the b3 digest. - let blob_digest = state - .tokio_handle - .block_on(async { blob_writer.close().await })?; - - let root_node = Node::File(FileNode { - // The name gets set further down, while constructing the PathInfo. - name: "".into(), - digest: blob_digest.into(), - size: blob_size, - executable: false, - }); - - let ca_hash = if recursive_ingestion { - let (_nar_size, nar_sha256) = state - .tokio_handle - .block_on(async { - state - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await - }) - .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; - CAHash::Nar(NixHash::Sha256(nar_sha256)) - } else { - CAHash::Flat(NixHash::Sha256(hash.finalize().into())) - }; - - (root_node, ca_hash) + // close the blob writer, construct the root node and the blob_sha256 (later used for output path calculation) + ( + Node::File { + digest: state + .tokio_handle + .block_on(async { blob_writer.close().await })?, + size: blob_size, + executable: false, + }, + { + // If non-recursive ingestion is requested… + if let Some(flat_sha256) = flat_sha256 { + let actual_sha256 = flat_sha256.finalize().into(); + + // compare the recorded flat hash with an upfront one if provided. 
+ if let Some(expected_sha256) = expected_sha256 { + if actual_sha256 != expected_sha256 { + return Err(ImportError::HashMismatch( + path, + NixHash::Sha256(expected_sha256), + NixHash::Sha256(actual_sha256), + ) + .into()); + } + } + + Some(CAHash::Flat(NixHash::Sha256(actual_sha256))) + } else { + None + } + }, + ) } - FileType::Directory => { - if !recursive_ingestion { - return Err(ImportError::FlatImportOfNonFile( - path.to_string_lossy().to_string(), - ))?; - } - - // do the filtered ingest - let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?; - - // calculate the NAR sha256 - let (_nar_size, nar_sha256) = state - .tokio_handle - .block_on(async { - state - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await - }) - .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; - - let ca_hash = CAHash::Nar(NixHash::Sha256(nar_sha256)); - - (root_node, ca_hash) + FileType::Directory if !recursive_ingestion => { + return Err(ImportError::FlatImportOfNonFile(path))? } + + // do the filtered ingest + FileType::Directory => ( + filtered_ingest(state.clone(), co, path.as_ref(), filter).await?, + None, + ), FileType::Symlink => { // FUTUREWORK: Nix follows a symlink if it's at the root, // except if it's not resolve-able (NixOS/nix#7761).i return Err(tvix_eval::ErrorKind::IO { - path: Some(path.to_path_buf()), + path: Some(path), error: Rc::new(std::io::Error::new( std::io::ErrorKind::Unsupported, "builtins.path pointing to a symlink is ill-defined.", @@ -275,7 +242,7 @@ mod import_builtins { } FileType::Unknown => { return Err(tvix_eval::ErrorKind::IO { - path: Some(path.to_path_buf()), + path: Some(path), error: Rc::new(std::io::Error::new( std::io::ErrorKind::Unsupported, "unsupported file type", @@ -284,32 +251,67 @@ mod import_builtins { } }; - let (path_info, _hash, output_path) = state.tokio_handle.block_on(async { - state - .node_to_path_info(name.as_ref(), path.as_ref(), &ca_hash, root_node) - .await - })?; - - if let Some(expected_sha256) = expected_sha256 { - if *ca_hash.hash() != expected_sha256 { - Err(ImportError::HashMismatch( - path.to_string_lossy().to_string(), - expected_sha256, - ca_hash.hash().into_owned(), - ))?; + // Calculate the NAR sha256. + let (nar_size, nar_sha256) = state + .tokio_handle + .block_on(async { + state + .nar_calculation_service + .as_ref() + .calculate_nar(&root_node) + .await + }) + .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; + + // Calculate the CA hash for the recursive cases, this is only already + // `Some(_)` for flat ingestion. + let ca = match ca { + None => { + // If an upfront-expected NAR hash was specified, compare. + if let Some(expected_nar_sha256) = expected_sha256 { + if expected_nar_sha256 != nar_sha256 { + return Err(ImportError::HashMismatch( + path, + NixHash::Sha256(expected_nar_sha256), + NixHash::Sha256(nar_sha256), + ) + .into()); + } + } + CAHash::Nar(NixHash::Sha256(nar_sha256)) } - } + Some(ca) => ca, + }; + + let store_path = build_ca_path(&name, &ca, Vec::<&str>::new(), false) + .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; - state + let path_info = state .tokio_handle - .block_on(async { state.path_info_service.as_ref().put(path_info).await }) + .block_on(async { + state + .path_info_service + .as_ref() + .put(PathInfo { + store_path, + node: root_node, + // There's no reference scanning on path contents ingested like this. 
+ references: vec![], + nar_size, + nar_sha256, + signatures: vec![], + deriver: None, + ca: Some(ca), + }) + .await + }) .map_err(|e| tvix_eval::ErrorKind::IO { - path: Some(path.to_path_buf()), + path: Some(path), error: Rc::new(e.into()), })?; // We need to attach context to the final output path. - let outpath = output_path.to_absolute_path(); + let outpath = path_info.store_path.to_absolute_path(); Ok( NixString::new_context_from(NixContextElement::Plain(outpath.clone()).into(), outpath) @@ -317,45 +319,72 @@ mod import_builtins { ) } - #[builtin("filterSource")] - async fn builtin_filter_source( + #[builtin("path")] + async fn builtin_path( state: Rc<TvixStoreIO>, co: GenCo, - #[lazy] filter: Value, - path: Value, + args: Value, ) -> Result<Value, ErrorKind> { - let p = path.to_path()?; - let root_node = filtered_ingest(Rc::clone(&state), co, &p, Some(&filter)).await?; - let name = tvix_store::import::path_to_name(&p)?; + let args = args.to_attrs()?; - let outpath = state - .tokio_handle - .block_on(async { - let (_, nar_sha256) = state - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await?; + let path = match coerce_value_to_path( + &co, + generators::request_force(&co, args.select_required("path")?.clone()).await, + ) + .await? + { + Ok(path) => path, + Err(cek) => return Ok(cek.into()), + }; - state - .register_node_in_path_info_service( - name, - &p, - &CAHash::Nar(NixHash::Sha256(nar_sha256)), - root_node, - ) - .await + let filter = args.select("filter"); + + // Construct a sha256 hasher, which is needed for flat ingestion. + let recursive_ingestion = args + .select("recursive") + .map(|r| r.as_bool()) + .transpose()? + .unwrap_or(true); // Yes, yes, Nix, by default, puts `recursive = true;`. + + let expected_sha256 = args + .select("sha256") + .map(|h| { + h.to_str().and_then(|expected| { + match nix_compat::nixhash::from_str(expected.to_str()?, Some("sha256")) { + Ok(NixHash::Sha256(digest)) => Ok(digest), + Ok(_) => unreachable!(), + Err(e) => Err(ErrorKind::InvalidHash(e.to_string())), + } + }) }) - .map_err(|err| ErrorKind::IO { - path: Some(p.to_path_buf()), - error: err.into(), - })? - .to_absolute_path(); + .transpose()?; - Ok( - NixString::new_context_from(NixContextElement::Plain(outpath.clone()).into(), outpath) - .into(), + import_helper( + state, + co, + path, + args.select("name"), + filter, + recursive_ingestion, + expected_sha256, ) + .await + } + + #[builtin("filterSource")] + async fn builtin_filter_source( + state: Rc<TvixStoreIO>, + co: GenCo, + #[lazy] filter: Value, + path: Value, + ) -> Result<Value, ErrorKind> { + let path = + match coerce_value_to_path(&co, generators::request_force(&co, path).await).await? { + Ok(path) => path, + Err(cek) => return Ok(cek.into()), + }; + + import_helper(state, co, path, None, Some(&filter), true, None).await } #[builtin("storePath")] @@ -364,39 +393,33 @@ mod import_builtins { co: GenCo, path: Value, ) -> Result<Value, ErrorKind> { - let p = std::str::from_utf8(match &path { - Value::String(s) => s.as_bytes(), - Value::Path(p) => p.as_os_str().as_bytes(), + let p = match &path { + Value::String(s) => Path::new(s.as_bytes().to_os_str()?), + Value::Path(p) => p.as_path(), _ => { return Err(ErrorKind::TypeError { expected: "string or path", actual: path.type_of(), }) } - })?; - - let path_exists = if let Ok((store_path, sub_path)) = StorePath::from_absolute_path_full(p) - { - if !sub_path.as_os_str().is_empty() { - false - } else { - state.store_path_exists(store_path.as_ref()).await? 
- } - } else { - false }; - if !path_exists { - return Err(ImportError::PathNotInStore(p.into()).into()); - } + // For this builtin, the path needs to start with an absolute store path. + let (store_path, _sub_path) = StorePathRef::from_absolute_path_full(p) + .map_err(|_e| ImportError::PathNotAbsoluteOrInvalid(p.to_path_buf()))?; - Ok(Value::String(NixString::new_context_from( - [NixContextElement::Plain(p.into())].into(), - p, - ))) + if state.path_exists(p)? { + Ok(Value::String(NixString::new_context_from( + [NixContextElement::Plain(store_path.to_absolute_path())].into(), + p.as_os_str().as_encoded_bytes(), + ))) + } else { + Err(ErrorKind::IO { + path: Some(p.to_path_buf()), + error: Rc::new(std::io::ErrorKind::NotFound.into()), + }) + } } } pub use import_builtins::builtins as import_builtins; - -use crate::tvix_store_io::TvixStoreIO; diff --git a/tvix/glue/src/builtins/mod.rs b/tvix/glue/src/builtins/mod.rs index 3d6263286dc4..6149423acff0 100644 --- a/tvix/glue/src/builtins/mod.rs +++ b/tvix/glue/src/builtins/mod.rs @@ -18,13 +18,14 @@ pub use errors::{DerivationError, FetcherError, ImportError}; /// /// As they need to interact with `known_paths`, we also need to pass in /// `known_paths`. -pub fn add_derivation_builtins<IO>(eval: &mut tvix_eval::Evaluation<IO>, io: Rc<TvixStoreIO>) { - eval.builtins - .extend(derivation::derivation_builtins::builtins(Rc::clone(&io))); - - // Add the actual `builtins.derivation` from compiled Nix code - eval.src_builtins - .push(("derivation", include_str!("derivation.nix"))); +pub fn add_derivation_builtins<'co, 'ro, 'env, IO>( + eval_builder: tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO>, + io: Rc<TvixStoreIO>, +) -> tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO> { + eval_builder + .add_builtins(derivation::derivation_builtins::builtins(Rc::clone(&io))) + // Add the actual `builtins.derivation` from compiled Nix code + .add_src_builtin("derivation", include_str!("derivation.nix")) } /// Adds fetcher builtins to the passed [tvix_eval::Evaluation]: @@ -32,9 +33,11 @@ pub fn add_derivation_builtins<IO>(eval: &mut tvix_eval::Evaluation<IO>, io: Rc< /// * `fetchurl` /// * `fetchTarball` /// * `fetchGit` -pub fn add_fetcher_builtins<IO>(eval: &mut tvix_eval::Evaluation<IO>, io: Rc<TvixStoreIO>) { - eval.builtins - .extend(fetchers::fetcher_builtins::builtins(Rc::clone(&io))); +pub fn add_fetcher_builtins<'co, 'ro, 'env, IO>( + eval_builder: tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO>, + io: Rc<TvixStoreIO>, +) -> tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO> { + eval_builder.add_builtins(fetchers::fetcher_builtins::builtins(Rc::clone(&io))) } /// Adds import-related builtins to the passed [tvix_eval::Evaluation]. @@ -42,10 +45,12 @@ pub fn add_fetcher_builtins<IO>(eval: &mut tvix_eval::Evaluation<IO>, io: Rc<Tvi /// These are `filterSource` and `path` /// /// As they need to interact with the store implementation, we pass [`TvixStoreIO`]. 
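// (For orientation: the builder-style wiring these add_*_builtins helpers now
// expect looks roughly like the call sites updated elsewhere in this diff.
// A minimal sketch, assuming `io` is an Rc<TvixStoreIO> that also provides
// the EvalIO implementation:
//
//     let mut eval_builder = tvix_eval::Evaluation::builder(io.clone() as Rc<dyn EvalIO>);
//     eval_builder = add_derivation_builtins(eval_builder, Rc::clone(&io));
//     eval_builder = add_fetcher_builtins(eval_builder, Rc::clone(&io));
//     eval_builder = add_import_builtins(eval_builder, io);
//     let eval = eval_builder.build();
// )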
-pub fn add_import_builtins<IO>(eval: &mut tvix_eval::Evaluation<IO>, io: Rc<TvixStoreIO>) { - eval.builtins.extend(import::import_builtins(io)); - +pub fn add_import_builtins<'co, 'ro, 'env, IO>( + eval_builder: tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO>, + io: Rc<TvixStoreIO>, +) -> tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO> { // TODO(raitobezarius): evaluate expressing filterSource as Nix code using path (b/372) + eval_builder.add_builtins(import::import_builtins(io)) } #[cfg(test)] @@ -55,12 +60,13 @@ mod tests { use crate::tvix_store_io::TvixStoreIO; use super::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}; + use clap::Parser; use nix_compat::store_path::hash_placeholder; use rstest::rstest; use tempfile::TempDir; use tvix_build::buildservice::DummyBuildService; use tvix_eval::{EvalIO, EvaluationResult}; - use tvix_store::utils::construct_services; + use tvix_store::utils::{construct_services, ServiceUrlsMemory}; /// evaluates a given nix expression and returns the result. /// Takes care of setting up the evaluator so it knows about the @@ -69,23 +75,25 @@ mod tests { // We assemble a complete store in memory. let runtime = tokio::runtime::Runtime::new().expect("Failed to build a Tokio runtime"); let (blob_service, directory_service, path_info_service, nar_calculation_service) = runtime - .block_on(async { construct_services("memory://", "memory://", "memory://").await }) + .block_on(async { + construct_services(ServiceUrlsMemory::parse_from(std::iter::empty::<&str>())).await + }) .expect("Failed to construct store services in memory"); let io = Rc::new(TvixStoreIO::new( blob_service, directory_service, - path_info_service.into(), + path_info_service, nar_calculation_service.into(), Arc::<DummyBuildService>::default(), runtime.handle().clone(), )); - let mut eval = tvix_eval::Evaluation::new(io.clone() as Rc<dyn EvalIO>, false); - - add_derivation_builtins(&mut eval, Rc::clone(&io)); - add_fetcher_builtins(&mut eval, Rc::clone(&io)); - add_import_builtins(&mut eval, io); + let mut eval_builder = tvix_eval::Evaluation::builder(io.clone() as Rc<dyn EvalIO>); + eval_builder = add_derivation_builtins(eval_builder, Rc::clone(&io)); + eval_builder = add_fetcher_builtins(eval_builder, Rc::clone(&io)); + eval_builder = add_import_builtins(eval_builder, io); + let eval = eval_builder.build(); // run the evaluation itself. eval.evaluate(str, None) @@ -529,14 +537,13 @@ mod tests { assert!(eval_result.errors.is_empty(), "errors should be empty"); } - // Space is an illegal character. + /// Space is an illegal character, but if we specify a name without spaces, it's ok. #[rstest] - #[case( + #[case::rename_success( r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = true; })"#, true )] - // Space is still an illegal character. - #[case( + #[case::rename_with_spaces_fail( r#"(builtins.path { name = "invalid name"; path = @fixtures + "/te st"; recursive = true; })"#, false )] @@ -575,14 +582,13 @@ mod tests { } } - // Space is an illegal character. + /// Space is an illegal character, but if we specify a name without spaces, it's ok. #[rstest] - #[case( + #[case::rename_success( r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = false; })"#, true )] - // Space is still an illegal character. 
- #[case( + #[case::rename_with_spaces_fail( r#"(builtins.path { name = "invalid name"; path = @fixtures + "/te st"; recursive = false; })"#, false )] @@ -623,20 +629,20 @@ mod tests { } #[rstest] - #[case( + #[case::flat_success( r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = false; sha256 = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; })"#, true )] - #[case( - r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = true; sha256 = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; })"#, + #[case::flat_fail( + r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = false; sha256 = "sha256-d6xi4mKdjkX2JFicDIv5niSzpyI0m/Hnm8GGAIU04kY="; })"#, false )] - #[case( + #[case::recursive_success( r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = true; sha256 = "sha256-d6xi4mKdjkX2JFicDIv5niSzpyI0m/Hnm8GGAIU04kY="; })"#, true )] - #[case( - r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = false; sha256 = "sha256-d6xi4mKdjkX2JFicDIv5niSzpyI0m/Hnm8GGAIU04kY="; })"#, + #[case::recursive_fail( + r#"(builtins.path { name = "valid-name"; path = @fixtures + "/te st"; recursive = true; sha256 = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="; })"#, false )] fn builtins_path_fod_locking(#[case] code: &str, #[case] exp_success: bool) { diff --git a/tvix/glue/src/fetchers/mod.rs b/tvix/glue/src/fetchers/mod.rs index eb035a5a905c..c12598e96328 100644 --- a/tvix/glue/src/fetchers/mod.rs +++ b/tvix/glue/src/fetchers/mod.rs @@ -10,12 +10,11 @@ use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader}; use tokio_util::io::{InspectReader, InspectWriter}; use tracing::{instrument, warn, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; -use tvix_castore::{ - blobservice::BlobService, - directoryservice::DirectoryService, - proto::{node::Node, FileNode}, +use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService, Node}; +use tvix_store::{ + nar::NarCalculationService, + pathinfoservice::{PathInfo, PathInfoService}, }; -use tvix_store::{nar::NarCalculationService, pathinfoservice::PathInfoService, proto::PathInfo}; use url::Url; use crate::builtins::FetcherError; @@ -331,12 +330,11 @@ where // Construct and return the FileNode describing the downloaded contents. Ok(( - Node::File(FileNode { - name: vec![].into(), - digest: blob_writer.close().await?.into(), + Node::File { + digest: blob_writer.close().await?, size: blob_size, executable: false, - }), + }, CAHash::Flat(actual_hash), blob_size, )) @@ -531,12 +529,11 @@ where // Construct and return the FileNode describing the downloaded contents, // make it executable. - let root_node = Node::File(FileNode { - name: vec![].into(), - digest: blob_digest.into(), + let root_node = Node::File { + digest: blob_digest, size: file_size, executable: true, - }); + }; Ok((root_node, CAHash::Nar(actual_hash), file_size)) } @@ -560,9 +557,6 @@ where // Calculate the store path to return, by calculating from ca_hash. let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?; - // Rename the node name to match the Store Path. - let node = node.rename(store_path.to_string().into()); - // If the resulting hash is not a CAHash::Nar, we also need to invoke // `calculate_nar` to calculate this representation, as it's required in // the [PathInfo]. @@ -580,25 +574,22 @@ where // Construct the PathInfo and persist it. 
let path_info = PathInfo { - node: Some(tvix_castore::proto::Node { node: Some(node) }), + store_path: store_path.to_owned(), + node: node.clone(), references: vec![], - narinfo: Some(tvix_store::proto::NarInfo { - nar_size, - nar_sha256: nar_sha256.to_vec().into(), - signatures: vec![], - reference_names: vec![], - deriver: None, - ca: Some(ca_hash.into()), - }), + nar_size, + nar_sha256, + signatures: vec![], + deriver: None, + ca: Some(ca_hash), }; - let path_info = self - .path_info_service + self.path_info_service .put(path_info) .await .map_err(|e| FetcherError::Io(e.into()))?; - Ok((store_path, path_info.node.unwrap().node.unwrap())) + Ok((store_path, node)) } } diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs index edc57c38f247..7934bfe0baa2 100644 --- a/tvix/glue/src/known_paths.rs +++ b/tvix/glue/src/known_paths.rs @@ -25,27 +25,27 @@ pub struct KnownPaths { /// /// Keys are derivation paths, values are a tuple of the "hash derivation /// modulo" and the Derivation struct itself. - derivations: HashMap<StorePath, ([u8; 32], Derivation)>, + derivations: HashMap<StorePath<String>, ([u8; 32], Derivation)>, /// A map from output path to (one) drv path. /// Note that in the case of FODs, multiple drvs can produce the same output /// path. We use one of them. - outputs_to_drvpath: HashMap<StorePath, StorePath>, + outputs_to_drvpath: HashMap<StorePath<String>, StorePath<String>>, /// A map from output path to fetches (and their names). - outputs_to_fetches: HashMap<StorePath, (String, Fetch)>, + outputs_to_fetches: HashMap<StorePath<String>, (String, Fetch)>, } impl KnownPaths { /// Fetch the opaque "hash derivation modulo" for a given derivation path. - pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&[u8; 32]> { + pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath<String>) -> Option<&[u8; 32]> { self.derivations .get(drv_path) .map(|(hash_derivation_modulo, _derivation)| hash_derivation_modulo) } /// Return a reference to the Derivation for a given drv path. - pub fn get_drv_by_drvpath(&self, drv_path: &StorePath) -> Option<&Derivation> { + pub fn get_drv_by_drvpath(&self, drv_path: &StorePath<String>) -> Option<&Derivation> { self.derivations .get(drv_path) .map(|(_hash_derivation_modulo, derivation)| derivation) @@ -54,7 +54,10 @@ impl KnownPaths { /// Return the drv path of the derivation producing the passed output path. /// Note there can be multiple Derivations producing the same output path in /// flight; this function will only return one of them. - pub fn get_drv_path_for_output_path(&self, output_path: &StorePath) -> Option<&StorePath> { + pub fn get_drv_path_for_output_path( + &self, + output_path: &StorePath<String>, + ) -> Option<&StorePath<String>> { self.outputs_to_drvpath.get(output_path) } @@ -63,7 +66,7 @@ impl KnownPaths { /// be fully calculated. /// All input derivations this refers to must also be inserted to this /// struct. - pub fn add_derivation(&mut self, drv_path: StorePath, drv: Derivation) { + pub fn add_derivation(&mut self, drv_path: StorePath<String>, drv: Derivation) { // check input derivations to have been inserted. #[cfg(debug_assertions)] { @@ -124,57 +127,86 @@ impl KnownPaths { /// Return the name and fetch producing the passed output path. /// Note there can also be (multiple) Derivations producing the same output path. 
- pub fn get_fetch_for_output_path(&self, output_path: &StorePath) -> Option<(String, Fetch)> { + pub fn get_fetch_for_output_path( + &self, + output_path: &StorePath<String>, + ) -> Option<(String, Fetch)> { self.outputs_to_fetches .get(output_path) .map(|(name, fetch)| (name.to_owned(), fetch.to_owned())) } + + /// Returns an iterator over all known derivations and their store path. + pub fn get_derivations(&self) -> impl Iterator<Item = (&StorePath<String>, &Derivation)> { + self.derivations.iter().map(|(k, v)| (k, &v.1)) + } } #[cfg(test)] mod tests { + use std::sync::LazyLock; + + use hex_literal::hex; use nix_compat::{derivation::Derivation, nixbase32, nixhash, store_path::StorePath}; use url::Url; - use crate::fetchers::Fetch; - use super::KnownPaths; - use hex_literal::hex; - use lazy_static::lazy_static; + use crate::fetchers::Fetch; - lazy_static! { - static ref BAR_DRV: Derivation = Derivation::from_aterm_bytes(include_bytes!( + static BAR_DRV: LazyLock<Derivation> = LazyLock::new(|| { + Derivation::from_aterm_bytes(include_bytes!( "tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv" )) - .expect("must parse"); - static ref FOO_DRV: Derivation = Derivation::from_aterm_bytes(include_bytes!( + .expect("must parse") + }); + + static FOO_DRV: LazyLock<Derivation> = LazyLock::new(|| { + Derivation::from_aterm_bytes(include_bytes!( "tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" )) - .expect("must parse"); - static ref BAR_DRV_PATH: StorePath = - StorePath::from_bytes(b"ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv").expect("must parse"); - static ref FOO_DRV_PATH: StorePath = - StorePath::from_bytes(b"ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv").expect("must parse"); - static ref BAR_OUT_PATH: StorePath = - StorePath::from_bytes(b"mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar").expect("must parse"); - static ref FOO_OUT_PATH: StorePath = - StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse"); - - static ref FETCH_URL : Fetch = Fetch::URL{ - url: Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap(), - exp_hash: Some(nixhash::from_sri_str("sha256-Xa1Jbl2Eq5+L0ww+Ph1osA3Z/Dxe/RkN1/dITQCdXFk=").unwrap()) - }; - static ref FETCH_URL_OUT_PATH: StorePath = StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap(); - - static ref FETCH_TARBALL : Fetch = Fetch::Tarball{ - url: Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap(), - exp_nar_sha256: Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap()) - }; - static ref FETCH_TARBALL_OUT_PATH: StorePath = StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap(); + .expect("must parse") + }); + + static BAR_DRV_PATH: LazyLock<StorePath<String>> = LazyLock::new(|| { + StorePath::from_bytes(b"ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv").expect("must parse") + }); + + static FOO_DRV_PATH: LazyLock<StorePath<String>> = LazyLock::new(|| { + StorePath::from_bytes(b"ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv").expect("must parse") + }); + + static BAR_OUT_PATH: LazyLock<StorePath<String>> = LazyLock::new(|| { + StorePath::from_bytes(b"mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar").expect("must parse") + }); + + static FOO_OUT_PATH: LazyLock<StorePath<String>> = LazyLock::new(|| { + StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse") + }); + + static FETCH_URL: LazyLock<Fetch> = 
LazyLock::new(|| { + Fetch::URL { + url: Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap(), + exp_hash: Some(nixhash::from_sri_str("sha256-Xa1Jbl2Eq5+L0ww+Ph1osA3Z/Dxe/RkN1/dITQCdXFk=").unwrap()) } + }); + + static FETCH_URL_OUT_PATH: LazyLock<StorePath<String>> = LazyLock::new(|| { + StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap() + }); - /// ensure we don't allow acdding a Derivation that depends on another, - /// not-yet-added Derivation. + static FETCH_TARBALL: LazyLock<Fetch> = LazyLock::new(|| { + Fetch::Tarball { + url: Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap(), + exp_nar_sha256: Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap()) + } + }); + + static FETCH_TARBALL_OUT_PATH: LazyLock<StorePath<String>> = LazyLock::new(|| { + StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap() + }); + + /// Ensure that we don't allow adding a derivation that depends on another, + /// not-yet-added derivation. #[test] #[should_panic] fn drv_reject_if_missing_input_drv() { @@ -269,5 +301,21 @@ mod tests { ); } + #[test] + fn get_derivations_working() { + let mut known_paths = KnownPaths::default(); + + // Add BAR_DRV + known_paths.add_derivation(BAR_DRV_PATH.clone(), BAR_DRV.clone()); + + // We should be able to find BAR_DRV_PATH and BAR_DRV as a pair in get_derivations. + assert_eq!( + Some((&BAR_DRV_PATH.clone(), &BAR_DRV.clone())), + known_paths + .get_derivations() + .find(|(s, d)| (*s, *d) == (&BAR_DRV_PATH, &BAR_DRV)) + ); + } + // TODO: add test panicking about missing digest } diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs index a5dbdb8742fc..320d1f6fede2 100644 --- a/tvix/glue/src/lib.rs +++ b/tvix/glue/src/lib.rs @@ -1,7 +1,6 @@ pub mod builtins; pub mod fetchers; pub mod known_paths; -pub mod refscan; pub mod tvix_build; pub mod tvix_io; pub mod tvix_store_io; @@ -14,12 +13,14 @@ mod tests; /// Tell the Evaluator to resolve `<nix>` to the path `/__corepkgs__`, /// which has special handling in [tvix_io::TvixIO]. /// This is used in nixpkgs to import `fetchurl.nix` from `<nix>`. -pub fn configure_nix_path<IO>( - eval: &mut tvix_eval::Evaluation<IO>, +pub fn configure_nix_path<'co, 'ro, 'env, IO>( + eval_builder: tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO>, nix_search_path: &Option<String>, -) { - eval.nix_path = nix_search_path - .as_ref() - .map(|p| format!("nix=/__corepkgs__:{}", p)) - .or_else(|| Some("nix=/__corepkgs__".to_string())); +) -> tvix_eval::EvaluationBuilder<'co, 'ro, 'env, IO> { + eval_builder.nix_path( + nix_search_path + .as_ref() + .map(|p| format!("nix=/__corepkgs__:{}", p)) + .or_else(|| Some("nix=/__corepkgs__".to_string())), + ) } diff --git a/tvix/glue/src/refscan.rs b/tvix/glue/src/refscan.rs deleted file mode 100644 index 0e0bb6c77828..000000000000 --- a/tvix/glue/src/refscan.rs +++ /dev/null @@ -1,115 +0,0 @@ -//! Simple scanner for non-overlapping, known references of Nix store paths in a -//! given string. -//! -//! This is used for determining build references (see -//! //tvix/eval/docs/build-references.md for more details). -//! -//! The scanner itself is using the Wu-Manber string-matching algorithm, using -//! our fork of the `wu-mamber` crate. 
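// (This scanner is deleted rather than rewritten in place: later in this
// diff, tvix_build.rs gains get_refscan_needles(), and BuildRequest carries a
// refscan_needles field, so reference scanning moves to the build layer.
// A minimal sketch of the replacement, using only items shown in this diff:
//
//     let needles: Vec<String> = get_refscan_needles(&derivation)
//         .map(|path| nixbase32::encode(path.digest()))
//         .collect();
// )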
- -use std::collections::BTreeSet; -use wu_manber::TwoByteWM; - -pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len(); - -/// Represents a "primed" reference scanner with an automaton that knows the set -/// of store paths to scan for. -pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> { - candidates: Vec<P>, - searcher: Option<TwoByteWM>, - matches: Vec<usize>, -} - -impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> { - /// Construct a new `ReferenceScanner` that knows how to scan for the given - /// candidate store paths. - pub fn new(candidates: Vec<P>) -> Self { - let searcher = if candidates.is_empty() { - None - } else { - Some(TwoByteWM::new(&candidates)) - }; - - ReferenceScanner { - searcher, - candidates, - matches: Default::default(), - } - } - - /// Scan the given str for all non-overlapping matches and collect them - /// in the scanner. - pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) { - if haystack.as_ref().len() < STORE_PATH_LEN { - return; - } - - if let Some(searcher) = &self.searcher { - for m in searcher.find(haystack) { - self.matches.push(m.pat_idx); - } - } - } - - /// Finalise the reference scanner and return the resulting matches. - pub fn finalise(self) -> BTreeSet<P> { - self.matches - .into_iter() - .map(|idx| self.candidates[idx].clone()) - .collect() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // The actual derivation of `nixpkgs.hello`. - const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; - - #[test] - fn test_no_patterns() { - let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]); - - scanner.scan(HELLO_DRV); - - let result = scanner.finalise(); - - assert_eq!(result.len(), 0); - } - - #[test] - fn test_single_match() { - let mut scanner = ReferenceScanner::new(vec![ - "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(), - ]); - scanner.scan(HELLO_DRV); - - let result = scanner.finalise(); - - assert_eq!(result.len(), 1); - assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16")); - } - - #[test] - fn test_multiple_matches() { - let candidates = vec![ - // these exist in the drv: - 
"/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(), - "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(), - "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(), - // this doesn't: - "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(), - ]; - - let mut scanner = ReferenceScanner::new(candidates.clone()); - scanner.scan(HELLO_DRV); - - let result = scanner.finalise(); - assert_eq!(result.len(), 3); - - for c in candidates[..3].iter() { - assert!(result.contains(c)); - } - } -} diff --git a/tvix/glue/src/tests/dummy/.keep b/tvix/glue/src/tests/dummy/.keep new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tvix/glue/src/tests/dummy/.keep diff --git a/tvix/glue/src/tests/mod.rs b/tvix/glue/src/tests/mod.rs index 5b474be73657..dbde42064a77 100644 --- a/tvix/glue/src/tests/mod.rs +++ b/tvix/glue/src/tests/mod.rs @@ -1,10 +1,11 @@ use std::{rc::Rc, sync::Arc}; +use clap::Parser; use pretty_assertions::assert_eq; use std::path::PathBuf; use tvix_build::buildservice::DummyBuildService; -use tvix_eval::{EvalIO, Value}; -use tvix_store::utils::construct_services; +use tvix_eval::{EvalIO, EvalMode, Value}; +use tvix_store::utils::{construct_services, ServiceUrlsMemory}; use rstest::rstest; @@ -35,28 +36,32 @@ fn eval_test(code_path: PathBuf, expect_success: bool) { let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); let (blob_service, directory_service, path_info_service, nar_calculation_service) = tokio_runtime - .block_on(async { construct_services("memory://", "memory://", "memory://").await }) + .block_on(async { + construct_services(ServiceUrlsMemory::parse_from(std::iter::empty::<&str>())).await + }) .unwrap(); let tvix_store_io = Rc::new(TvixStoreIO::new( blob_service, directory_service, - path_info_service.into(), + path_info_service, nar_calculation_service.into(), Arc::new(DummyBuildService::default()), tokio_runtime.handle().clone(), )); // Wrap with TvixIO, so <nix/fetchurl.nix can be imported. 
- let mut eval = tvix_eval::Evaluation::new( - Box::new(TvixIO::new(tvix_store_io.clone() as Rc<dyn EvalIO>)) as Box<dyn EvalIO>, - true, - ); - - eval.strict = true; - add_derivation_builtins(&mut eval, tvix_store_io.clone()); - add_fetcher_builtins(&mut eval, tvix_store_io.clone()); - add_import_builtins(&mut eval, tvix_store_io.clone()); - configure_nix_path(&mut eval, &None); + let mut eval_builder = tvix_eval::Evaluation::builder(Box::new(TvixIO::new( + tvix_store_io.clone() as Rc<dyn EvalIO>, + )) as Box<dyn EvalIO>) + .enable_import() + .mode(EvalMode::Strict); + + eval_builder = add_derivation_builtins(eval_builder, Rc::clone(&tvix_store_io)); + eval_builder = add_fetcher_builtins(eval_builder, Rc::clone(&tvix_store_io)); + eval_builder = add_import_builtins(eval_builder, tvix_store_io); + eval_builder = configure_nix_path(eval_builder, &None); + + let eval = eval_builder.build(); let result = eval.evaluate(code, Some(code_path.clone())); let failed = match result.value { diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.exp b/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.exp index a136b0035e0a..e27068dac1be 100644 --- a/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.exp +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.exp @@ -1 +1 @@ -[ true true true true true true true true true true true true true ] +[ true true true true true true true true true true true true true true ] diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.nix b/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.nix index e5719e00c3ae..3fe8b4cbbd51 100644 --- a/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.nix +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-context-introspection.nix @@ -80,4 +80,5 @@ in # `toJSON` tests (builtins.hasContext (builtins.toJSON an-str)) (builtins.hasContext (builtins.toJSON an-list)) + (builtins.hasContext (builtins.toJSON { __toString = _: "${drv}"; })) ] diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-storePath2.exp b/tvix/glue/src/tests/tvix_tests/eval-okay-storePath2.exp new file mode 100644 index 000000000000..26a467d2799d --- /dev/null +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-storePath2.exp @@ -0,0 +1 @@ +{ plain = "/nix/store/vqsvbisgiqrqa1y0qljigq4ds5h38gym-dummy"; withSubPath = "/nix/store/vqsvbisgiqrqa1y0qljigq4ds5h38gym-dummy/.keep"; } diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-storePath2.nix b/tvix/glue/src/tests/tvix_tests/eval-okay-storePath2.nix new file mode 100644 index 000000000000..2d1fc45871f3 --- /dev/null +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-storePath2.nix @@ -0,0 +1,8 @@ +let + path = builtins.unsafeDiscardStringContext "${../dummy}"; + storePath = builtins.storePath path; +in +{ + plain = builtins.storePath path; + withSubPath = builtins.storePath (path + "/.keep"); +} diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-toxml-context.nix b/tvix/glue/src/tests/tvix_tests/eval-okay-toxml-context.nix index 933aa46022dd..352470980383 100644 --- a/tvix/glue/src/tests/tvix_tests/eval-okay-toxml-context.nix +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-toxml-context.nix @@ -4,7 +4,7 @@ inherit (derivation { name = "test"; builder = "/bin/sh"; - system = builtins.currentSystem; + system = "x86_64-linux"; }) drvPath; })) diff --git a/tvix/glue/src/tvix_build.rs b/tvix/glue/src/tvix_build.rs index e9eb1725ef3e..fa73224992e6 100644 --- a/tvix/glue/src/tvix_build.rs +++ b/tvix/glue/src/tvix_build.rs @@ 
-1,16 +1,14 @@ //! This module contains glue code translating from -//! [nix_compat::derivation::Derivation] to [tvix_build::proto::BuildRequest]. +//! [nix_compat::derivation::Derivation] to [tvix_build::buildservice::BuildRequest]. -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, HashSet}; +use std::path::PathBuf; use bytes::Bytes; -use nix_compat::{derivation::Derivation, nixbase32}; +use nix_compat::{derivation::Derivation, nixbase32, store_path::StorePath}; use sha2::{Digest, Sha256}; -use tvix_build::proto::{ - build_request::{AdditionalFile, BuildConstraints, EnvVar}, - BuildRequest, -}; -use tvix_castore::proto::{self, node::Node}; +use tvix_build::buildservice::{AdditionalFile, BuildConstraints, BuildRequest, EnvVar}; +use tvix_castore::Node; /// These are the environment variables that Nix sets in its sandbox for every /// build. @@ -29,17 +27,30 @@ const NIX_ENVIRONMENT_VARS: [(&str, &str); 12] = [ ("TMPDIR", "/build"), ]; -/// Takes a [Derivation] and turns it into a [BuildRequest]. +/// Get an iterator of store paths whose nixbase32 hashes will be the needles for refscanning +/// Importantly, the returned order will match the one used by [derivation_to_build_request] +/// so users may use this function to map back from the found needles to a store path +pub(crate) fn get_refscan_needles( + derivation: &Derivation, +) -> impl Iterator<Item = &StorePath<String>> { + derivation + .outputs + .values() + .filter_map(|output| output.path.as_ref()) + .chain(derivation.input_sources.iter()) + .chain(derivation.input_derivations.keys()) +} + +/// Takes a [Derivation] and turns it into a [buildservice::BuildRequest]. /// It assumes the Derivation has been validated. /// It needs two lookup functions: /// - one translating input sources to a castore node /// (`fn_input_sources_to_node`) /// - one translating a tuple of drv path and (a subset of their) output names to /// castore nodes of the selected outpus (`fn_input_drvs_to_output_nodes`). -#[allow(clippy::mutable_key_type)] pub(crate) fn derivation_to_build_request( derivation: &Derivation, - inputs: BTreeSet<Node>, + inputs: BTreeMap<StorePath<String>, Node>, ) -> std::io::Result<BuildRequest> { debug_assert!(derivation.validate(true).is_ok(), "drv must validate"); @@ -48,16 +59,6 @@ pub(crate) fn derivation_to_build_request( command_args.push(derivation.builder.clone()); command_args.extend_from_slice(&derivation.arguments); - // produce output_paths, which is the absolute path of each output (sorted) - let mut output_paths: Vec<String> = derivation - .outputs - .values() - .map(|e| e.path_str()[1..].to_owned()) - .collect(); - - // Sort the outputs. We can use sort_unstable, as these are unique strings. - output_paths.sort_unstable(); - // Produce environment_vars and additional files. // We use a BTreeMap while producing, and only realize the resulting Vec // while populating BuildRequest, so we don't need to worry about ordering. @@ -86,23 +87,41 @@ pub(crate) fn derivation_to_build_request( // TODO: handle __json (structured attrs, provide JSON file and source-able bash script) // Produce constraints. - let constraints = Some(BuildConstraints { - system: derivation.system.clone(), - min_memory: 0, - available_ro_paths: vec![], - // in case this is a fixed-output derivation, allow network access. 
- network_access: derivation.outputs.len() == 1 - && derivation - .outputs - .get("out") - .expect("invalid derivation") - .is_fixed(), - provide_bin_sh: true, - }); + let mut constraints = HashSet::from([ + BuildConstraints::System(derivation.system.clone()), + BuildConstraints::ProvideBinSh, + ]); + + if derivation.outputs.len() == 1 + && derivation + .outputs + .get("out") + .expect("Tvix bug: Derivation has no out output") + .is_fixed() + { + constraints.insert(BuildConstraints::NetworkAccess); + } - let build_request = BuildRequest { + Ok(BuildRequest { + // Importantly, this must match the order of get_refscan_needles, since users may use that + // function to map back from the found needles to a store path + refscan_needles: get_refscan_needles(derivation) + .map(|path| nixbase32::encode(path.digest())) + .collect(), command_args, - outputs: output_paths, + + outputs: { + // produce output_paths, which is the absolute path of each output (sorted) + let mut output_paths: Vec<PathBuf> = derivation + .outputs + .values() + .map(|e| PathBuf::from(e.path_str()[1..].to_owned())) + .collect(); + + // Sort the outputs. We can use sort_unstable, as these are unique strings. + output_paths.sort_unstable(); + output_paths + }, // Turn this into a sorted-by-key Vec<EnvVar>. environment_vars: environment_vars @@ -111,7 +130,15 @@ pub(crate) fn derivation_to_build_request( .collect(), inputs: inputs .into_iter() - .map(|n| proto::Node { node: Some(n) }) + .map(|(path, node)| { + ( + path.to_string() + .as_str() + .try_into() + .expect("Tvix bug: unable to convert store path basename to PathComponent"), + node, + ) + }) .collect(), inputs_dir: nix_compat::store_path::STORE_DIR[1..].into(), constraints, @@ -119,17 +146,12 @@ pub(crate) fn derivation_to_build_request( scratch_paths: vec!["build".into(), "nix/store".into()], additional_files: additional_files .into_iter() - .map(|(path, contents)| AdditionalFile { path, contents }) + .map(|(path, contents)| AdditionalFile { + path: PathBuf::from(path), + contents, + }) .collect(), - }; - - debug_assert!( - build_request.validate().is_ok(), - "invalid BuildRequest: {}", - build_request.validate().unwrap_err() - ); - - Ok(build_request) + }) } /// handle passAsFile, if set. @@ -192,31 +214,26 @@ fn calculate_pass_as_file_env(k: &str) -> (String, String) { #[cfg(test)] mod test { - use std::collections::BTreeSet; - use bytes::Bytes; - use nix_compat::derivation::Derivation; - use tvix_build::proto::{ - build_request::{AdditionalFile, BuildConstraints, EnvVar}, - BuildRequest, - }; - use tvix_castore::{ - fixtures::DUMMY_DIGEST, - proto::{self, node::Node, DirectoryNode}, - }; + use nix_compat::{derivation::Derivation, store_path::StorePath}; + use std::collections::{BTreeMap, HashSet}; + use std::sync::LazyLock; + use tvix_castore::fixtures::DUMMY_DIGEST; + use tvix_castore::{Node, PathComponent}; + + use tvix_build::buildservice::{AdditionalFile, BuildConstraints, BuildRequest, EnvVar}; use crate::tvix_build::NIX_ENVIRONMENT_VARS; use super::derivation_to_build_request; - use lazy_static::lazy_static; - - lazy_static! 
{ - static ref INPUT_NODE_FOO: Node = Node::Directory(DirectoryNode { - name: Bytes::from("mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"), - digest: DUMMY_DIGEST.clone().into(), - size: 42, - }); - } + + static INPUT_NODE_FOO_NAME: LazyLock<Bytes> = + LazyLock::new(|| "mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar".into()); + + static INPUT_NODE_FOO: LazyLock<Node> = LazyLock::new(|| Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 42, + }); #[test] fn test_derivation_to_build_request() { @@ -224,9 +241,14 @@ mod test { let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse"); - let build_request = - derivation_to_build_request(&derivation, BTreeSet::from([INPUT_NODE_FOO.clone()])) - .expect("must succeed"); + let build_request = derivation_to_build_request( + &derivation, + BTreeMap::from([( + StorePath::<String>::from_bytes(&INPUT_NODE_FOO_NAME.clone()).unwrap(), + INPUT_NODE_FOO.clone(), + )]), + ) + .expect("must succeed"); let mut expected_environment_vars = vec![ EnvVar { @@ -263,20 +285,22 @@ mod test { command_args: vec![":".into()], outputs: vec!["nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo".into()], environment_vars: expected_environment_vars, - inputs: vec![proto::Node { - node: Some(INPUT_NODE_FOO.clone()) - }], + inputs: BTreeMap::from([( + PathComponent::try_from(INPUT_NODE_FOO_NAME.clone()).unwrap(), + INPUT_NODE_FOO.clone() + )]), inputs_dir: "nix/store".into(), - constraints: Some(BuildConstraints { - system: derivation.system.clone(), - min_memory: 0, - network_access: false, - available_ro_paths: vec![], - provide_bin_sh: true, - }), + constraints: HashSet::from([ + BuildConstraints::System(derivation.system.clone()), + BuildConstraints::ProvideBinSh + ]), additional_files: vec![], working_dir: "build".into(), scratch_paths: vec!["build".into(), "nix/store".into()], + refscan_needles: vec![ + "fhaj6gmwns62s6ypkcldbaj2ybvkhx3p".into(), + "ss2p4wmxijn652haqyd7dckxwl4c7hxx".into() + ], }, build_request ); @@ -289,7 +313,7 @@ mod test { let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse"); let build_request = - derivation_to_build_request(&derivation, BTreeSet::from([])).expect("must succeed"); + derivation_to_build_request(&derivation, BTreeMap::from([])).expect("must succeed"); let mut expected_environment_vars = vec![ EnvVar { @@ -334,18 +358,17 @@ mod test { command_args: vec![":".to_string()], outputs: vec!["nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".into()], environment_vars: expected_environment_vars, - inputs: vec![], + inputs: BTreeMap::new(), inputs_dir: "nix/store".into(), - constraints: Some(BuildConstraints { - system: derivation.system.clone(), - min_memory: 0, - network_access: true, - available_ro_paths: vec![], - provide_bin_sh: true, - }), + constraints: HashSet::from([ + BuildConstraints::System(derivation.system.clone()), + BuildConstraints::NetworkAccess, + BuildConstraints::ProvideBinSh + ]), additional_files: vec![], working_dir: "build".into(), scratch_paths: vec!["build".into(), "nix/store".into()], + refscan_needles: vec!["4q0pg5zpfmznxscq3avycvf9xdvx50n3".into()], }, build_request ); @@ -359,7 +382,7 @@ mod test { let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse"); let build_request = - derivation_to_build_request(&derivation, BTreeSet::from([])).expect("must succeed"); + derivation_to_build_request(&derivation, BTreeMap::from([])).expect("must succeed"); let mut expected_environment_vars = vec![ // Note how bar and baz are not present in the env anymore, @@ -407,15 
+430,12 @@ mod test { command_args: vec![":".to_string()], outputs: vec!["nix/store/pp17lwra2jkx8rha15qabg2q3wij72lj-foo".into()], environment_vars: expected_environment_vars, - inputs: vec![], + inputs: BTreeMap::new(), inputs_dir: "nix/store".into(), - constraints: Some(BuildConstraints { - system: derivation.system.clone(), - min_memory: 0, - network_access: false, - available_ro_paths: vec![], - provide_bin_sh: true, - }), + constraints: HashSet::from([ + BuildConstraints::System(derivation.system.clone()), + BuildConstraints::ProvideBinSh, + ]), additional_files: vec![ // baz env AdditionalFile { @@ -432,6 +452,7 @@ mod test { ], working_dir: "build".into(), scratch_paths: vec!["build".into(), "nix/store".into()], + refscan_needles: vec!["pp17lwra2jkx8rha15qabg2q3wij72lj".into()], }, build_request ); diff --git a/tvix/glue/src/tvix_io.rs b/tvix/glue/src/tvix_io.rs index db0c2cef77aa..323fa8d20ccb 100644 --- a/tvix/glue/src/tvix_io.rs +++ b/tvix/glue/src/tvix_io.rs @@ -1,12 +1,10 @@ //! This module implements a wrapper around tvix-eval's [EvalIO] type, //! adding functionality which is required by tvix-cli: //! -//! 1. Marking plain paths known to the reference scanner. -//! 2. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping. +//! 1. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping. //! //! All uses of [EvalIO] in tvix-cli must make use of this wrapper, -//! otherwise fundamental features like nixpkgs bootstrapping and hash -//! calculation will not work. +//! otherwise nixpkgs bootstrapping will not work. use std::io::{self, Cursor}; use std::path::{Path, PathBuf}; diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 4e5488067f8f..67a88e13c54b 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -1,12 +1,9 @@ //! This module provides an implementation of EvalIO talking to tvix-store. -use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; -use nix_compat::nixhash::NixHash; -use nix_compat::store_path::StorePathRef; use nix_compat::{nixhash::CAHash, store_path::StorePath}; +use std::collections::BTreeMap; use std::{ cell::RefCell, - collections::BTreeSet, io, path::{Path, PathBuf}, sync::Arc, @@ -15,17 +12,15 @@ use tokio_util::io::SyncIoBridge; use tracing::{error, instrument, warn, Level, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; use tvix_build::buildservice::BuildService; -use tvix_castore::proto::node::Node; use tvix_eval::{EvalIO, FileType, StdIO}; use tvix_store::nar::NarCalculationService; use tvix_castore::{ blobservice::BlobService, directoryservice::{self, DirectoryService}, - proto::NamedNode, - B3Digest, + Node, }; -use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo}; +use tvix_store::pathinfoservice::{PathInfo, PathInfoService}; use crate::fetchers::Fetcher; use crate::known_paths::KnownPaths; @@ -67,7 +62,7 @@ pub struct TvixStoreIO { >, // Paths we know how to produce, by building or fetching. - pub(crate) known_paths: RefCell<KnownPaths>, + pub known_paths: RefCell<KnownPaths>, } impl TvixStoreIO { @@ -104,10 +99,10 @@ impl TvixStoreIO { /// /// In case there is no PathInfo yet, this means we need to build it /// (which is currently still stubbed out).
- #[instrument(skip(self, store_path), fields(store_path=%store_path, indicatif.pb_show=1), ret(level = Level::TRACE), err)] + #[instrument(skip(self, store_path), fields(store_path=%store_path, indicatif.pb_show=1), ret(level = Level::TRACE), err(level = Level::TRACE))] async fn store_path_to_node( &self, - store_path: &StorePath, + store_path: &StorePath<String>, sub_path: &Path, ) -> io::Result<Option<Node>> { // Find the root node for the store_path. @@ -121,8 +116,8 @@ impl TvixStoreIO { .get(*store_path.digest()) .await? { - // if we have a PathInfo, we know there will be a root_node (due to validation) - Some(path_info) => path_info.node.expect("no node").node.expect("no node"), + // TODO: use stricter typed BuildRequest here. + Some(path_info) => path_info.node, // If there's no PathInfo found, this normally means we have to // trigger the build (and insert into PathInfoService, after // reference scanning). @@ -185,8 +180,7 @@ impl TvixStoreIO { // derivation_to_build_request needs castore nodes for all inputs. // Provide them, which means this is where we recursively build // all dependencies. - #[allow(clippy::mutable_key_type)] - let mut input_nodes: BTreeSet<Node> = + let mut inputs: BTreeMap<StorePath<String>, Node> = futures::stream::iter(drv.input_derivations.iter()) .map(|(input_drv_path, output_names)| { // look up the derivation object @@ -201,7 +195,7 @@ impl TvixStoreIO { }; // convert output names to actual paths - let output_paths: Vec<StorePath> = output_names + let output_paths: Vec<StorePath<String>> = output_names .iter() .map(|output_name| { input_drv .outputs .get(output_name) .expect("invalid output name") .path .as_ref() .expect("missing output path") .clone() }) .collect(); + // For each output, ask for the castore node. // We're in a per-derivation context, so if they're // not built yet they'll all get built together. @@ -228,7 +223,7 @@ impl TvixStoreIO { .await?; if let Some(node) = node { - Ok(node) + Ok((output_path, node)) } else { Err(io::Error::other("no node produced")) } @@ -242,26 +237,29 @@ impl TvixStoreIO { .try_collect() .await?; - // add input sources // FUTUREWORK: merge these two things together - #[allow(clippy::mutable_key_type)] - let input_nodes_input_sources: BTreeSet<Node> = + // add input sources + let input_sources: BTreeMap<_, _> = futures::stream::iter(drv.input_sources.iter()) .then(|input_source| { - Box::pin(async { - let node = self - .store_path_to_node(input_source, Path::new("")) - .await?; - if let Some(node) = node { - Ok(node) - } else { - Err(io::Error::other("no node produced")) + Box::pin({ + let input_source = input_source.clone(); + async move { + let node = self + .store_path_to_node(&input_source, Path::new("")) + .await?; + if let Some(node) = node { + Ok((input_source, node)) + } else { + Err(io::Error::other("no node produced")) + } } }) }) .try_collect() .await?; - input_nodes.extend(input_nodes_input_sources); + + inputs.extend(input_sources); span.pb_set_message(&format!("🔨Building {}", &store_path)); @@ -270,7 +268,7 @@ impl TvixStoreIO { // operations, so dealt with in the Some(…) match arm // synthesize the build request.
- let build_request = derivation_to_build_request(&drv, input_nodes)?; + let build_request = derivation_to_build_request(&drv, inputs)?; // create a build let build_result = self @@ -280,45 +278,77 @@ impl TvixStoreIO { .await .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?; - // TODO: refscan + // Maps from the index in refscan_needles to the full store path + // Used to map back to the actual store path from the found needles + // Importantly, this must match the order of the needles generated in derivation_to_build_request + let refscan_needles = + crate::tvix_build::get_refscan_needles(&drv).collect::<Vec<_>>(); // For each output, insert a PathInfo. - for output in &build_result.outputs { - let root_node = output.node.as_ref().expect("invalid root node"); + for ((output, output_needles), drv_output) in build_result + .outputs + .iter() + .zip(build_result.outputs_needles.iter()) + .zip(drv.outputs.iter()) + { + let output_node = output + .clone() + .try_into_anonymous_node() + .expect("invalid node"); + + let output_needles: Vec<_> = output_needles + .needles + .iter() + // Map each output needle index back to the refscan_needle + .map(|idx| { + refscan_needles + .get(*idx as usize) + .ok_or(std::io::Error::new( + std::io::ErrorKind::Other, + "invalid build response", + )) + }) + .collect::<Result<_, std::io::Error>>()?; // calculate the nar representation let (nar_size, nar_sha256) = self .nar_calculation_service - .calculate_nar(root_node) + .calculate_nar(&output_node) .await?; // assemble the PathInfo to persist let path_info = PathInfo { - node: Some(tvix_castore::proto::Node { - node: Some(root_node.clone()), - }), - references: vec![], // TODO: refscan - narinfo: Some(tvix_store::proto::NarInfo { - nar_size, - nar_sha256: Bytes::from(nar_sha256.to_vec()), - signatures: vec![], - reference_names: vec![], // TODO: refscan - deriver: Some(tvix_store::proto::StorePath { - name: drv_path + store_path: drv_output + .1 + .path + .as_ref() + .ok_or(std::io::Error::new( + std::io::ErrorKind::Other, + "Tvix bug: missing output store path", + ))? 
+ .to_owned(), + node: output_node, + references: output_needles + .iter() + .map(|s| (**s).to_owned()) + .collect(), + nar_size, + nar_sha256, + signatures: vec![], + deriver: Some( + StorePath::from_name_and_digest_fixed( + drv_path .name() .strip_suffix(".drv") - .expect("missing .drv suffix") - .to_string(), - digest: drv_path.digest().to_vec().into(), - }), - ca: drv.fod_digest().map( - |fod_digest| -> tvix_store::proto::nar_info::Ca { - (&CAHash::Nar(nix_compat::nixhash::NixHash::Sha256( - fod_digest, - ))) - .into() - }, + .expect("missing .drv suffix"), + *drv_path.digest(), + ) + .expect( + "Tvix bug: StorePath without .drv suffix must be valid", ), + ), + ca: drv.fod_digest().map(|fod_digest| { + CAHash::Nar(nix_compat::nixhash::NixHash::Sha256(fod_digest)) }), }; @@ -329,16 +359,17 @@ impl TvixStoreIO { } // find the output for the store path requested + let s = store_path.to_string(); + build_result .outputs .into_iter() - .find(|output_node| { - output_node.node.as_ref().expect("invalid node").get_name() - == store_path.to_string().as_bytes() + .map(|e| e.try_into_name_and_node().expect("invalid node")) + .find(|(output_name, _output_node)| { + output_name.as_ref() == s.as_bytes() }) .expect("build didn't produce the store path") - .node - .expect("invalid node") + .1 } } } @@ -352,81 +383,15 @@ impl TvixStoreIO { .await .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e)) } - - pub(crate) async fn node_to_path_info( - &self, - name: &str, - path: &Path, - ca: &CAHash, - root_node: Node, - ) -> io::Result<(PathInfo, NixHash, StorePath)> { - // Ask the PathInfoService for the NAR size and sha256 - // We always need it no matter what is the actual hash mode - // because the path info construct a narinfo which *always* - // require a SHA256 of the NAR representation and the NAR size. - let (nar_size, nar_sha256) = self - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await?; - - // Calculate the output path. This might still fail, as some names are illegal. - let output_path = - nix_compat::store_path::build_ca_path(name, ca, Vec::<String>::new(), false).map_err( - |_| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("invalid name: {}", name), - ) - }, - )?; - - // assemble a new root_node with a name that is derived from the nar hash. - let root_node = root_node.rename(output_path.to_string().into_bytes().into()); - tvix_store::import::log_node(&root_node, path); - - let path_info = - tvix_store::import::derive_nar_ca_path_info(nar_size, nar_sha256, Some(ca), root_node); - - Ok(( - path_info, - NixHash::Sha256(nar_sha256), - output_path.to_owned(), - )) - } - - pub(crate) async fn register_node_in_path_info_service( - &self, - name: &str, - path: &Path, - ca: &CAHash, - root_node: Node, - ) -> io::Result<StorePath> { - let (path_info, _, output_path) = self.node_to_path_info(name, path, ca, root_node).await?; - let _path_info = self.path_info_service.as_ref().put(path_info).await?; - - Ok(output_path) - } - - pub async fn store_path_exists<'a>(&'a self, store_path: StorePathRef<'a>) -> io::Result<bool> { - Ok(self - .path_info_service - .as_ref() - .get(*store_path.digest()) - .await? 
- .is_some()) - } } impl EvalIO for TvixStoreIO { #[instrument(skip(self), ret(level = Level::TRACE), err)] fn path_exists(&self, path: &Path) -> io::Result<bool> { - if let Ok((store_path, sub_path)) = - StorePath::from_absolute_path_full(&path.to_string_lossy()) - { + if let Ok((store_path, sub_path)) = StorePath::from_absolute_path_full(path) { if self .tokio_handle - .block_on(self.store_path_to_node(&store_path, &sub_path))? + .block_on(self.store_path_to_node(&store_path, sub_path))? .is_some() { Ok(true) @@ -443,35 +408,21 @@ impl EvalIO for TvixStoreIO { #[instrument(skip(self), err)] fn open(&self, path: &Path) -> io::Result<Box<dyn io::Read>> { - if let Ok((store_path, sub_path)) = - StorePath::from_absolute_path_full(&path.to_string_lossy()) - { + if let Ok((store_path, sub_path)) = StorePath::from_absolute_path_full(path) { if let Some(node) = self .tokio_handle - .block_on(async { self.store_path_to_node(&store_path, &sub_path).await })? + .block_on(async { self.store_path_to_node(&store_path, sub_path).await })? { // depending on the node type, treat open differently match node { - Node::Directory(_) => { + Node::Directory { .. } => { // This would normally be a io::ErrorKind::IsADirectory (still unstable) Err(io::Error::new( io::ErrorKind::Unsupported, format!("tried to open directory at {:?}", path), )) } - Node::File(file_node) => { - let digest: B3Digest = - file_node.digest.clone().try_into().map_err(|_e| { - error!( - file_node = ?file_node, - "invalid digest" - ); - io::Error::new( - io::ErrorKind::InvalidData, - format!("invalid digest length in file node: {:?}", file_node), - ) - })?; - + Node::File { digest, .. } => { self.tokio_handle.block_on(async { let resp = self.blob_service.as_ref().open_read(&digest).await?; match resp { @@ -493,7 +444,7 @@ impl EvalIO for TvixStoreIO { } }) } - Node::Symlink(_symlink_node) => Err(io::Error::new( + Node::Symlink { .. } => Err(io::Error::new( io::ErrorKind::Unsupported, "open for symlinks is unsupported", ))?, @@ -511,17 +462,15 @@ impl EvalIO for TvixStoreIO { #[instrument(skip(self), ret(level = Level::TRACE), err)] fn file_type(&self, path: &Path) -> io::Result<FileType> { - if let Ok((store_path, sub_path)) = - StorePath::from_absolute_path_full(&path.to_string_lossy()) - { + if let Ok((store_path, sub_path)) = StorePath::from_absolute_path_full(path) { if let Some(node) = self .tokio_handle - .block_on(async { self.store_path_to_node(&store_path, &sub_path).await })? + .block_on(async { self.store_path_to_node(&store_path, sub_path).await })? { match node { - Node::Directory(_) => Ok(FileType::Directory), - Node::File(_) => Ok(FileType::Regular), - Node::Symlink(_) => Ok(FileType::Symlink), + Node::Directory { .. } => Ok(FileType::Directory), + Node::File { .. } => Ok(FileType::Regular), + Node::Symlink { .. } => Ok(FileType::Symlink), } } else { self.std_io.file_type(path) @@ -533,36 +482,24 @@ impl EvalIO for TvixStoreIO { #[instrument(skip(self), ret(level = Level::TRACE), err)] fn read_dir(&self, path: &Path) -> io::Result<Vec<(bytes::Bytes, FileType)>> { - if let Ok((store_path, sub_path)) = - StorePath::from_absolute_path_full(&path.to_string_lossy()) - { + if let Ok((store_path, sub_path)) = StorePath::from_absolute_path_full(path) { if let Some(node) = self .tokio_handle - .block_on(async { self.store_path_to_node(&store_path, &sub_path).await })? + .block_on(async { self.store_path_to_node(&store_path, sub_path).await })? { match node { - Node::Directory(directory_node) => { + Node::Directory { digest, .. 
} => { // fetch the Directory itself. - let digest: B3Digest = - directory_node.digest.clone().try_into().map_err(|_e| { - io::Error::new( - io::ErrorKind::InvalidData, - format!( - "invalid digest length in directory node: {:?}", - directory_node - ), - ) - })?; - - if let Some(directory) = self.tokio_handle.block_on(async { - self.directory_service.as_ref().get(&digest).await + if let Some(directory) = self.tokio_handle.block_on({ + let digest = digest.clone(); + async move { self.directory_service.as_ref().get(&digest).await } })? { let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new(); - for node in directory.nodes() { + for (name, node) in directory.into_nodes() { children.push(match node { - Node::Directory(e) => (e.name, FileType::Directory), - Node::File(e) => (e.name, FileType::Regular), - Node::Symlink(e) => (e.name, FileType::Symlink), + Node::Directory { .. } => (name.into(), FileType::Directory), + Node::File { .. } => (name.clone().into(), FileType::Regular), + Node::Symlink { .. } => (name.into(), FileType::Symlink), }) } Ok(children) @@ -579,14 +516,14 @@ impl EvalIO for TvixStoreIO { ))? } } - Node::File(_file_node) => { + Node::File { .. } => { // This would normally be a io::ErrorKind::NotADirectory (still unstable) Err(io::Error::new( io::ErrorKind::Unsupported, "tried to readdir path {:?}, which is a file", ))? } - Node::Symlink(_symlink_node) => Err(io::Error::new( + Node::Symlink { .. } => Err(io::Error::new( io::ErrorKind::Unsupported, "read_dir for symlinks is unsupported", ))?, @@ -601,7 +538,7 @@ impl EvalIO for TvixStoreIO { #[instrument(skip(self), ret(level = Level::TRACE), err)] fn import_path(&self, path: &Path) -> io::Result<PathBuf> { - let output_path = self.tokio_handle.block_on(async { + let path_info = self.tokio_handle.block_on({ tvix_store::import::import_path_as_nar_ca( path, tvix_store::import::path_to_name(path)?, @@ -610,10 +547,10 @@ impl EvalIO for TvixStoreIO { &self.path_info_service, &self.nar_calculation_service, ) - .await })?; - Ok(output_path.to_absolute_path().into()) + // From the returned PathInfo, extract the store path and return it. 
+ Ok(path_info.store_path.to_absolute_path().into()) } #[instrument(skip(self), ret(level = Level::TRACE))] @@ -627,10 +564,11 @@ mod tests { use std::{path::Path, rc::Rc, sync::Arc}; use bstr::ByteSlice; + use clap::Parser; use tempfile::TempDir; use tvix_build::buildservice::DummyBuildService; use tvix_eval::{EvalIO, EvaluationResult}; - use tvix_store::utils::construct_services; + use tvix_store::utils::{construct_services, ServiceUrlsMemory}; use super::TvixStoreIO; use crate::builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}; @@ -642,22 +580,27 @@ mod tests { let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); let (blob_service, directory_service, path_info_service, nar_calculation_service) = tokio_runtime - .block_on(async { construct_services("memory://", "memory://", "memory://").await }) + .block_on(async { + construct_services(ServiceUrlsMemory::parse_from(std::iter::empty::<&str>())) + .await + }) .unwrap(); let io = Rc::new(TvixStoreIO::new( blob_service, directory_service, - path_info_service.into(), + path_info_service, nar_calculation_service.into(), Arc::<DummyBuildService>::default(), tokio_runtime.handle().clone(), )); - let mut eval = tvix_eval::Evaluation::new(io.clone() as Rc<dyn EvalIO>, true); - add_derivation_builtins(&mut eval, io.clone()); - add_fetcher_builtins(&mut eval, io.clone()); - add_import_builtins(&mut eval, io); + let mut eval_builder = + tvix_eval::Evaluation::builder(io.clone() as Rc<dyn EvalIO>).enable_import(); + eval_builder = add_derivation_builtins(eval_builder, Rc::clone(&io)); + eval_builder = add_fetcher_builtins(eval_builder, Rc::clone(&io)); + eval_builder = add_import_builtins(eval_builder, io); + let eval = eval_builder.build(); // run the evaluation itself. eval.evaluate(str, None)
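The needle-index contract threaded through the changes above is easiest to see in isolation: the builder reports references per output as indices into the refscan_needles list sent with the BuildRequest, so the producer (derivation_to_build_request) and the consumer (the PathInfo assembly in tvix_store_io.rs) must derive that list in the same order. Below is a minimal standalone sketch of that contract, with plain strings standing in for tvix's StorePath and build-result types; the needle values are borrowed from the test fixture above, and the placement of the second needle under input derivations is an assumption for illustration.

// Standalone sketch (simplified types, not the tvix API): both sides must
// build the needle list in the same order, or index-based mapping breaks.
fn refscan_needles(
    outputs: &[&str],
    input_sources: &[&str],
    input_derivations: &[&str],
) -> Vec<String> {
    // Same order as get_refscan_needles: outputs first, then input
    // sources, then input derivations.
    outputs
        .iter()
        .chain(input_sources.iter())
        .chain(input_derivations.iter())
        .map(|s| s.to_string())
        .collect()
}

fn main() {
    // Digests taken from the test expectations above; the second one is
    // assumed here to come from the input derivation.
    let needles = refscan_needles(
        &["fhaj6gmwns62s6ypkcldbaj2ybvkhx3p"],
        &[],
        &["ss2p4wmxijn652haqyd7dckxwl4c7hxx"],
    );

    // Suppose a build response reports needle indices [0, 1] for an
    // output; mapping back through the same ordering recovers the paths.
    let found = [0usize, 1];
    let references: Vec<&str> = found.iter().map(|&i| needles[i].as_str()).collect();
    assert_eq!(
        references,
        ["fhaj6gmwns62s6ypkcldbaj2ybvkhx3p", "ss2p4wmxijn652haqyd7dckxwl4c7hxx"]
    );
}

This is why both the comment in derivation_to_build_request and the one in tvix_store_io.rs stress matching orders: the indices carry no other identifying information.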
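The test modules in this diff also migrate from the lazy_static! macro to std::sync::LazyLock, which has been stable since Rust 1.80. A minimal sketch of the pattern, reusing the fixture store path name from the tests:

use std::sync::LazyLock;

// std's replacement for lazy_static!: the closure runs once, on first
// access, and the resulting value is cached for all later accesses.
static INPUT_NODE_FOO_NAME: LazyLock<String> =
    LazyLock::new(|| "mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar".to_string());

fn main() {
    // Deref coercion gives &str methods on the lazily-initialized String.
    assert!(INPUT_NODE_FOO_NAME.ends_with("-bar"));
}

Unlike lazy_static!, this needs no external dependency and no macro, which is presumably why the dev-dependency on lazy_static is dropped from Cargo.toml above.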