diff options
Diffstat (limited to 'tvix/glue')
-rw-r--r-- | tvix/glue/Cargo.toml | 20 | ||||
-rw-r--r-- | tvix/glue/default.nix | 5 | ||||
-rw-r--r-- | tvix/glue/src/.skip-subtree | 1 | ||||
-rw-r--r-- | tvix/glue/src/derivation.nix | 36 | ||||
-rw-r--r-- | tvix/glue/src/derivation.rs | 601 | ||||
-rw-r--r-- | tvix/glue/src/errors.rs | 26 | ||||
-rw-r--r-- | tvix/glue/src/fetchurl.nix | 53 | ||||
-rw-r--r-- | tvix/glue/src/known_paths.rs | 186 | ||||
-rw-r--r-- | tvix/glue/src/lib.rs | 28 | ||||
-rw-r--r-- | tvix/glue/src/refscan.rs | 115 | ||||
-rw-r--r-- | tvix/glue/src/tvix_io.rs | 80 | ||||
-rw-r--r-- | tvix/glue/src/tvix_store_io.rs | 359 |
12 files changed, 1510 insertions, 0 deletions
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml new file mode 100644 index 000000000000..4ebfda870399 --- /dev/null +++ b/tvix/glue/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "tvix-glue" +version = "0.1.0" +edition = "2021" + +[dependencies] +nix-compat = { path = "../nix-compat" } +tvix-eval = { path = "../eval" } +tvix-castore = { path = "../castore" } +tvix-store = { path = "../store", default-features = false, features = []} +bytes = "1.4.0" +tracing = "0.1.37" +tokio = "1.28.0" +thiserror = "1.0.38" + +[dependencies.wu-manber] +git = "https://github.com/tvlfyi/wu-manber.git" + +[dev-dependencies] +test-case = "2.2.2" diff --git a/tvix/glue/default.nix b/tvix/glue/default.nix new file mode 100644 index 000000000000..975b0a5a7007 --- /dev/null +++ b/tvix/glue/default.nix @@ -0,0 +1,5 @@ +{ depot, ... }: + +(depot.tvix.crates.workspaceMembers.tvix-glue.build.override { + runTests = true; +}) diff --git a/tvix/glue/src/.skip-subtree b/tvix/glue/src/.skip-subtree new file mode 100644 index 000000000000..a16a2afe1f1e --- /dev/null +++ b/tvix/glue/src/.skip-subtree @@ -0,0 +1 @@ +Because of the derivation.nix file ... diff --git a/tvix/glue/src/derivation.nix b/tvix/glue/src/derivation.nix new file mode 100644 index 000000000000..9355cc3a96f0 --- /dev/null +++ b/tvix/glue/src/derivation.nix @@ -0,0 +1,36 @@ +# LGPL-2.1-or-later +# +# taken from: https://github.com/NixOS/nix/blob/master/src/libexpr/primops/derivation.nix +# +# TODO: rewrite in native Rust code + +/* This is the implementation of the ‘derivation’ builtin function. + It's actually a wrapper around the ‘derivationStrict’ primop. */ + +drvAttrs @ { outputs ? [ "out" ], ... }: + +let + + strict = derivationStrict drvAttrs; + + commonAttrs = drvAttrs // (builtins.listToAttrs outputsList) // + { + all = map (x: x.value) outputsList; + inherit drvAttrs; + }; + + outputToAttrListElement = outputName: + { + name = outputName; + value = commonAttrs // { + outPath = builtins.getAttr outputName strict; + drvPath = strict.drvPath; + type = "derivation"; + inherit outputName; + }; + }; + + outputsList = map outputToAttrListElement outputs; + +in +(builtins.head outputsList).value diff --git a/tvix/glue/src/derivation.rs b/tvix/glue/src/derivation.rs new file mode 100644 index 000000000000..86a271a3966d --- /dev/null +++ b/tvix/glue/src/derivation.rs @@ -0,0 +1,601 @@ +//! Implements `builtins.derivation`, the core of what makes Nix build packages. +use nix_compat::derivation::{Derivation, Output}; +use nix_compat::nixhash; +use std::cell::RefCell; +use std::collections::{btree_map, BTreeSet}; +use std::rc::Rc; +use tvix_eval::builtin_macros::builtins; +use tvix_eval::generators::{self, emit_warning_kind, GenCo}; +use tvix_eval::{ + AddContext, CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixList, Value, WarningKind, +}; + +use crate::errors::Error; +use crate::known_paths::{KnownPaths, PathKind, PathName}; + +// Constants used for strangely named fields in derivation inputs. +const STRUCTURED_ATTRS: &str = "__structuredAttrs"; +const IGNORE_NULLS: &str = "__ignoreNulls"; + +/// Helper function for populating the `drv.outputs` field from a +/// manually specified set of outputs, instead of the default +/// `outputs`. +async fn populate_outputs( + co: &GenCo, + drv: &mut Derivation, + outputs: NixList, +) -> Result<(), ErrorKind> { + // Remove the original default `out` output. + drv.outputs.clear(); + + for output in outputs { + let output_name = generators::request_force(co, output) + .await + .to_str() + .context("determining output name")?; + + if drv + .outputs + .insert(output_name.as_str().into(), Default::default()) + .is_some() + { + return Err(Error::DuplicateOutput(output_name.as_str().into()).into()); + } + } + + Ok(()) +} + +/// Populate the inputs of a derivation from the build references +/// found when scanning the derivation's parameters. +fn populate_inputs<I: IntoIterator<Item = PathName>>( + drv: &mut Derivation, + known_paths: &KnownPaths, + references: I, +) { + for reference in references.into_iter() { + let reference = &known_paths[&reference]; + match &reference.kind { + PathKind::Plain => { + drv.input_sources.insert(reference.path.clone()); + } + + PathKind::Output { name, derivation } => { + match drv.input_derivations.entry(derivation.clone()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(BTreeSet::from([name.clone()])); + } + + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().insert(name.clone()); + } + } + } + + PathKind::Derivation { output_names } => { + match drv.input_derivations.entry(reference.path.clone()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(output_names.clone()); + } + + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().extend(output_names.clone().into_iter()); + } + } + } + } + } +} + +/// Populate the output configuration of a derivation based on the +/// parameters passed to the call, configuring a fixed-output derivation output +/// if necessary. +/// +/// This function handles all possible combinations of the +/// parameters, including invalid ones. +/// +/// Due to the support for SRI hashes, and how these are passed along to +/// builtins.derivation, outputHash and outputHashAlgo can have values which +/// need to be further modified before constructing the Derivation struct. +/// +/// If outputHashAlgo is an SRI hash, outputHashAlgo must either be an empty +/// string, or the hash algorithm as specified in the (single) SRI (entry). +/// SRI strings with multiple hash algorithms are not supported. +/// +/// In case an SRI string was used, the (single) fixed output is populated +/// with the hash algo name, and the hash digest is populated with the +/// (lowercase) hex encoding of the digest. +/// +/// These values are only rewritten for the outputs, not what's passed to env. +fn handle_fixed_output( + drv: &mut Derivation, + hash_str: Option<String>, // in nix: outputHash + hash_algo_str: Option<String>, // in nix: outputHashAlgo + hash_mode_str: Option<String>, // in nix: outputHashmode +) -> Result<(), ErrorKind> { + // If outputHash is provided, ensure hash_algo_str is compatible. + // If outputHash is not provided, do nothing. + if let Some(hash_str) = hash_str { + // treat an empty algo as None + let hash_algo_str = match hash_algo_str { + Some(s) if s.is_empty() => None, + Some(s) => Some(s), + None => None, + }; + + // construct a NixHash. + let nixhash = nixhash::from_str(&hash_str, hash_algo_str.as_deref()) + .map_err(Error::InvalidOutputHash)?; + + // construct the fixed output. + drv.outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), + ca_hash: match hash_mode_str.as_deref() { + None | Some("flat") => Some(nixhash::CAHash::Flat(nixhash)), + Some("recursive") => Some(nixhash::CAHash::Nar(nixhash)), + Some(other) => return Err(Error::InvalidOutputHashMode(other.to_string()))?, + }, + }, + ); + } + Ok(()) +} + +/// Handles derivation parameters which are not just forwarded to +/// the environment. The return value indicates whether the +/// parameter should be included in the environment. +async fn handle_derivation_parameters( + drv: &mut Derivation, + co: &GenCo, + name: &str, + value: &Value, + val_str: &str, +) -> Result<Result<bool, CatchableErrorKind>, ErrorKind> { + match name { + IGNORE_NULLS => return Ok(Ok(false)), + + // Command line arguments to the builder. + "args" => { + let args = value.to_list()?; + for arg in args { + match strong_coerce_to_string(co, arg).await? { + Err(cek) => return Ok(Err(cek)), + Ok(s) => drv.arguments.push(s), + } + } + + // The arguments do not appear in the environment. + return Ok(Ok(false)); + } + + // Explicitly specified drv outputs (instead of default [ "out" ]) + "outputs" => { + let outputs = value + .to_list() + .context("looking at the `outputs` parameter of the derivation")?; + + populate_outputs(co, drv, outputs).await?; + } + + "builder" => { + drv.builder = val_str.to_string(); + } + + "system" => { + drv.system = val_str.to_string(); + } + + _ => {} + } + + Ok(Ok(true)) +} + +async fn strong_coerce_to_string( + co: &GenCo, + val: Value, +) -> Result<Result<String, CatchableErrorKind>, ErrorKind> { + let val = generators::request_force(co, val).await; + match generators::request_string_coerce(co, val, CoercionKind::Strong).await { + Err(cek) => Ok(Err(cek)), + Ok(val_str) => Ok(Ok(val_str.as_str().to_string())), + } +} + +#[builtins(state = "Rc<RefCell<KnownPaths>>")] +mod derivation_builtins { + use super::*; + use nix_compat::store_path::hash_placeholder; + use tvix_eval::generators::Gen; + + #[builtin("placeholder")] + async fn builtin_placeholder(co: GenCo, input: Value) -> Result<Value, ErrorKind> { + let placeholder = hash_placeholder( + input + .to_str() + .context("looking at output name in builtins.placeholder")? + .as_str(), + ); + + Ok(placeholder.into()) + } + + /// Strictly construct a Nix derivation from the supplied arguments. + /// + /// This is considered an internal function, users usually want to + /// use the higher-level `builtins.derivation` instead. + #[builtin("derivationStrict")] + async fn builtin_derivation_strict( + state: Rc<RefCell<KnownPaths>>, + co: GenCo, + input: Value, + ) -> Result<Value, ErrorKind> { + let input = input.to_attrs()?; + let name = generators::request_force(&co, input.select_required("name")?.clone()) + .await + .to_str() + .context("determining derivation name")?; + + if name.is_empty() { + return Err(ErrorKind::Abort("derivation has empty name".to_string())); + } + + // Check whether attributes should be passed as a JSON file. + // TODO: the JSON serialisation has to happen here. + if let Some(sa) = input.select(STRUCTURED_ATTRS) { + if generators::request_force(&co, sa.clone()).await.as_bool()? { + return Err(ErrorKind::NotImplemented(STRUCTURED_ATTRS)); + } + } + + // Check whether null attributes should be ignored or passed through. + let ignore_nulls = match input.select(IGNORE_NULLS) { + Some(b) => generators::request_force(&co, b.clone()).await.as_bool()?, + None => false, + }; + + let mut drv = Derivation::default(); + drv.outputs.insert("out".to_string(), Default::default()); + + async fn select_string( + co: &GenCo, + attrs: &NixAttrs, + key: &str, + ) -> Result<Result<Option<String>, CatchableErrorKind>, ErrorKind> { + if let Some(attr) = attrs.select(key) { + match strong_coerce_to_string(co, attr.clone()).await? { + Err(cek) => return Ok(Err(cek)), + Ok(str) => return Ok(Ok(Some(str))), + } + } + + Ok(Ok(None)) + } + + for (name, value) in input.clone().into_iter_sorted() { + let value = generators::request_force(&co, value).await; + if ignore_nulls && matches!(value, Value::Null) { + continue; + } + + match strong_coerce_to_string(&co, value.clone()).await? { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(val_str) => { + // handle_derivation_parameters tells us whether the + // argument should be added to the environment; continue + // to the next one otherwise + match handle_derivation_parameters( + &mut drv, + &co, + name.as_str(), + &value, + &val_str, + ) + .await? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(false) => continue, + _ => (), + } + + // Most of these are also added to the builder's environment in "raw" form. + if drv + .environment + .insert(name.as_str().to_string(), val_str.into()) + .is_some() + { + return Err(Error::DuplicateEnvVar(name.as_str().to_string()).into()); + } + } + } + } + + // Configure fixed-output derivations if required. + { + let output_hash = match select_string(&co, &input, "outputHash") + .await + .context("evaluating the `outputHash` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + let output_hash_algo = match select_string(&co, &input, "outputHashAlgo") + .await + .context("evaluating the `outputHashAlgo` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + let output_hash_mode = match select_string(&co, &input, "outputHashMode") + .await + .context("evaluating the `outputHashMode` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + handle_fixed_output(&mut drv, output_hash, output_hash_algo, output_hash_mode)?; + } + + // Scan references in relevant attributes to detect any build-references. + let references = { + let state = state.borrow(); + if state.is_empty() { + // skip reference scanning, create an empty result + Default::default() + } else { + let mut refscan = state.reference_scanner(); + drv.arguments.iter().for_each(|s| refscan.scan(s)); + drv.environment.values().for_each(|s| refscan.scan(s)); + refscan.scan(&drv.builder); + refscan.finalise() + } + }; + + // Each output name needs to exist in the environment, at this + // point initialised as an empty string because that is the + // way of Golang ;) + for output in drv.outputs.keys() { + if drv + .environment + .insert(output.to_string(), String::new().into()) + .is_some() + { + emit_warning_kind(&co, WarningKind::ShadowedOutput(output.to_string())).await; + } + } + + let mut known_paths = state.borrow_mut(); + populate_inputs(&mut drv, &known_paths, references); + + // At this point, derivation fields are fully populated from + // eval data structures. + drv.validate(false).map_err(Error::InvalidDerivation)?; + + // Calculate the derivation_or_fod_hash for the current derivation. + // This one is still intermediate (so not added to known_paths) + let derivation_or_fod_hash_tmp = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + // Mutate the Derivation struct and set output paths + drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp) + .map_err(Error::InvalidDerivation)?; + + let derivation_path = drv + .calculate_derivation_path(&name) + .map_err(Error::InvalidDerivation)?; + + // recompute the hash derivation modulo and add to known_paths + let derivation_or_fod_hash_final = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + known_paths.add_hash_derivation_modulo( + derivation_path.to_absolute_path(), + &derivation_or_fod_hash_final, + ); + + // mark all the new paths as known + let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect(); + known_paths.drv(derivation_path.to_absolute_path(), &output_names); + + for (output_name, output) in &drv.outputs { + known_paths.output( + &output.path, + output_name, + derivation_path.to_absolute_path(), + ); + } + + let mut new_attrs: Vec<(String, String)> = drv + .outputs + .into_iter() + .map(|(name, output)| (name, output.path)) + .collect(); + + new_attrs.push(("drvPath".to_string(), derivation_path.to_absolute_path())); + + Ok(Value::Attrs(Box::new(NixAttrs::from_iter( + new_attrs.into_iter(), + )))) + } + + #[builtin("toFile")] + async fn builtin_to_file( + state: Rc<RefCell<KnownPaths>>, + co: GenCo, + name: Value, + content: Value, + ) -> Result<Value, ErrorKind> { + let name = name + .to_str() + .context("evaluating the `name` parameter of builtins.toFile")?; + let content = content + .to_str() + .context("evaluating the `content` parameter of builtins.toFile")?; + + let mut refscan = state.borrow().reference_scanner(); + refscan.scan(content.as_str()); + let refs = { + let paths = state.borrow(); + refscan + .finalise() + .into_iter() + .map(|path| paths[&path].path.to_string()) + .collect::<Vec<_>>() + }; + + // TODO: fail on derivation references (only "plain" is allowed here) + + let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs) + .map_err(|_e| { + nix_compat::derivation::DerivationError::InvalidOutputName( + name.as_str().to_string(), + ) + }) + .map_err(Error::InvalidDerivation)? + .to_absolute_path(); + + state.borrow_mut().plain(&path); + + // TODO: actually persist the file in the store at that path ... + + Ok(Value::String(path.into())) + } +} + +pub use derivation_builtins::builtins as derivation_builtins; + +#[cfg(test)] +mod tests { + use crate::known_paths::KnownPaths; + use nix_compat::store_path::hash_placeholder; + use std::{cell::RefCell, rc::Rc}; + use test_case::test_case; + use tvix_eval::EvaluationResult; + + /// evaluates a given nix expression and returns the result. + /// Takes care of setting up the evaluator so it knows about the + // `derivation` builtin. + fn eval(str: &str) -> EvaluationResult { + let mut eval = tvix_eval::Evaluation::new_impure(str, None); + + let known_paths: Rc<RefCell<KnownPaths>> = Default::default(); + + eval.builtins + .extend(crate::derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); + + // run the evaluation itself. + eval.evaluate() + } + + #[test] + fn derivation() { + let result = eval( + r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#, + ); + + assert!(result.errors.is_empty(), "expect evaluation to succeed"); + let value = result.value.expect("must be some"); + + match value { + tvix_eval::Value::String(s) => { + assert_eq!( + "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo", + s.as_str() + ); + } + _ => panic!("unexpected value type: {:?}", value), + } + } + + /// a derivation with an empty name is an error. + #[test] + fn derivation_empty_name_fail() { + let result = eval( + r#"(derivation { name = ""; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#, + ); + + assert!(!result.errors.is_empty(), "expect evaluation to fail"); + } + + /// construct some calls to builtins.derivation and compare produced output + /// paths. + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256")] + #[test_case(r#"(builtins.derivation { name = "foo2"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/gi0p8vd635vpk1nq029cz3aa3jkhar5k-foo2"; "r:sha256 other name")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha1"; outputHash = "sha1-VUCRC+16gU5lcrLYHlPSUyx0Y/Q="; }).outPath"#, "/nix/store/p5sammmhpa84ama7ymkbgwwzrilva24x-foo"; "r:sha1")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "md5"; outputHash = "md5-07BzhNET7exJ6qYjitX/AA=="; }).outPath"#, "/nix/store/gmmxgpy1jrzs86r5y05wy6wiy2m15xgi-foo"; "r:md5")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha512"; outputHash = "sha512-DPkYCnZKuoY6Z7bXLwkYvBMcZ3JkLLLc5aNPCnAvlHDdwr8SXBIZixmVwjPDS0r9NGxUojNMNQqUilG26LTmtg=="; }).outPath"#, "/nix/store/lfi2bfyyap88y45mfdwi4j99gkaxaj19-foo"; "r:sha512")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "4374173a8cbe88de152b609f96f46e958bcf65762017474eec5a05ec2bd61530"; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 base16")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "0c0msqmyq1asxi74f5r0frjwz2wmdvs9d7v05caxx25yihx1fx23"; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 nixbase32")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 base64")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8="; }).outPath"#, "/nix/store/xm1l9dx4zgycv9qdhcqqvji1z88z534b-foo"; "r:sha256 base64 nopad")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/q4pkwkxdib797fhk22p0k3g1q32jmxvf-foo"; "sha256")] + #[test_case(r#"(builtins.derivation { name = "foo2"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/znw17xlmx9r6gw8izjkqxkl6s28sza4l-foo2"; "sha256 other name")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha1"; outputHash = "sha1-VUCRC+16gU5lcrLYHlPSUyx0Y/Q="; }).outPath"#, "/nix/store/zgpnjjmga53d8srp8chh3m9fn7nnbdv6-foo"; "sha1")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "md5"; outputHash = "md5-07BzhNET7exJ6qYjitX/AA=="; }).outPath"#, "/nix/store/jfhcwnq1852ccy9ad9nakybp2wadngnd-foo"; "md5")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha512"; outputHash = "sha512-DPkYCnZKuoY6Z7bXLwkYvBMcZ3JkLLLc5aNPCnAvlHDdwr8SXBIZixmVwjPDS0r9NGxUojNMNQqUilG26LTmtg=="; }).outPath"#, "/nix/store/as736rr116ian9qzg457f96j52ki8bm3-foo"; "sha512")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 outputHashAlgo omitted")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/q4pkwkxdib797fhk22p0k3g1q32jmxvf-foo"; "r:sha256 outputHashAlgo and outputHashMode omitted")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; }).outPath"#, "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo"; "outputHash* omitted")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; outputs = ["foo" "bar"]; system = "x86_64-linux"; }).outPath"#, "/nix/store/hkwdinvz2jpzgnjy9lv34d2zxvclj4s3-foo-foo"; "multiple outputs")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; args = ["--foo" "42" "--bar"]; system = "x86_64-linux"; }).outPath"#, "/nix/store/365gi78n2z7vwc1bvgb98k0a9cqfp6as-foo"; "args")] + #[test_case(r#" + let + bar = builtins.derivation { + name = "bar"; + builder = ":"; + system = ":"; + outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"; + outputHashAlgo = "sha256"; + outputHashMode = "recursive"; + }; + in + (builtins.derivation { + name = "foo"; + builder = ":"; + system = ":"; + inherit bar; + }).outPath + "#, "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"; "full")] + fn test_outpath(code: &str, expected_path: &str) { + let value = eval(code).value.expect("must succeed"); + + match value { + tvix_eval::Value::String(s) => { + assert_eq!(expected_path, s.as_str()); + } + _ => panic!("unexpected value type: {:?}", value), + } + } + + /// construct some calls to builtins.derivation that should be rejected + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-00"; }).outPath"#; "invalid outputhash")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha1"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#; "sha1 and sha256")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; outputs = ["foo" "foo"]; system = "x86_64-linux"; }).outPath"#; "duplicate output names")] + fn test_outpath_invalid(code: &str) { + let resp = eval(code); + assert!(resp.value.is_none(), "Value should be None"); + assert!( + !resp.errors.is_empty(), + "There should have been some errors" + ); + } + + #[test] + fn builtins_placeholder_hashes() { + assert_eq!( + hash_placeholder("out").as_str(), + "/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9" + ); + + assert_eq!( + hash_placeholder("").as_str(), + "/171rf4jhx57xqz3p7swniwkig249cif71pa08p80mgaf0mqz5bmr" + ); + } +} diff --git a/tvix/glue/src/errors.rs b/tvix/glue/src/errors.rs new file mode 100644 index 000000000000..5cbddcbba811 --- /dev/null +++ b/tvix/glue/src/errors.rs @@ -0,0 +1,26 @@ +use nix_compat::{derivation::DerivationError, nixhash}; +use std::rc::Rc; +use thiserror::Error; + +/// Errors related to derivation construction +#[derive(Debug, Error)] +pub enum Error { + #[error("an output with the name '{0}' is already defined")] + DuplicateOutput(String), + #[error("fixed-output derivations can only have the default `out`-output")] + ConflictingOutputTypes, + #[error("the environment variable '{0}' has already been set in this derivation")] + DuplicateEnvVar(String), + #[error("invalid derivation parameters: {0}")] + InvalidDerivation(DerivationError), + #[error("invalid output hash: {0}")] + InvalidOutputHash(nixhash::Error), + #[error("invalid output hash mode: '{0}', only 'recursive' and 'flat` are supported")] + InvalidOutputHashMode(String), +} + +impl From<Error> for tvix_eval::ErrorKind { + fn from(err: Error) -> Self { + tvix_eval::ErrorKind::TvixError(Rc::new(err)) + } +} diff --git a/tvix/glue/src/fetchurl.nix b/tvix/glue/src/fetchurl.nix new file mode 100644 index 000000000000..3f182a5a319b --- /dev/null +++ b/tvix/glue/src/fetchurl.nix @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: LGPL-2.1 +# +# This file is vendored from C++ Nix, as it needs to be bundled with +# an evaluator to be able to evaluate nixpkgs. +# +# Source: https://github.com/NixOS/nix/blob/2.3.16/corepkgs/fetchurl.nix + +{ system ? "" # obsolete +, url +, hash ? "" # an SRI hash + + # Legacy hash specification +, md5 ? "" +, sha1 ? "" +, sha256 ? "" +, sha512 ? "" +, outputHash ? if hash != "" then hash else if sha512 != "" then sha512 else if sha1 != "" then sha1 else if md5 != "" then md5 else sha256 +, outputHashAlgo ? if hash != "" then "" else if sha512 != "" then "sha512" else if sha1 != "" then "sha1" else if md5 != "" then "md5" else "sha256" + +, executable ? false +, unpack ? false +, name ? baseNameOf (toString url) +}: + +derivation { + builder = "builtin:fetchurl"; + + # New-style output content requirements. + inherit outputHashAlgo outputHash; + outputHashMode = if unpack || executable then "recursive" else "flat"; + + inherit name url executable unpack; + + system = "builtin"; + + # No need to double the amount of network traffic + preferLocalBuild = true; + + impureEnvVars = [ + # We borrow these environment variables from the caller to allow + # easy proxy configuration. This is impure, but a fixed-output + # derivation like fetchurl is allowed to do so since its result is + # by definition pure. + "http_proxy" + "https_proxy" + "ftp_proxy" + "all_proxy" + "no_proxy" + ]; + + # To make "nix-prefetch-url" work. + urls = [ url ]; +} diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs new file mode 100644 index 000000000000..07373ef0da7a --- /dev/null +++ b/tvix/glue/src/known_paths.rs @@ -0,0 +1,186 @@ +//! This module implements logic required for persisting known paths +//! during an evaluation. +//! +//! Tvix needs to be able to keep track of each Nix store path that it +//! knows about during the scope of a single evaluation and its +//! related builds. +//! +//! This data is required to scan derivation inputs for the build +//! references (the "build closure") that they make use of. +//! +//! Please see //tvix/eval/docs/build-references.md for more +//! information. + +use crate::refscan::{ReferenceScanner, STORE_PATH_LEN}; +use nix_compat::nixhash::NixHash; +use std::{ + collections::{hash_map, BTreeSet, HashMap}, + ops::Index, +}; + +#[derive(Debug, PartialEq)] +pub enum PathKind { + /// A literal derivation (`.drv`-file), and the *names* of its outputs. + Derivation { output_names: BTreeSet<String> }, + + /// An output of a derivation, its name, and the path of its derivation. + Output { name: String, derivation: String }, + + /// A plain store path (e.g. source files copied to the store). + Plain, +} + +#[derive(Debug, PartialEq)] +pub struct KnownPath { + pub path: String, + pub kind: PathKind, +} + +impl KnownPath { + fn new(path: String, kind: PathKind) -> Self { + KnownPath { path, kind } + } +} + +/// Internal struct to prevent accidental leaks of the truncated path +/// names. +#[repr(transparent)] +#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)] +pub struct PathName(String); + +impl From<&str> for PathName { + fn from(s: &str) -> Self { + PathName(s[..STORE_PATH_LEN].to_string()) + } +} + +/// This instance is required to pass PathName instances as needles to +/// the reference scanner. +impl AsRef<[u8]> for PathName { + fn as_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +#[derive(Debug, Default)] +pub struct KnownPaths { + /// All known paths, keyed by a truncated version of their store + /// path used for reference scanning. + paths: HashMap<PathName, KnownPath>, + + /// All known derivation or FOD hashes. + /// + /// Keys are derivation paths, values is the NixHash. + derivation_or_fod_hashes: HashMap<String, NixHash>, +} + +impl Index<&PathName> for KnownPaths { + type Output = KnownPath; + + fn index(&self, index: &PathName) -> &Self::Output { + &self.paths[index] + } +} + +impl KnownPaths { + fn insert_path(&mut self, path: String, path_kind: PathKind) { + match self.paths.entry(path.as_str().into()) { + hash_map::Entry::Vacant(entry) => { + entry.insert(KnownPath::new(path, path_kind)); + } + + hash_map::Entry::Occupied(mut entry) => { + match (path_kind, &mut entry.get_mut().kind) { + // These variant combinations require no "merging action". + (PathKind::Plain, PathKind::Plain) => (), + (PathKind::Output { .. }, PathKind::Output { .. }) => (), + + ( + PathKind::Derivation { output_names: new }, + PathKind::Derivation { + output_names: ref mut old, + }, + ) => { + old.extend(new); + } + + _ => panic!( + "path '{}' inserted twice with different types", + entry.key().0 + ), + }; + } + }; + } + + /// Mark a plain path as known. + pub fn plain<S: ToString>(&mut self, path: S) { + self.insert_path(path.to_string(), PathKind::Plain); + } + + /// Mark a derivation as known. + pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) { + self.insert_path( + path.to_string(), + PathKind::Derivation { + output_names: outputs.iter().map(ToString::to_string).collect(), + }, + ); + } + + /// Mark a derivation output path as known. + pub fn output<P: ToString, N: ToString, D: ToString>( + &mut self, + output_path: P, + name: N, + drv_path: D, + ) { + self.insert_path( + output_path.to_string(), + PathKind::Output { + name: name.to_string(), + derivation: drv_path.to_string(), + }, + ); + } + + /// Checks whether there are any known paths. If not, a reference + /// scanner can not be created. + pub fn is_empty(&self) -> bool { + self.paths.is_empty() + } + + /// Create a reference scanner from the current set of known paths. + pub fn reference_scanner(&self) -> ReferenceScanner<PathName> { + let candidates = self.paths.keys().map(Clone::clone).collect(); + ReferenceScanner::new(candidates) + } + + /// Fetch the opaque "hash derivation modulo" for a given derivation path. + pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash { + // TODO: we rely on an invariant that things *should* have + // been calculated if we get this far. + self.derivation_or_fod_hashes[drv_path].clone() + } + + pub fn add_hash_derivation_modulo<D: ToString>( + &mut self, + drv: D, + hash_derivation_modulo: &NixHash, + ) { + #[allow(unused_variables)] // assertions on this only compiled in debug builds + let old = self + .derivation_or_fod_hashes + .insert(drv.to_string(), hash_derivation_modulo.to_owned()); + + #[cfg(debug_assertions)] + { + if let Some(old) = old { + debug_assert!( + old == *hash_derivation_modulo, + "hash derivation modulo for a given derivation should always be calculated the same" + ); + } + } + } +} diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs new file mode 100644 index 000000000000..acb81d31445f --- /dev/null +++ b/tvix/glue/src/lib.rs @@ -0,0 +1,28 @@ +use std::{cell::RefCell, rc::Rc}; + +use known_paths::KnownPaths; + +pub mod derivation; +pub mod errors; +pub mod known_paths; +pub mod refscan; +pub mod tvix_io; +pub mod tvix_store_io; + +/// Adds derivation-related builtins to the passed [tvix_eval::Evaluation]. +/// +/// These are `derivation` and `derivationStrict`. +/// +/// As they need to interact with `known_paths`, we also need to pass in +/// `known_paths`. +pub fn add_derivation_builtins( + eval: &mut tvix_eval::Evaluation, + known_paths: Rc<RefCell<KnownPaths>>, +) { + eval.builtins + .extend(derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); +} diff --git a/tvix/glue/src/refscan.rs b/tvix/glue/src/refscan.rs new file mode 100644 index 000000000000..0e0bb6c77828 --- /dev/null +++ b/tvix/glue/src/refscan.rs @@ -0,0 +1,115 @@ +//! Simple scanner for non-overlapping, known references of Nix store paths in a +//! given string. +//! +//! This is used for determining build references (see +//! //tvix/eval/docs/build-references.md for more details). +//! +//! The scanner itself is using the Wu-Manber string-matching algorithm, using +//! our fork of the `wu-mamber` crate. + +use std::collections::BTreeSet; +use wu_manber::TwoByteWM; + +pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len(); + +/// Represents a "primed" reference scanner with an automaton that knows the set +/// of store paths to scan for. +pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> { + candidates: Vec<P>, + searcher: Option<TwoByteWM>, + matches: Vec<usize>, +} + +impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> { + /// Construct a new `ReferenceScanner` that knows how to scan for the given + /// candidate store paths. + pub fn new(candidates: Vec<P>) -> Self { + let searcher = if candidates.is_empty() { + None + } else { + Some(TwoByteWM::new(&candidates)) + }; + + ReferenceScanner { + searcher, + candidates, + matches: Default::default(), + } + } + + /// Scan the given str for all non-overlapping matches and collect them + /// in the scanner. + pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) { + if haystack.as_ref().len() < STORE_PATH_LEN { + return; + } + + if let Some(searcher) = &self.searcher { + for m in searcher.find(haystack) { + self.matches.push(m.pat_idx); + } + } + } + + /// Finalise the reference scanner and return the resulting matches. + pub fn finalise(self) -> BTreeSet<P> { + self.matches + .into_iter() + .map(|idx| self.candidates[idx].clone()) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // The actual derivation of `nixpkgs.hello`. + const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; + + #[test] + fn test_no_patterns() { + let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]); + + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn test_single_match() { + let mut scanner = ReferenceScanner::new(vec![ + "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(), + ]); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 1); + assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16")); + } + + #[test] + fn test_multiple_matches() { + let candidates = vec![ + // these exist in the drv: + "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(), + "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(), + "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(), + // this doesn't: + "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(), + ]; + + let mut scanner = ReferenceScanner::new(candidates.clone()); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + assert_eq!(result.len(), 3); + + for c in candidates[..3].iter() { + assert!(result.contains(c)); + } + } +} diff --git a/tvix/glue/src/tvix_io.rs b/tvix/glue/src/tvix_io.rs new file mode 100644 index 000000000000..caadbeb5e663 --- /dev/null +++ b/tvix/glue/src/tvix_io.rs @@ -0,0 +1,80 @@ +//! This module implements a wrapper around tvix-eval's [EvalIO] type, +//! adding functionality which is required by tvix-cli: +//! +//! 1. Marking plain paths known to the reference scanner. +//! 2. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping. +//! +//! All uses of [EvalIO] in tvix-cli must make use of this wrapper, +//! otherwise fundamental features like nixpkgs bootstrapping and hash +//! calculation will not work. + +use std::cell::RefCell; +use std::io; +use std::path::{Path, PathBuf}; +use std::rc::Rc; +use tvix_eval::{EvalIO, FileType}; + +use crate::known_paths::KnownPaths; + +// TODO: Merge this together with TvixStoreIO? +pub struct TvixIO<T: EvalIO> { + /// Ingested paths must be reported to this known paths tracker + /// for accurate build reference scanning. + known_paths: Rc<RefCell<KnownPaths>>, + + // Actual underlying [EvalIO] implementation. + actual: T, +} + +impl<T: EvalIO> TvixIO<T> { + pub fn new(known_paths: Rc<RefCell<KnownPaths>>, actual: T) -> Self { + Self { + known_paths, + actual, + } + } +} + +impl<T: EvalIO> EvalIO for TvixIO<T> { + fn store_dir(&self) -> Option<String> { + self.actual.store_dir() + } + + fn import_path(&self, path: &Path) -> Result<PathBuf, io::Error> { + let imported_path = self.actual.import_path(path)?; + self.known_paths + .borrow_mut() + .plain(imported_path.to_string_lossy()); + + Ok(imported_path) + } + + fn path_exists(&self, path: &Path) -> Result<bool, io::Error> { + if path.starts_with("/__corepkgs__") { + return Ok(true); + } + + self.actual.path_exists(path) + } + + fn read_to_string(&self, path: &Path) -> Result<String, io::Error> { + // Bundled version of corepkgs/fetchurl.nix. The counterpart + // of this happens in `main`, where the `nix_path` of the + // evaluation has `nix=/__corepkgs__` added to it. + // + // This workaround is similar to what cppnix does for passing + // the path through. + // + // TODO: this comparison is bad and allocates, we should use + // the sane path library. + if path.starts_with("/__corepkgs__/fetchurl.nix") { + return Ok(include_str!("fetchurl.nix").to_string()); + } + + self.actual.read_to_string(path) + } + + fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> { + self.actual.read_dir(path) + } +} diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs new file mode 100644 index 000000000000..9be896ffc40f --- /dev/null +++ b/tvix/glue/src/tvix_store_io.rs @@ -0,0 +1,359 @@ +//! This module provides an implementation of EvalIO talking to tvix-store. + +use nix_compat::store_path::{self, StorePath}; +use std::{io, path::Path, path::PathBuf, sync::Arc}; +use tokio::io::AsyncReadExt; +use tracing::{error, instrument, warn}; +use tvix_eval::{EvalIO, FileType, StdIO}; + +use tvix_castore::{ + blobservice::BlobService, + directoryservice::{self, DirectoryService}, + import, + proto::{node::Node, NamedNode}, + B3Digest, +}; +use tvix_store::{ + nar::calculate_size_and_sha256, + pathinfoservice::PathInfoService, + proto::{NarInfo, PathInfo}, +}; + +/// Implements [EvalIO], asking given [PathInfoService], [DirectoryService] +/// and [BlobService]. +/// +/// In case the given path does not exist in these stores, we ask StdIO. +/// This is to both cover cases of syntactically valid store paths, that exist +/// on the filesystem (still managed by Nix), as well as being able to read +/// files outside store paths. +pub struct TvixStoreIO { + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + std_io: StdIO, + tokio_handle: tokio::runtime::Handle, +} + +impl TvixStoreIO { + pub fn new( + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + tokio_handle: tokio::runtime::Handle, + ) -> Self { + Self { + blob_service, + directory_service, + path_info_service, + std_io: StdIO {}, + tokio_handle, + } + } + + /// for a given [StorePath] and additional [Path] inside the store path, + /// look up the [PathInfo], and if it exists, and then use + /// [directoryservice::traverse_to] to return the + /// [Node] specified by `sub_path`. + #[instrument(skip(self), ret, err)] + fn store_path_to_root_node( + &self, + store_path: &StorePath, + sub_path: &Path, + ) -> Result<Option<Node>, io::Error> { + let path_info_service = self.path_info_service.clone(); + let task = self.tokio_handle.spawn({ + let digest = *store_path.digest(); + async move { path_info_service.get(digest).await } + }); + let path_info = match self.tokio_handle.block_on(task).unwrap()? { + // If there's no PathInfo found, early exit + None => return Ok(None), + Some(path_info) => path_info, + }; + + let root_node = { + match path_info.node { + None => { + warn!( + "returned PathInfo {:?} node is None, this shouldn't happen.", + &path_info + ); + return Ok(None); + } + Some(root_node) => match root_node.node { + None => { + warn!("node for {:?} is None, this shouldn't happen.", &root_node); + return Ok(None); + } + Some(root_node) => root_node, + }, + } + }; + + let directory_service = self.directory_service.clone(); + let sub_path = sub_path.to_owned(); + let task = self.tokio_handle.spawn(async move { + directoryservice::descend_to(directory_service, root_node, &sub_path).await + }); + + Ok(self.tokio_handle.block_on(task).unwrap()?) + } +} + +impl EvalIO for TvixStoreIO { + #[instrument(skip(self), ret, err)] + fn path_exists(&self, path: &Path) -> Result<bool, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if self + .store_path_to_root_node(&store_path, &sub_path)? + .is_some() + { + Ok(true) + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.path_exists(path) + } + } else { + // The store path is no store path, so do regular StdIO. + self.std_io.path_exists(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_to_string(&self, path: &Path) -> Result<String, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + // depending on the node type, treat read_to_string differently + match node { + Node::Directory(_) => { + // This would normally be a io::ErrorKind::IsADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("tried to read directory at {:?} to string", path), + )) + } + Node::File(file_node) => { + let digest: B3Digest = + file_node.digest.clone().try_into().map_err(|_e| { + error!( + file_node = ?file_node, + "invalid digest" + ); + io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid digest length in file node: {:?}", file_node), + ) + })?; + + let blob_service = self.blob_service.clone(); + + let task = self.tokio_handle.spawn(async move { + let mut reader = { + let resp = blob_service.open_read(&digest).await?; + match resp { + Some(blob_reader) => blob_reader, + None => { + error!( + blob.digest = %digest, + "blob not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("blob {} not found", &digest), + ))? + } + } + }; + + let mut buf = String::new(); + + reader.read_to_string(&mut buf).await?; + Ok(buf) + }); + + self.tokio_handle.block_on(task).unwrap() + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_to_string for symlinks is unsupported", + ))?, + } + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.read_to_string(path) + } + } else { + // The store path is no store path, so do regular StdIO. + self.std_io.read_to_string(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + match node { + Node::Directory(directory_node) => { + // fetch the Directory itself. + let digest: B3Digest = + directory_node.digest.clone().try_into().map_err(|_e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!( + "invalid digest length in directory node: {:?}", + directory_node + ), + ) + })?; + + let directory_service = self.directory_service.clone(); + let digest_clone = digest.clone(); + let task = self + .tokio_handle + .spawn(async move { directory_service.get(&digest_clone).await }); + if let Some(directory) = self.tokio_handle.block_on(task).unwrap()? { + let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new(); + for node in directory.nodes() { + children.push(match node { + Node::Directory(e) => (e.name, FileType::Directory), + Node::File(e) => (e.name, FileType::Regular), + Node::Symlink(e) => (e.name, FileType::Symlink), + }) + } + Ok(children) + } else { + // If we didn't get the directory node that's linked, that's a store inconsistency! + error!( + directory.digest = %digest, + path = ?path, + "directory not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("directory {digest} does not exist"), + ))? + } + } + Node::File(_file_node) => { + // This would normally be a io::ErrorKind::NotADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + "tried to readdir path {:?}, which is a file", + ))? + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_dir for symlinks is unsupported", + ))?, + } + } else { + self.std_io.read_dir(path) + } + } else { + self.std_io.read_dir(path) + } + } + + #[instrument(skip(self), ret, err)] + fn import_path(&self, path: &std::path::Path) -> Result<PathBuf, std::io::Error> { + let p = path.to_owned(); + let blob_service = self.blob_service.clone(); + let directory_service = self.directory_service.clone(); + let path_info_service = self.path_info_service.clone(); + + let task = self.tokio_handle.spawn(async move { + import_path_with_pathinfo(blob_service, directory_service, path_info_service, &p).await + }); + + let path_info = self.tokio_handle.block_on(task).unwrap()?; + + // from the [PathInfo], extract the store path (as string). + Ok({ + let mut path = PathBuf::from(nix_compat::store_path::STORE_DIR_WITH_SLASH); + + let root_node_name = path_info.node.unwrap().node.unwrap().get_name().to_vec(); + + // This must be a string, otherwise it would have failed validation. + let root_node_name = String::from_utf8(root_node_name).unwrap(); + + // append to the PathBuf + path.push(root_node_name); + + // and return it + path + }) + } + + #[instrument(skip(self), ret)] + fn store_dir(&self) -> Option<String> { + Some("/nix/store".to_string()) + } +} + +/// Imports a given path on the filesystem into the store, and returns the +/// [PathInfo] describing the path, that was sent to +/// [PathInfoService]. +#[instrument(skip(blob_service, directory_service, path_info_service), ret, err)] +async fn import_path_with_pathinfo( + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + path: &std::path::Path, +) -> Result<PathInfo, io::Error> { + // Call [import::ingest_path], which will walk over the given path and return a root_node. + let root_node = import::ingest_path(blob_service.clone(), directory_service.clone(), path) + .await + .expect("error during import_path"); + + // Render the NAR. + let (nar_size, nar_sha256) = + calculate_size_and_sha256(&root_node, blob_service.clone(), directory_service.clone()) + .await + .expect("error during nar calculation"); // TODO: handle error + + // TODO: make a path_to_name helper function? + let name = path + .file_name() + .expect("path must not be ..") + .to_str() + .expect("path must be valid unicode"); + + let output_path = store_path::build_nar_based_store_path(&nar_sha256, name); + + // assemble a new root_node with a name that is derived from the nar hash. + let root_node = root_node.rename(output_path.to_string().into_bytes().into()); + + // assemble the [PathInfo] object. + let path_info = PathInfo { + node: Some(tvix_castore::proto::Node { + node: Some(root_node), + }), + // There's no reference scanning on path contents ingested like this. + references: vec![], + narinfo: Some(NarInfo { + nar_size, + nar_sha256: nar_sha256.to_vec().into(), + signatures: vec![], + reference_names: vec![], + deriver: None, + ca: Some(tvix_store::proto::nar_info::Ca { + r#type: tvix_store::proto::nar_info::ca::Hash::NarSha256.into(), + digest: nar_sha256.to_vec().into(), + }), + }), + }; + + // put into [PathInfoService], and return the [PathInfo] that we get + // back from there (it might contain additional signatures). + let path_info = path_info_service.put(path_info).await?; + + Ok(path_info) +} |