author | Florian Klink <flokli@flokli.de> | 2023-11-03T11·34+0200
committer | clbot <clbot@tvl.fyi> | 2023-11-04T15·18+0000
commit | 3196fe0143b6ff729c177fa5d17fa03c9e9627c9 (patch)
tree | 33a6cbb1a965a739e8cdd96dc2faec79b83635de /tvix/glue
parent | a51d277764d73582bc9bf816f6f4163d2df7f9c4 (diff)
refactor(tvix): move tvix glue code into glue crate r/6936
There are various bits and pieces in tvix-cli that use both the store and the evaluator, as well as nix-compat: for example `builtins.derivation` and the reference scanning implementation. This "glue code" currently isn't accessible from anywhere else, but it would be very useful if it were. Move it out into a `glue` crate, and make `tvix-cli` a consumer of it.

The KnownPaths setup and passing around, as well as NIX_PATH handling, should probably move into the glue crate too, but that is left for a future CL.

Change-Id: I080ed3d1825ab23790666486840f301f00856277
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9908
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
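To illustrate what being a "consumer" of the new crate looks like, here is a minimal evaluation sketch modelled on the crate's own test helper and the `add_derivation_builtins` entry point added in this change. The expression and the bare-bones setup (no TvixIO/TvixStoreIO wiring, no NIX_PATH handling) are illustrative assumptions, not the actual tvix-cli code:

```rust
use std::{cell::RefCell, rc::Rc};

use tvix_glue::{add_derivation_builtins, known_paths::KnownPaths};

fn main() {
    // Example expression; `derivation` is now provided by the glue crate.
    let code =
        r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; }).outPath"#;

    let mut eval = tvix_eval::Evaluation::new_impure(code, None);

    // Shared tracker for store paths discovered during this evaluation.
    let known_paths: Rc<RefCell<KnownPaths>> = Default::default();

    // Registers `derivationStrict` (Rust) plus the `derivation` wrapper
    // compiled from derivation.nix.
    add_derivation_builtins(&mut eval, known_paths);

    let result = eval.evaluate();
    println!("value: {:?}", result.value);
}
```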
Diffstat (limited to 'tvix/glue')
-rw-r--r-- | tvix/glue/Cargo.toml | 20
-rw-r--r-- | tvix/glue/default.nix | 5
-rw-r--r-- | tvix/glue/src/.skip-subtree | 1
-rw-r--r-- | tvix/glue/src/derivation.nix | 36
-rw-r--r-- | tvix/glue/src/derivation.rs | 601
-rw-r--r-- | tvix/glue/src/errors.rs | 26
-rw-r--r-- | tvix/glue/src/fetchurl.nix | 53
-rw-r--r-- | tvix/glue/src/known_paths.rs | 186
-rw-r--r-- | tvix/glue/src/lib.rs | 28
-rw-r--r-- | tvix/glue/src/refscan.rs | 115
-rw-r--r-- | tvix/glue/src/tvix_io.rs | 80
-rw-r--r-- | tvix/glue/src/tvix_store_io.rs | 359
12 files changed, 1510 insertions, 0 deletions
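Before the raw diff: the reference scanning implementation mentioned in the commit message lands in tvix/glue/src/refscan.rs. A minimal usage sketch, adapted from that file's tests (the candidate store path is simply the example used there; in practice the candidates come from KnownPaths):

```rust
use tvix_glue::refscan::ReferenceScanner;

fn main() {
    // Candidate store paths to look for (normally produced by KnownPaths).
    let candidates = vec![
        "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(),
    ];
    let mut scanner = ReferenceScanner::new(candidates);

    // Scan derivation fields (builder, args, environment values) for matches.
    scanner.scan("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash -e builder.sh");

    // Non-overlapping matches become the derivation's build references.
    let references = scanner.finalise();
    assert!(references.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16"));
}
```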
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml new file mode 100644 index 000000000000..4ebfda870399 --- /dev/null +++ b/tvix/glue/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "tvix-glue" +version = "0.1.0" +edition = "2021" + +[dependencies] +nix-compat = { path = "../nix-compat" } +tvix-eval = { path = "../eval" } +tvix-castore = { path = "../castore" } +tvix-store = { path = "../store", default-features = false, features = []} +bytes = "1.4.0" +tracing = "0.1.37" +tokio = "1.28.0" +thiserror = "1.0.38" + +[dependencies.wu-manber] +git = "https://github.com/tvlfyi/wu-manber.git" + +[dev-dependencies] +test-case = "2.2.2" diff --git a/tvix/glue/default.nix b/tvix/glue/default.nix new file mode 100644 index 000000000000..975b0a5a7007 --- /dev/null +++ b/tvix/glue/default.nix @@ -0,0 +1,5 @@ +{ depot, ... }: + +(depot.tvix.crates.workspaceMembers.tvix-glue.build.override { + runTests = true; +}) diff --git a/tvix/glue/src/.skip-subtree b/tvix/glue/src/.skip-subtree new file mode 100644 index 000000000000..a16a2afe1f1e --- /dev/null +++ b/tvix/glue/src/.skip-subtree @@ -0,0 +1 @@ +Because of the derivation.nix file ... diff --git a/tvix/glue/src/derivation.nix b/tvix/glue/src/derivation.nix new file mode 100644 index 000000000000..9355cc3a96f0 --- /dev/null +++ b/tvix/glue/src/derivation.nix @@ -0,0 +1,36 @@ +# LGPL-2.1-or-later +# +# taken from: https://github.com/NixOS/nix/blob/master/src/libexpr/primops/derivation.nix +# +# TODO: rewrite in native Rust code + +/* This is the implementation of the ‘derivation’ builtin function. + It's actually a wrapper around the ‘derivationStrict’ primop. */ + +drvAttrs @ { outputs ? [ "out" ], ... }: + +let + + strict = derivationStrict drvAttrs; + + commonAttrs = drvAttrs // (builtins.listToAttrs outputsList) // + { + all = map (x: x.value) outputsList; + inherit drvAttrs; + }; + + outputToAttrListElement = outputName: + { + name = outputName; + value = commonAttrs // { + outPath = builtins.getAttr outputName strict; + drvPath = strict.drvPath; + type = "derivation"; + inherit outputName; + }; + }; + + outputsList = map outputToAttrListElement outputs; + +in +(builtins.head outputsList).value diff --git a/tvix/glue/src/derivation.rs b/tvix/glue/src/derivation.rs new file mode 100644 index 000000000000..86a271a3966d --- /dev/null +++ b/tvix/glue/src/derivation.rs @@ -0,0 +1,601 @@ +//! Implements `builtins.derivation`, the core of what makes Nix build packages. +use nix_compat::derivation::{Derivation, Output}; +use nix_compat::nixhash; +use std::cell::RefCell; +use std::collections::{btree_map, BTreeSet}; +use std::rc::Rc; +use tvix_eval::builtin_macros::builtins; +use tvix_eval::generators::{self, emit_warning_kind, GenCo}; +use tvix_eval::{ + AddContext, CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixList, Value, WarningKind, +}; + +use crate::errors::Error; +use crate::known_paths::{KnownPaths, PathKind, PathName}; + +// Constants used for strangely named fields in derivation inputs. +const STRUCTURED_ATTRS: &str = "__structuredAttrs"; +const IGNORE_NULLS: &str = "__ignoreNulls"; + +/// Helper function for populating the `drv.outputs` field from a +/// manually specified set of outputs, instead of the default +/// `outputs`. +async fn populate_outputs( + co: &GenCo, + drv: &mut Derivation, + outputs: NixList, +) -> Result<(), ErrorKind> { + // Remove the original default `out` output. 
+ drv.outputs.clear(); + + for output in outputs { + let output_name = generators::request_force(co, output) + .await + .to_str() + .context("determining output name")?; + + if drv + .outputs + .insert(output_name.as_str().into(), Default::default()) + .is_some() + { + return Err(Error::DuplicateOutput(output_name.as_str().into()).into()); + } + } + + Ok(()) +} + +/// Populate the inputs of a derivation from the build references +/// found when scanning the derivation's parameters. +fn populate_inputs<I: IntoIterator<Item = PathName>>( + drv: &mut Derivation, + known_paths: &KnownPaths, + references: I, +) { + for reference in references.into_iter() { + let reference = &known_paths[&reference]; + match &reference.kind { + PathKind::Plain => { + drv.input_sources.insert(reference.path.clone()); + } + + PathKind::Output { name, derivation } => { + match drv.input_derivations.entry(derivation.clone()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(BTreeSet::from([name.clone()])); + } + + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().insert(name.clone()); + } + } + } + + PathKind::Derivation { output_names } => { + match drv.input_derivations.entry(reference.path.clone()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(output_names.clone()); + } + + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().extend(output_names.clone().into_iter()); + } + } + } + } + } +} + +/// Populate the output configuration of a derivation based on the +/// parameters passed to the call, configuring a fixed-output derivation output +/// if necessary. +/// +/// This function handles all possible combinations of the +/// parameters, including invalid ones. +/// +/// Due to the support for SRI hashes, and how these are passed along to +/// builtins.derivation, outputHash and outputHashAlgo can have values which +/// need to be further modified before constructing the Derivation struct. +/// +/// If outputHashAlgo is an SRI hash, outputHashAlgo must either be an empty +/// string, or the hash algorithm as specified in the (single) SRI (entry). +/// SRI strings with multiple hash algorithms are not supported. +/// +/// In case an SRI string was used, the (single) fixed output is populated +/// with the hash algo name, and the hash digest is populated with the +/// (lowercase) hex encoding of the digest. +/// +/// These values are only rewritten for the outputs, not what's passed to env. +fn handle_fixed_output( + drv: &mut Derivation, + hash_str: Option<String>, // in nix: outputHash + hash_algo_str: Option<String>, // in nix: outputHashAlgo + hash_mode_str: Option<String>, // in nix: outputHashmode +) -> Result<(), ErrorKind> { + // If outputHash is provided, ensure hash_algo_str is compatible. + // If outputHash is not provided, do nothing. + if let Some(hash_str) = hash_str { + // treat an empty algo as None + let hash_algo_str = match hash_algo_str { + Some(s) if s.is_empty() => None, + Some(s) => Some(s), + None => None, + }; + + // construct a NixHash. + let nixhash = nixhash::from_str(&hash_str, hash_algo_str.as_deref()) + .map_err(Error::InvalidOutputHash)?; + + // construct the fixed output. 
+ drv.outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), + ca_hash: match hash_mode_str.as_deref() { + None | Some("flat") => Some(nixhash::CAHash::Flat(nixhash)), + Some("recursive") => Some(nixhash::CAHash::Nar(nixhash)), + Some(other) => return Err(Error::InvalidOutputHashMode(other.to_string()))?, + }, + }, + ); + } + Ok(()) +} + +/// Handles derivation parameters which are not just forwarded to +/// the environment. The return value indicates whether the +/// parameter should be included in the environment. +async fn handle_derivation_parameters( + drv: &mut Derivation, + co: &GenCo, + name: &str, + value: &Value, + val_str: &str, +) -> Result<Result<bool, CatchableErrorKind>, ErrorKind> { + match name { + IGNORE_NULLS => return Ok(Ok(false)), + + // Command line arguments to the builder. + "args" => { + let args = value.to_list()?; + for arg in args { + match strong_coerce_to_string(co, arg).await? { + Err(cek) => return Ok(Err(cek)), + Ok(s) => drv.arguments.push(s), + } + } + + // The arguments do not appear in the environment. + return Ok(Ok(false)); + } + + // Explicitly specified drv outputs (instead of default [ "out" ]) + "outputs" => { + let outputs = value + .to_list() + .context("looking at the `outputs` parameter of the derivation")?; + + populate_outputs(co, drv, outputs).await?; + } + + "builder" => { + drv.builder = val_str.to_string(); + } + + "system" => { + drv.system = val_str.to_string(); + } + + _ => {} + } + + Ok(Ok(true)) +} + +async fn strong_coerce_to_string( + co: &GenCo, + val: Value, +) -> Result<Result<String, CatchableErrorKind>, ErrorKind> { + let val = generators::request_force(co, val).await; + match generators::request_string_coerce(co, val, CoercionKind::Strong).await { + Err(cek) => Ok(Err(cek)), + Ok(val_str) => Ok(Ok(val_str.as_str().to_string())), + } +} + +#[builtins(state = "Rc<RefCell<KnownPaths>>")] +mod derivation_builtins { + use super::*; + use nix_compat::store_path::hash_placeholder; + use tvix_eval::generators::Gen; + + #[builtin("placeholder")] + async fn builtin_placeholder(co: GenCo, input: Value) -> Result<Value, ErrorKind> { + let placeholder = hash_placeholder( + input + .to_str() + .context("looking at output name in builtins.placeholder")? + .as_str(), + ); + + Ok(placeholder.into()) + } + + /// Strictly construct a Nix derivation from the supplied arguments. + /// + /// This is considered an internal function, users usually want to + /// use the higher-level `builtins.derivation` instead. + #[builtin("derivationStrict")] + async fn builtin_derivation_strict( + state: Rc<RefCell<KnownPaths>>, + co: GenCo, + input: Value, + ) -> Result<Value, ErrorKind> { + let input = input.to_attrs()?; + let name = generators::request_force(&co, input.select_required("name")?.clone()) + .await + .to_str() + .context("determining derivation name")?; + + if name.is_empty() { + return Err(ErrorKind::Abort("derivation has empty name".to_string())); + } + + // Check whether attributes should be passed as a JSON file. + // TODO: the JSON serialisation has to happen here. + if let Some(sa) = input.select(STRUCTURED_ATTRS) { + if generators::request_force(&co, sa.clone()).await.as_bool()? { + return Err(ErrorKind::NotImplemented(STRUCTURED_ATTRS)); + } + } + + // Check whether null attributes should be ignored or passed through. 
+ let ignore_nulls = match input.select(IGNORE_NULLS) { + Some(b) => generators::request_force(&co, b.clone()).await.as_bool()?, + None => false, + }; + + let mut drv = Derivation::default(); + drv.outputs.insert("out".to_string(), Default::default()); + + async fn select_string( + co: &GenCo, + attrs: &NixAttrs, + key: &str, + ) -> Result<Result<Option<String>, CatchableErrorKind>, ErrorKind> { + if let Some(attr) = attrs.select(key) { + match strong_coerce_to_string(co, attr.clone()).await? { + Err(cek) => return Ok(Err(cek)), + Ok(str) => return Ok(Ok(Some(str))), + } + } + + Ok(Ok(None)) + } + + for (name, value) in input.clone().into_iter_sorted() { + let value = generators::request_force(&co, value).await; + if ignore_nulls && matches!(value, Value::Null) { + continue; + } + + match strong_coerce_to_string(&co, value.clone()).await? { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(val_str) => { + // handle_derivation_parameters tells us whether the + // argument should be added to the environment; continue + // to the next one otherwise + match handle_derivation_parameters( + &mut drv, + &co, + name.as_str(), + &value, + &val_str, + ) + .await? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(false) => continue, + _ => (), + } + + // Most of these are also added to the builder's environment in "raw" form. + if drv + .environment + .insert(name.as_str().to_string(), val_str.into()) + .is_some() + { + return Err(Error::DuplicateEnvVar(name.as_str().to_string()).into()); + } + } + } + } + + // Configure fixed-output derivations if required. + { + let output_hash = match select_string(&co, &input, "outputHash") + .await + .context("evaluating the `outputHash` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + let output_hash_algo = match select_string(&co, &input, "outputHashAlgo") + .await + .context("evaluating the `outputHashAlgo` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + let output_hash_mode = match select_string(&co, &input, "outputHashMode") + .await + .context("evaluating the `outputHashMode` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + handle_fixed_output(&mut drv, output_hash, output_hash_algo, output_hash_mode)?; + } + + // Scan references in relevant attributes to detect any build-references. + let references = { + let state = state.borrow(); + if state.is_empty() { + // skip reference scanning, create an empty result + Default::default() + } else { + let mut refscan = state.reference_scanner(); + drv.arguments.iter().for_each(|s| refscan.scan(s)); + drv.environment.values().for_each(|s| refscan.scan(s)); + refscan.scan(&drv.builder); + refscan.finalise() + } + }; + + // Each output name needs to exist in the environment, at this + // point initialised as an empty string because that is the + // way of Golang ;) + for output in drv.outputs.keys() { + if drv + .environment + .insert(output.to_string(), String::new().into()) + .is_some() + { + emit_warning_kind(&co, WarningKind::ShadowedOutput(output.to_string())).await; + } + } + + let mut known_paths = state.borrow_mut(); + populate_inputs(&mut drv, &known_paths, references); + + // At this point, derivation fields are fully populated from + // eval data structures. + drv.validate(false).map_err(Error::InvalidDerivation)?; + + // Calculate the derivation_or_fod_hash for the current derivation. 
+ // This one is still intermediate (so not added to known_paths) + let derivation_or_fod_hash_tmp = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + // Mutate the Derivation struct and set output paths + drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp) + .map_err(Error::InvalidDerivation)?; + + let derivation_path = drv + .calculate_derivation_path(&name) + .map_err(Error::InvalidDerivation)?; + + // recompute the hash derivation modulo and add to known_paths + let derivation_or_fod_hash_final = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + known_paths.add_hash_derivation_modulo( + derivation_path.to_absolute_path(), + &derivation_or_fod_hash_final, + ); + + // mark all the new paths as known + let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect(); + known_paths.drv(derivation_path.to_absolute_path(), &output_names); + + for (output_name, output) in &drv.outputs { + known_paths.output( + &output.path, + output_name, + derivation_path.to_absolute_path(), + ); + } + + let mut new_attrs: Vec<(String, String)> = drv + .outputs + .into_iter() + .map(|(name, output)| (name, output.path)) + .collect(); + + new_attrs.push(("drvPath".to_string(), derivation_path.to_absolute_path())); + + Ok(Value::Attrs(Box::new(NixAttrs::from_iter( + new_attrs.into_iter(), + )))) + } + + #[builtin("toFile")] + async fn builtin_to_file( + state: Rc<RefCell<KnownPaths>>, + co: GenCo, + name: Value, + content: Value, + ) -> Result<Value, ErrorKind> { + let name = name + .to_str() + .context("evaluating the `name` parameter of builtins.toFile")?; + let content = content + .to_str() + .context("evaluating the `content` parameter of builtins.toFile")?; + + let mut refscan = state.borrow().reference_scanner(); + refscan.scan(content.as_str()); + let refs = { + let paths = state.borrow(); + refscan + .finalise() + .into_iter() + .map(|path| paths[&path].path.to_string()) + .collect::<Vec<_>>() + }; + + // TODO: fail on derivation references (only "plain" is allowed here) + + let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs) + .map_err(|_e| { + nix_compat::derivation::DerivationError::InvalidOutputName( + name.as_str().to_string(), + ) + }) + .map_err(Error::InvalidDerivation)? + .to_absolute_path(); + + state.borrow_mut().plain(&path); + + // TODO: actually persist the file in the store at that path ... + + Ok(Value::String(path.into())) + } +} + +pub use derivation_builtins::builtins as derivation_builtins; + +#[cfg(test)] +mod tests { + use crate::known_paths::KnownPaths; + use nix_compat::store_path::hash_placeholder; + use std::{cell::RefCell, rc::Rc}; + use test_case::test_case; + use tvix_eval::EvaluationResult; + + /// evaluates a given nix expression and returns the result. + /// Takes care of setting up the evaluator so it knows about the + // `derivation` builtin. + fn eval(str: &str) -> EvaluationResult { + let mut eval = tvix_eval::Evaluation::new_impure(str, None); + + let known_paths: Rc<RefCell<KnownPaths>> = Default::default(); + + eval.builtins + .extend(crate::derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); + + // run the evaluation itself. 
+ eval.evaluate() + } + + #[test] + fn derivation() { + let result = eval( + r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#, + ); + + assert!(result.errors.is_empty(), "expect evaluation to succeed"); + let value = result.value.expect("must be some"); + + match value { + tvix_eval::Value::String(s) => { + assert_eq!( + "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo", + s.as_str() + ); + } + _ => panic!("unexpected value type: {:?}", value), + } + } + + /// a derivation with an empty name is an error. + #[test] + fn derivation_empty_name_fail() { + let result = eval( + r#"(derivation { name = ""; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#, + ); + + assert!(!result.errors.is_empty(), "expect evaluation to fail"); + } + + /// construct some calls to builtins.derivation and compare produced output + /// paths. + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256")] + #[test_case(r#"(builtins.derivation { name = "foo2"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/gi0p8vd635vpk1nq029cz3aa3jkhar5k-foo2"; "r:sha256 other name")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha1"; outputHash = "sha1-VUCRC+16gU5lcrLYHlPSUyx0Y/Q="; }).outPath"#, "/nix/store/p5sammmhpa84ama7ymkbgwwzrilva24x-foo"; "r:sha1")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "md5"; outputHash = "md5-07BzhNET7exJ6qYjitX/AA=="; }).outPath"#, "/nix/store/gmmxgpy1jrzs86r5y05wy6wiy2m15xgi-foo"; "r:md5")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha512"; outputHash = "sha512-DPkYCnZKuoY6Z7bXLwkYvBMcZ3JkLLLc5aNPCnAvlHDdwr8SXBIZixmVwjPDS0r9NGxUojNMNQqUilG26LTmtg=="; }).outPath"#, "/nix/store/lfi2bfyyap88y45mfdwi4j99gkaxaj19-foo"; "r:sha512")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "4374173a8cbe88de152b609f96f46e958bcf65762017474eec5a05ec2bd61530"; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 base16")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "0c0msqmyq1asxi74f5r0frjwz2wmdvs9d7v05caxx25yihx1fx23"; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 nixbase32")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 base64")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = 
"sha256-fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8="; }).outPath"#, "/nix/store/xm1l9dx4zgycv9qdhcqqvji1z88z534b-foo"; "r:sha256 base64 nopad")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/q4pkwkxdib797fhk22p0k3g1q32jmxvf-foo"; "sha256")] + #[test_case(r#"(builtins.derivation { name = "foo2"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/znw17xlmx9r6gw8izjkqxkl6s28sza4l-foo2"; "sha256 other name")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha1"; outputHash = "sha1-VUCRC+16gU5lcrLYHlPSUyx0Y/Q="; }).outPath"#, "/nix/store/zgpnjjmga53d8srp8chh3m9fn7nnbdv6-foo"; "sha1")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "md5"; outputHash = "md5-07BzhNET7exJ6qYjitX/AA=="; }).outPath"#, "/nix/store/jfhcwnq1852ccy9ad9nakybp2wadngnd-foo"; "md5")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha512"; outputHash = "sha512-DPkYCnZKuoY6Z7bXLwkYvBMcZ3JkLLLc5aNPCnAvlHDdwr8SXBIZixmVwjPDS0r9NGxUojNMNQqUilG26LTmtg=="; }).outPath"#, "/nix/store/as736rr116ian9qzg457f96j52ki8bm3-foo"; "sha512")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 outputHashAlgo omitted")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/q4pkwkxdib797fhk22p0k3g1q32jmxvf-foo"; "r:sha256 outputHashAlgo and outputHashMode omitted")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; }).outPath"#, "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo"; "outputHash* omitted")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; outputs = ["foo" "bar"]; system = "x86_64-linux"; }).outPath"#, "/nix/store/hkwdinvz2jpzgnjy9lv34d2zxvclj4s3-foo-foo"; "multiple outputs")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; args = ["--foo" "42" "--bar"]; system = "x86_64-linux"; }).outPath"#, "/nix/store/365gi78n2z7vwc1bvgb98k0a9cqfp6as-foo"; "args")] + #[test_case(r#" + let + bar = builtins.derivation { + name = "bar"; + builder = ":"; + system = ":"; + outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"; + outputHashAlgo = "sha256"; + outputHashMode = "recursive"; + }; + in + (builtins.derivation { + name = "foo"; + builder = ":"; + system = ":"; + inherit bar; + }).outPath + "#, "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"; "full")] + fn test_outpath(code: &str, expected_path: &str) { + let value = eval(code).value.expect("must succeed"); + + match value { + tvix_eval::Value::String(s) => { + assert_eq!(expected_path, s.as_str()); + } + _ => panic!("unexpected value type: {:?}", value), + } + } + + /// construct some calls to 
builtins.derivation that should be rejected + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-00"; }).outPath"#; "invalid outputhash")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha1"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#; "sha1 and sha256")] + #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; outputs = ["foo" "foo"]; system = "x86_64-linux"; }).outPath"#; "duplicate output names")] + fn test_outpath_invalid(code: &str) { + let resp = eval(code); + assert!(resp.value.is_none(), "Value should be None"); + assert!( + !resp.errors.is_empty(), + "There should have been some errors" + ); + } + + #[test] + fn builtins_placeholder_hashes() { + assert_eq!( + hash_placeholder("out").as_str(), + "/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9" + ); + + assert_eq!( + hash_placeholder("").as_str(), + "/171rf4jhx57xqz3p7swniwkig249cif71pa08p80mgaf0mqz5bmr" + ); + } +} diff --git a/tvix/glue/src/errors.rs b/tvix/glue/src/errors.rs new file mode 100644 index 000000000000..5cbddcbba811 --- /dev/null +++ b/tvix/glue/src/errors.rs @@ -0,0 +1,26 @@ +use nix_compat::{derivation::DerivationError, nixhash}; +use std::rc::Rc; +use thiserror::Error; + +/// Errors related to derivation construction +#[derive(Debug, Error)] +pub enum Error { + #[error("an output with the name '{0}' is already defined")] + DuplicateOutput(String), + #[error("fixed-output derivations can only have the default `out`-output")] + ConflictingOutputTypes, + #[error("the environment variable '{0}' has already been set in this derivation")] + DuplicateEnvVar(String), + #[error("invalid derivation parameters: {0}")] + InvalidDerivation(DerivationError), + #[error("invalid output hash: {0}")] + InvalidOutputHash(nixhash::Error), + #[error("invalid output hash mode: '{0}', only 'recursive' and 'flat` are supported")] + InvalidOutputHashMode(String), +} + +impl From<Error> for tvix_eval::ErrorKind { + fn from(err: Error) -> Self { + tvix_eval::ErrorKind::TvixError(Rc::new(err)) + } +} diff --git a/tvix/glue/src/fetchurl.nix b/tvix/glue/src/fetchurl.nix new file mode 100644 index 000000000000..3f182a5a319b --- /dev/null +++ b/tvix/glue/src/fetchurl.nix @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: LGPL-2.1 +# +# This file is vendored from C++ Nix, as it needs to be bundled with +# an evaluator to be able to evaluate nixpkgs. +# +# Source: https://github.com/NixOS/nix/blob/2.3.16/corepkgs/fetchurl.nix + +{ system ? "" # obsolete +, url +, hash ? "" # an SRI hash + + # Legacy hash specification +, md5 ? "" +, sha1 ? "" +, sha256 ? "" +, sha512 ? "" +, outputHash ? if hash != "" then hash else if sha512 != "" then sha512 else if sha1 != "" then sha1 else if md5 != "" then md5 else sha256 +, outputHashAlgo ? if hash != "" then "" else if sha512 != "" then "sha512" else if sha1 != "" then "sha1" else if md5 != "" then "md5" else "sha256" + +, executable ? false +, unpack ? false +, name ? baseNameOf (toString url) +}: + +derivation { + builder = "builtin:fetchurl"; + + # New-style output content requirements. 
+ inherit outputHashAlgo outputHash; + outputHashMode = if unpack || executable then "recursive" else "flat"; + + inherit name url executable unpack; + + system = "builtin"; + + # No need to double the amount of network traffic + preferLocalBuild = true; + + impureEnvVars = [ + # We borrow these environment variables from the caller to allow + # easy proxy configuration. This is impure, but a fixed-output + # derivation like fetchurl is allowed to do so since its result is + # by definition pure. + "http_proxy" + "https_proxy" + "ftp_proxy" + "all_proxy" + "no_proxy" + ]; + + # To make "nix-prefetch-url" work. + urls = [ url ]; +} diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs new file mode 100644 index 000000000000..07373ef0da7a --- /dev/null +++ b/tvix/glue/src/known_paths.rs @@ -0,0 +1,186 @@ +//! This module implements logic required for persisting known paths +//! during an evaluation. +//! +//! Tvix needs to be able to keep track of each Nix store path that it +//! knows about during the scope of a single evaluation and its +//! related builds. +//! +//! This data is required to scan derivation inputs for the build +//! references (the "build closure") that they make use of. +//! +//! Please see //tvix/eval/docs/build-references.md for more +//! information. + +use crate::refscan::{ReferenceScanner, STORE_PATH_LEN}; +use nix_compat::nixhash::NixHash; +use std::{ + collections::{hash_map, BTreeSet, HashMap}, + ops::Index, +}; + +#[derive(Debug, PartialEq)] +pub enum PathKind { + /// A literal derivation (`.drv`-file), and the *names* of its outputs. + Derivation { output_names: BTreeSet<String> }, + + /// An output of a derivation, its name, and the path of its derivation. + Output { name: String, derivation: String }, + + /// A plain store path (e.g. source files copied to the store). + Plain, +} + +#[derive(Debug, PartialEq)] +pub struct KnownPath { + pub path: String, + pub kind: PathKind, +} + +impl KnownPath { + fn new(path: String, kind: PathKind) -> Self { + KnownPath { path, kind } + } +} + +/// Internal struct to prevent accidental leaks of the truncated path +/// names. +#[repr(transparent)] +#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)] +pub struct PathName(String); + +impl From<&str> for PathName { + fn from(s: &str) -> Self { + PathName(s[..STORE_PATH_LEN].to_string()) + } +} + +/// This instance is required to pass PathName instances as needles to +/// the reference scanner. +impl AsRef<[u8]> for PathName { + fn as_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +#[derive(Debug, Default)] +pub struct KnownPaths { + /// All known paths, keyed by a truncated version of their store + /// path used for reference scanning. + paths: HashMap<PathName, KnownPath>, + + /// All known derivation or FOD hashes. + /// + /// Keys are derivation paths, values is the NixHash. + derivation_or_fod_hashes: HashMap<String, NixHash>, +} + +impl Index<&PathName> for KnownPaths { + type Output = KnownPath; + + fn index(&self, index: &PathName) -> &Self::Output { + &self.paths[index] + } +} + +impl KnownPaths { + fn insert_path(&mut self, path: String, path_kind: PathKind) { + match self.paths.entry(path.as_str().into()) { + hash_map::Entry::Vacant(entry) => { + entry.insert(KnownPath::new(path, path_kind)); + } + + hash_map::Entry::Occupied(mut entry) => { + match (path_kind, &mut entry.get_mut().kind) { + // These variant combinations require no "merging action". + (PathKind::Plain, PathKind::Plain) => (), + (PathKind::Output { .. 
}, PathKind::Output { .. }) => (), + + ( + PathKind::Derivation { output_names: new }, + PathKind::Derivation { + output_names: ref mut old, + }, + ) => { + old.extend(new); + } + + _ => panic!( + "path '{}' inserted twice with different types", + entry.key().0 + ), + }; + } + }; + } + + /// Mark a plain path as known. + pub fn plain<S: ToString>(&mut self, path: S) { + self.insert_path(path.to_string(), PathKind::Plain); + } + + /// Mark a derivation as known. + pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) { + self.insert_path( + path.to_string(), + PathKind::Derivation { + output_names: outputs.iter().map(ToString::to_string).collect(), + }, + ); + } + + /// Mark a derivation output path as known. + pub fn output<P: ToString, N: ToString, D: ToString>( + &mut self, + output_path: P, + name: N, + drv_path: D, + ) { + self.insert_path( + output_path.to_string(), + PathKind::Output { + name: name.to_string(), + derivation: drv_path.to_string(), + }, + ); + } + + /// Checks whether there are any known paths. If not, a reference + /// scanner can not be created. + pub fn is_empty(&self) -> bool { + self.paths.is_empty() + } + + /// Create a reference scanner from the current set of known paths. + pub fn reference_scanner(&self) -> ReferenceScanner<PathName> { + let candidates = self.paths.keys().map(Clone::clone).collect(); + ReferenceScanner::new(candidates) + } + + /// Fetch the opaque "hash derivation modulo" for a given derivation path. + pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash { + // TODO: we rely on an invariant that things *should* have + // been calculated if we get this far. + self.derivation_or_fod_hashes[drv_path].clone() + } + + pub fn add_hash_derivation_modulo<D: ToString>( + &mut self, + drv: D, + hash_derivation_modulo: &NixHash, + ) { + #[allow(unused_variables)] // assertions on this only compiled in debug builds + let old = self + .derivation_or_fod_hashes + .insert(drv.to_string(), hash_derivation_modulo.to_owned()); + + #[cfg(debug_assertions)] + { + if let Some(old) = old { + debug_assert!( + old == *hash_derivation_modulo, + "hash derivation modulo for a given derivation should always be calculated the same" + ); + } + } + } +} diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs new file mode 100644 index 000000000000..acb81d31445f --- /dev/null +++ b/tvix/glue/src/lib.rs @@ -0,0 +1,28 @@ +use std::{cell::RefCell, rc::Rc}; + +use known_paths::KnownPaths; + +pub mod derivation; +pub mod errors; +pub mod known_paths; +pub mod refscan; +pub mod tvix_io; +pub mod tvix_store_io; + +/// Adds derivation-related builtins to the passed [tvix_eval::Evaluation]. +/// +/// These are `derivation` and `derivationStrict`. +/// +/// As they need to interact with `known_paths`, we also need to pass in +/// `known_paths`. +pub fn add_derivation_builtins( + eval: &mut tvix_eval::Evaluation, + known_paths: Rc<RefCell<KnownPaths>>, +) { + eval.builtins + .extend(derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); +} diff --git a/tvix/glue/src/refscan.rs b/tvix/glue/src/refscan.rs new file mode 100644 index 000000000000..0e0bb6c77828 --- /dev/null +++ b/tvix/glue/src/refscan.rs @@ -0,0 +1,115 @@ +//! Simple scanner for non-overlapping, known references of Nix store paths in a +//! given string. +//! +//! This is used for determining build references (see +//! 
//tvix/eval/docs/build-references.md for more details). +//! +//! The scanner itself is using the Wu-Manber string-matching algorithm, using +//! our fork of the `wu-mamber` crate. + +use std::collections::BTreeSet; +use wu_manber::TwoByteWM; + +pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len(); + +/// Represents a "primed" reference scanner with an automaton that knows the set +/// of store paths to scan for. +pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> { + candidates: Vec<P>, + searcher: Option<TwoByteWM>, + matches: Vec<usize>, +} + +impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> { + /// Construct a new `ReferenceScanner` that knows how to scan for the given + /// candidate store paths. + pub fn new(candidates: Vec<P>) -> Self { + let searcher = if candidates.is_empty() { + None + } else { + Some(TwoByteWM::new(&candidates)) + }; + + ReferenceScanner { + searcher, + candidates, + matches: Default::default(), + } + } + + /// Scan the given str for all non-overlapping matches and collect them + /// in the scanner. + pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) { + if haystack.as_ref().len() < STORE_PATH_LEN { + return; + } + + if let Some(searcher) = &self.searcher { + for m in searcher.find(haystack) { + self.matches.push(m.pat_idx); + } + } + } + + /// Finalise the reference scanner and return the resulting matches. + pub fn finalise(self) -> BTreeSet<P> { + self.matches + .into_iter() + .map(|idx| self.candidates[idx].clone()) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // The actual derivation of `nixpkgs.hello`. + const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; + + #[test] + fn test_no_patterns() { + let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]); + + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn test_single_match() { + let mut scanner = ReferenceScanner::new(vec![ + "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(), + ]); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 1); + 
assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16")); + } + + #[test] + fn test_multiple_matches() { + let candidates = vec![ + // these exist in the drv: + "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(), + "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(), + "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(), + // this doesn't: + "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(), + ]; + + let mut scanner = ReferenceScanner::new(candidates.clone()); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + assert_eq!(result.len(), 3); + + for c in candidates[..3].iter() { + assert!(result.contains(c)); + } + } +} diff --git a/tvix/glue/src/tvix_io.rs b/tvix/glue/src/tvix_io.rs new file mode 100644 index 000000000000..caadbeb5e663 --- /dev/null +++ b/tvix/glue/src/tvix_io.rs @@ -0,0 +1,80 @@ +//! This module implements a wrapper around tvix-eval's [EvalIO] type, +//! adding functionality which is required by tvix-cli: +//! +//! 1. Marking plain paths known to the reference scanner. +//! 2. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping. +//! +//! All uses of [EvalIO] in tvix-cli must make use of this wrapper, +//! otherwise fundamental features like nixpkgs bootstrapping and hash +//! calculation will not work. + +use std::cell::RefCell; +use std::io; +use std::path::{Path, PathBuf}; +use std::rc::Rc; +use tvix_eval::{EvalIO, FileType}; + +use crate::known_paths::KnownPaths; + +// TODO: Merge this together with TvixStoreIO? +pub struct TvixIO<T: EvalIO> { + /// Ingested paths must be reported to this known paths tracker + /// for accurate build reference scanning. + known_paths: Rc<RefCell<KnownPaths>>, + + // Actual underlying [EvalIO] implementation. + actual: T, +} + +impl<T: EvalIO> TvixIO<T> { + pub fn new(known_paths: Rc<RefCell<KnownPaths>>, actual: T) -> Self { + Self { + known_paths, + actual, + } + } +} + +impl<T: EvalIO> EvalIO for TvixIO<T> { + fn store_dir(&self) -> Option<String> { + self.actual.store_dir() + } + + fn import_path(&self, path: &Path) -> Result<PathBuf, io::Error> { + let imported_path = self.actual.import_path(path)?; + self.known_paths + .borrow_mut() + .plain(imported_path.to_string_lossy()); + + Ok(imported_path) + } + + fn path_exists(&self, path: &Path) -> Result<bool, io::Error> { + if path.starts_with("/__corepkgs__") { + return Ok(true); + } + + self.actual.path_exists(path) + } + + fn read_to_string(&self, path: &Path) -> Result<String, io::Error> { + // Bundled version of corepkgs/fetchurl.nix. The counterpart + // of this happens in `main`, where the `nix_path` of the + // evaluation has `nix=/__corepkgs__` added to it. + // + // This workaround is similar to what cppnix does for passing + // the path through. + // + // TODO: this comparison is bad and allocates, we should use + // the sane path library. + if path.starts_with("/__corepkgs__/fetchurl.nix") { + return Ok(include_str!("fetchurl.nix").to_string()); + } + + self.actual.read_to_string(path) + } + + fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> { + self.actual.read_dir(path) + } +} diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs new file mode 100644 index 000000000000..9be896ffc40f --- /dev/null +++ b/tvix/glue/src/tvix_store_io.rs @@ -0,0 +1,359 @@ +//! This module provides an implementation of EvalIO talking to tvix-store. 
+ +use nix_compat::store_path::{self, StorePath}; +use std::{io, path::Path, path::PathBuf, sync::Arc}; +use tokio::io::AsyncReadExt; +use tracing::{error, instrument, warn}; +use tvix_eval::{EvalIO, FileType, StdIO}; + +use tvix_castore::{ + blobservice::BlobService, + directoryservice::{self, DirectoryService}, + import, + proto::{node::Node, NamedNode}, + B3Digest, +}; +use tvix_store::{ + nar::calculate_size_and_sha256, + pathinfoservice::PathInfoService, + proto::{NarInfo, PathInfo}, +}; + +/// Implements [EvalIO], asking given [PathInfoService], [DirectoryService] +/// and [BlobService]. +/// +/// In case the given path does not exist in these stores, we ask StdIO. +/// This is to both cover cases of syntactically valid store paths, that exist +/// on the filesystem (still managed by Nix), as well as being able to read +/// files outside store paths. +pub struct TvixStoreIO { + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + std_io: StdIO, + tokio_handle: tokio::runtime::Handle, +} + +impl TvixStoreIO { + pub fn new( + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + tokio_handle: tokio::runtime::Handle, + ) -> Self { + Self { + blob_service, + directory_service, + path_info_service, + std_io: StdIO {}, + tokio_handle, + } + } + + /// for a given [StorePath] and additional [Path] inside the store path, + /// look up the [PathInfo], and if it exists, and then use + /// [directoryservice::traverse_to] to return the + /// [Node] specified by `sub_path`. + #[instrument(skip(self), ret, err)] + fn store_path_to_root_node( + &self, + store_path: &StorePath, + sub_path: &Path, + ) -> Result<Option<Node>, io::Error> { + let path_info_service = self.path_info_service.clone(); + let task = self.tokio_handle.spawn({ + let digest = *store_path.digest(); + async move { path_info_service.get(digest).await } + }); + let path_info = match self.tokio_handle.block_on(task).unwrap()? { + // If there's no PathInfo found, early exit + None => return Ok(None), + Some(path_info) => path_info, + }; + + let root_node = { + match path_info.node { + None => { + warn!( + "returned PathInfo {:?} node is None, this shouldn't happen.", + &path_info + ); + return Ok(None); + } + Some(root_node) => match root_node.node { + None => { + warn!("node for {:?} is None, this shouldn't happen.", &root_node); + return Ok(None); + } + Some(root_node) => root_node, + }, + } + }; + + let directory_service = self.directory_service.clone(); + let sub_path = sub_path.to_owned(); + let task = self.tokio_handle.spawn(async move { + directoryservice::descend_to(directory_service, root_node, &sub_path).await + }); + + Ok(self.tokio_handle.block_on(task).unwrap()?) + } +} + +impl EvalIO for TvixStoreIO { + #[instrument(skip(self), ret, err)] + fn path_exists(&self, path: &Path) -> Result<bool, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if self + .store_path_to_root_node(&store_path, &sub_path)? + .is_some() + { + Ok(true) + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.path_exists(path) + } + } else { + // The store path is no store path, so do regular StdIO. 
+ self.std_io.path_exists(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_to_string(&self, path: &Path) -> Result<String, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + // depending on the node type, treat read_to_string differently + match node { + Node::Directory(_) => { + // This would normally be a io::ErrorKind::IsADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("tried to read directory at {:?} to string", path), + )) + } + Node::File(file_node) => { + let digest: B3Digest = + file_node.digest.clone().try_into().map_err(|_e| { + error!( + file_node = ?file_node, + "invalid digest" + ); + io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid digest length in file node: {:?}", file_node), + ) + })?; + + let blob_service = self.blob_service.clone(); + + let task = self.tokio_handle.spawn(async move { + let mut reader = { + let resp = blob_service.open_read(&digest).await?; + match resp { + Some(blob_reader) => blob_reader, + None => { + error!( + blob.digest = %digest, + "blob not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("blob {} not found", &digest), + ))? + } + } + }; + + let mut buf = String::new(); + + reader.read_to_string(&mut buf).await?; + Ok(buf) + }); + + self.tokio_handle.block_on(task).unwrap() + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_to_string for symlinks is unsupported", + ))?, + } + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.read_to_string(path) + } + } else { + // The store path is no store path, so do regular StdIO. + self.std_io.read_to_string(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + match node { + Node::Directory(directory_node) => { + // fetch the Directory itself. + let digest: B3Digest = + directory_node.digest.clone().try_into().map_err(|_e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!( + "invalid digest length in directory node: {:?}", + directory_node + ), + ) + })?; + + let directory_service = self.directory_service.clone(); + let digest_clone = digest.clone(); + let task = self + .tokio_handle + .spawn(async move { directory_service.get(&digest_clone).await }); + if let Some(directory) = self.tokio_handle.block_on(task).unwrap()? { + let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new(); + for node in directory.nodes() { + children.push(match node { + Node::Directory(e) => (e.name, FileType::Directory), + Node::File(e) => (e.name, FileType::Regular), + Node::Symlink(e) => (e.name, FileType::Symlink), + }) + } + Ok(children) + } else { + // If we didn't get the directory node that's linked, that's a store inconsistency! + error!( + directory.digest = %digest, + path = ?path, + "directory not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("directory {digest} does not exist"), + ))? 
+ } + } + Node::File(_file_node) => { + // This would normally be a io::ErrorKind::NotADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + "tried to readdir path {:?}, which is a file", + ))? + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_dir for symlinks is unsupported", + ))?, + } + } else { + self.std_io.read_dir(path) + } + } else { + self.std_io.read_dir(path) + } + } + + #[instrument(skip(self), ret, err)] + fn import_path(&self, path: &std::path::Path) -> Result<PathBuf, std::io::Error> { + let p = path.to_owned(); + let blob_service = self.blob_service.clone(); + let directory_service = self.directory_service.clone(); + let path_info_service = self.path_info_service.clone(); + + let task = self.tokio_handle.spawn(async move { + import_path_with_pathinfo(blob_service, directory_service, path_info_service, &p).await + }); + + let path_info = self.tokio_handle.block_on(task).unwrap()?; + + // from the [PathInfo], extract the store path (as string). + Ok({ + let mut path = PathBuf::from(nix_compat::store_path::STORE_DIR_WITH_SLASH); + + let root_node_name = path_info.node.unwrap().node.unwrap().get_name().to_vec(); + + // This must be a string, otherwise it would have failed validation. + let root_node_name = String::from_utf8(root_node_name).unwrap(); + + // append to the PathBuf + path.push(root_node_name); + + // and return it + path + }) + } + + #[instrument(skip(self), ret)] + fn store_dir(&self) -> Option<String> { + Some("/nix/store".to_string()) + } +} + +/// Imports a given path on the filesystem into the store, and returns the +/// [PathInfo] describing the path, that was sent to +/// [PathInfoService]. +#[instrument(skip(blob_service, directory_service, path_info_service), ret, err)] +async fn import_path_with_pathinfo( + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + path: &std::path::Path, +) -> Result<PathInfo, io::Error> { + // Call [import::ingest_path], which will walk over the given path and return a root_node. + let root_node = import::ingest_path(blob_service.clone(), directory_service.clone(), path) + .await + .expect("error during import_path"); + + // Render the NAR. + let (nar_size, nar_sha256) = + calculate_size_and_sha256(&root_node, blob_service.clone(), directory_service.clone()) + .await + .expect("error during nar calculation"); // TODO: handle error + + // TODO: make a path_to_name helper function? + let name = path + .file_name() + .expect("path must not be ..") + .to_str() + .expect("path must be valid unicode"); + + let output_path = store_path::build_nar_based_store_path(&nar_sha256, name); + + // assemble a new root_node with a name that is derived from the nar hash. + let root_node = root_node.rename(output_path.to_string().into_bytes().into()); + + // assemble the [PathInfo] object. + let path_info = PathInfo { + node: Some(tvix_castore::proto::Node { + node: Some(root_node), + }), + // There's no reference scanning on path contents ingested like this. 
+ references: vec![], + narinfo: Some(NarInfo { + nar_size, + nar_sha256: nar_sha256.to_vec().into(), + signatures: vec![], + reference_names: vec![], + deriver: None, + ca: Some(tvix_store::proto::nar_info::Ca { + r#type: tvix_store::proto::nar_info::ca::Hash::NarSha256.into(), + digest: nar_sha256.to_vec().into(), + }), + }), + }; + + // put into [PathInfoService], and return the [PathInfo] that we get + // back from there (it might contain additional signatures). + let path_info = path_info_service.put(path_info).await?; + + Ok(path_info) +} |