Diffstat (limited to 'tvix/cli')
-rw-r--r--  tvix/cli/Cargo.toml            |  24
-rw-r--r--  tvix/cli/default.nix           |  35
-rw-r--r--  tvix/cli/src/.skip-subtree     |   1
-rw-r--r--  tvix/cli/src/derivation.nix    |  36
-rw-r--r--  tvix/cli/src/derivation.rs     | 783
-rw-r--r--  tvix/cli/src/errors.rs         |  26
-rw-r--r--  tvix/cli/src/fetchurl.nix      |  53
-rw-r--r--  tvix/cli/src/known_paths.rs    | 186
-rw-r--r--  tvix/cli/src/main.rs           | 287
-rw-r--r--  tvix/cli/src/refscan.rs        | 115
-rw-r--r--  tvix/cli/src/tvix_io.rs        |  79
-rw-r--r--  tvix/cli/src/tvix_store_io.rs  | 356
12 files changed, 1981 insertions, 0 deletions
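
Before the full diff, a brief orientation: the sketch below is hypothetical (it is not part of this commit) and is modelled on the unit test added in src/derivation.rs. It shows how the new derivation builtins, the shared KnownPaths tracker and the bundled derivation.nix wrapper are expected to plug into a tvix_eval::Evaluation; the crate:: paths assume the snippet lives inside the tvix-cli crate itself.

// Hypothetical orientation sketch, modelled on the test in src/derivation.rs.
use std::{cell::RefCell, rc::Rc};

use crate::known_paths::KnownPaths;

fn evaluate_out_path() -> Option<tvix_eval::Value> {
    // Evaluate the outPath of a trivial derivation.
    let mut eval = tvix_eval::Evaluation::new_impure(
        r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; }).outPath"#,
        None,
    );

    // All derivation builtins share one KnownPaths tracker, so later
    // reference scanning sees every store path this evaluation produced.
    let known_paths: Rc<RefCell<KnownPaths>> = Default::default();
    eval.builtins
        .extend(crate::derivation::derivation_builtins(known_paths));

    // `builtins.derivation` is the Nix-level wrapper around the
    // `derivationStrict` primop, bundled verbatim from derivation.nix.
    eval.src_builtins
        .push(("derivation", include_str!("derivation.nix")));

    eval.evaluate().value
}

Evaluating this expression should yield the derivation's outPath as a string, as exercised by the test further down in the diff.
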
diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml new file mode 100644 index 000000000000..94b010cc1efd --- /dev/null +++ b/tvix/cli/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "tvix-cli" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "tvix" +path = "src/main.rs" + +[dependencies] +nix-compat = { path = "../nix-compat" } +tvix-castore = { path = "../castore" } +tvix-store = { path = "../store", default-features = false, features = []} +tvix-eval = { path = "../eval" } +bytes = "1.4.0" +clap = { version = "4.0", features = ["derive", "env"] } +dirs = "4.0.0" +rustyline = "10.0.0" +thiserror = "1.0.38" +tracing = "0.1.37" +tokio = "1.28.0" + +[dependencies.wu-manber] +git = "https://github.com/tvlfyi/wu-manber.git" diff --git a/tvix/cli/default.nix b/tvix/cli/default.nix new file mode 100644 index 000000000000..0b73fa1aa59e --- /dev/null +++ b/tvix/cli/default.nix @@ -0,0 +1,35 @@ +{ depot, pkgs, lib, ... }: + +let + mkNixpkgsEvalCheck = attrset: expectedPath: { + label = ":nix: evaluate nixpkgs.${attrset} in tvix"; + needsOutput = true; + + command = pkgs.writeShellScript "tvix-eval-${builtins.replaceStrings [".drv"] ["-drv"] attrset}" '' + TVIX_OUTPUT=$(result/bin/tvix -E '(import ${pkgs.path} {}).${attrset}') + EXPECTED='${/* the verbatim expected Tvix output: */ "=> \"${expectedPath}\" :: string"}' + + echo "Tvix output: ''${TVIX_OUTPUT}" + if [ "$TVIX_OUTPUT" != "$EXPECTED" ]; then + echo "Correct would have been ''${EXPECTED}" + exit 1 + fi + + echo "Output was correct." + ''; + }; +in + +(depot.tvix.crates.workspaceMembers.tvix-cli.build.override { + runTests = true; +}).overrideAttrs (_: { + meta = { + ci.extraSteps = { + eval-nixpkgs-stdenv-drvpath = (mkNixpkgsEvalCheck "stdenv.drvPath" pkgs.stdenv.drvPath); + eval-nixpkgs-stdenv-outpath = (mkNixpkgsEvalCheck "stdenv.outPath" pkgs.stdenv.outPath); + eval-nixpkgs-hello-outpath = (mkNixpkgsEvalCheck "hello.outPath" pkgs.hello.outPath); + eval-nixpkgs-cross-stdenv-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.stdenv.outPath" pkgs.pkgsCross.aarch64-multiplatform.stdenv.outPath); + eval-nixpkgs-cross-hello-outpath = (mkNixpkgsEvalCheck "pkgsCross.aarch64-multiplatform.hello.outPath" pkgs.pkgsCross.aarch64-multiplatform.hello.outPath); + }; + }; +}) diff --git a/tvix/cli/src/.skip-subtree b/tvix/cli/src/.skip-subtree new file mode 100644 index 000000000000..a16a2afe1f1e --- /dev/null +++ b/tvix/cli/src/.skip-subtree @@ -0,0 +1 @@ +Because of the derivation.nix file ... diff --git a/tvix/cli/src/derivation.nix b/tvix/cli/src/derivation.nix new file mode 100644 index 000000000000..9355cc3a96f0 --- /dev/null +++ b/tvix/cli/src/derivation.nix @@ -0,0 +1,36 @@ +# LGPL-2.1-or-later +# +# taken from: https://github.com/NixOS/nix/blob/master/src/libexpr/primops/derivation.nix +# +# TODO: rewrite in native Rust code + +/* This is the implementation of the ‘derivation’ builtin function. + It's actually a wrapper around the ‘derivationStrict’ primop. */ + +drvAttrs @ { outputs ? [ "out" ], ... 
}: + +let + + strict = derivationStrict drvAttrs; + + commonAttrs = drvAttrs // (builtins.listToAttrs outputsList) // + { + all = map (x: x.value) outputsList; + inherit drvAttrs; + }; + + outputToAttrListElement = outputName: + { + name = outputName; + value = commonAttrs // { + outPath = builtins.getAttr outputName strict; + drvPath = strict.drvPath; + type = "derivation"; + inherit outputName; + }; + }; + + outputsList = map outputToAttrListElement outputs; + +in +(builtins.head outputsList).value diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs new file mode 100644 index 000000000000..76cc3f60f77e --- /dev/null +++ b/tvix/cli/src/derivation.rs @@ -0,0 +1,783 @@ +//! Implements `builtins.derivation`, the core of what makes Nix build packages. +use nix_compat::derivation::Derivation; +use nix_compat::nixhash; +use std::cell::RefCell; +use std::collections::{btree_map, BTreeSet}; +use std::rc::Rc; +use tvix_eval::builtin_macros::builtins; +use tvix_eval::generators::{self, emit_warning_kind, GenCo}; +use tvix_eval::{ + AddContext, CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixList, Value, WarningKind, +}; + +use crate::errors::Error; +use crate::known_paths::{KnownPaths, PathKind, PathName}; + +// Constants used for strangely named fields in derivation inputs. +const STRUCTURED_ATTRS: &str = "__structuredAttrs"; +const IGNORE_NULLS: &str = "__ignoreNulls"; + +/// Helper function for populating the `drv.outputs` field from a +/// manually specified set of outputs, instead of the default +/// `outputs`. +async fn populate_outputs( + co: &GenCo, + drv: &mut Derivation, + outputs: NixList, +) -> Result<(), ErrorKind> { + // Remove the original default `out` output. + drv.outputs.clear(); + + for output in outputs { + let output_name = generators::request_force(co, output) + .await + .to_str() + .context("determining output name")?; + + if drv + .outputs + .insert(output_name.as_str().into(), Default::default()) + .is_some() + { + return Err(Error::DuplicateOutput(output_name.as_str().into()).into()); + } + } + + Ok(()) +} + +/// Populate the inputs of a derivation from the build references +/// found when scanning the derivation's parameters. +fn populate_inputs<I: IntoIterator<Item = PathName>>( + drv: &mut Derivation, + known_paths: &KnownPaths, + references: I, +) { + for reference in references.into_iter() { + let reference = &known_paths[&reference]; + match &reference.kind { + PathKind::Plain => { + drv.input_sources.insert(reference.path.clone()); + } + + PathKind::Output { name, derivation } => { + match drv.input_derivations.entry(derivation.clone()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(BTreeSet::from([name.clone()])); + } + + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().insert(name.clone()); + } + } + } + + PathKind::Derivation { output_names } => { + match drv.input_derivations.entry(reference.path.clone()) { + btree_map::Entry::Vacant(entry) => { + entry.insert(output_names.clone()); + } + + btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().extend(output_names.clone().into_iter()); + } + } + } + } + } +} + +/// Populate the output configuration of a derivation based on the +/// parameters passed to the call, flipping the required +/// parameters for a fixed-output derivation if necessary. +/// +/// This function handles all possible combinations of the +/// parameters, including invalid ones. 
+/// +/// Due to the support for SRI hashes, and how these are passed along to +/// builtins.derivation, outputHash and outputHashAlgo can have values which +/// need to be further modified before constructing the Derivation struct. +/// +/// If outputHashAlgo is an SRI hash, outputHashAlgo must either be an empty +/// string, or the hash algorithm as specified in the (single) SRI (entry). +/// SRI strings with multiple hash algorithms are not supported. +/// +/// In case an SRI string was used, the (single) fixed output is populated +/// with the hash algo name, and the hash digest is populated with the +/// (lowercase) hex encoding of the digest. +/// +/// These values are only rewritten for the outputs, not what's passed to env. +fn populate_output_configuration( + drv: &mut Derivation, + hash: Option<String>, // in nix: outputHash + hash_algo: Option<String>, // in nix: outputHashAlgo + hash_mode: Option<String>, // in nix: outputHashmode +) -> Result<(), ErrorKind> { + // We only do something when `digest` and `algo` are `Some(_)``, and + // there's an `out` output. + if let (Some(hash), Some(algo), hash_mode) = (hash, hash_algo, hash_mode) { + match drv.outputs.get_mut("out") { + None => return Err(Error::ConflictingOutputTypes.into()), + Some(out) => { + // treat an empty algo as None + let a = if algo.is_empty() { + None + } else { + Some(algo.as_ref()) + }; + + let output_hash = nixhash::from_str(&hash, a).map_err(Error::InvalidOutputHash)?; + + // construct the NixHashWithMode. + out.hash_with_mode = match hash_mode.as_deref() { + None | Some("flat") => Some(nixhash::NixHashWithMode::Flat(output_hash)), + Some("recursive") => Some(nixhash::NixHashWithMode::Recursive(output_hash)), + Some(other) => { + return Err(Error::InvalidOutputHashMode(other.to_string()).into()) + } + } + } + } + } + + Ok(()) +} + +/// Handles derivation parameters which are not just forwarded to +/// the environment. The return value indicates whether the +/// parameter should be included in the environment. +async fn handle_derivation_parameters( + drv: &mut Derivation, + co: &GenCo, + name: &str, + value: &Value, + val_str: &str, +) -> Result<Result<bool, CatchableErrorKind>, ErrorKind> { + match name { + IGNORE_NULLS => return Ok(Ok(false)), + + // Command line arguments to the builder. + "args" => { + let args = value.to_list()?; + for arg in args { + match strong_coerce_to_string(co, arg).await? { + Err(cek) => return Ok(Err(cek)), + Ok(s) => drv.arguments.push(s), + } + } + + // The arguments do not appear in the environment. 
+ return Ok(Ok(false)); + } + + // Explicitly specified drv outputs (instead of default [ "out" ]) + "outputs" => { + let outputs = value + .to_list() + .context("looking at the `outputs` parameter of the derivation")?; + + populate_outputs(co, drv, outputs).await?; + } + + "builder" => { + drv.builder = val_str.to_string(); + } + + "system" => { + drv.system = val_str.to_string(); + } + + _ => {} + } + + Ok(Ok(true)) +} + +async fn strong_coerce_to_string( + co: &GenCo, + val: Value, +) -> Result<Result<String, CatchableErrorKind>, ErrorKind> { + let val = generators::request_force(co, val).await; + match generators::request_string_coerce(co, val, CoercionKind::Strong).await { + Err(cek) => Ok(Err(cek)), + Ok(val_str) => Ok(Ok(val_str.as_str().to_string())), + } +} + +#[builtins(state = "Rc<RefCell<KnownPaths>>")] +mod derivation_builtins { + use super::*; + use nix_compat::store_path::hash_placeholder; + use tvix_eval::generators::Gen; + + #[builtin("placeholder")] + async fn builtin_placeholder(co: GenCo, input: Value) -> Result<Value, ErrorKind> { + let placeholder = hash_placeholder( + input + .to_str() + .context("looking at output name in builtins.placeholder")? + .as_str(), + ); + + Ok(placeholder.into()) + } + + /// Strictly construct a Nix derivation from the supplied arguments. + /// + /// This is considered an internal function, users usually want to + /// use the higher-level `builtins.derivation` instead. + #[builtin("derivationStrict")] + async fn builtin_derivation_strict( + state: Rc<RefCell<KnownPaths>>, + co: GenCo, + input: Value, + ) -> Result<Value, ErrorKind> { + let input = input.to_attrs()?; + let name = generators::request_force(&co, input.select_required("name")?.clone()) + .await + .to_str() + .context("determining derivation name")?; + + if name.is_empty() { + return Err(ErrorKind::Abort("derivation has empty name".to_string())); + } + + // Check whether attributes should be passed as a JSON file. + // TODO: the JSON serialisation has to happen here. + if let Some(sa) = input.select(STRUCTURED_ATTRS) { + if generators::request_force(&co, sa.clone()).await.as_bool()? { + return Err(ErrorKind::NotImplemented(STRUCTURED_ATTRS)); + } + } + + // Check whether null attributes should be ignored or passed through. + let ignore_nulls = match input.select(IGNORE_NULLS) { + Some(b) => generators::request_force(&co, b.clone()).await.as_bool()?, + None => false, + }; + + let mut drv = Derivation::default(); + drv.outputs.insert("out".to_string(), Default::default()); + + // Configure fixed-output derivations if required. + + async fn select_string( + co: &GenCo, + attrs: &NixAttrs, + key: &str, + ) -> Result<Result<Option<String>, CatchableErrorKind>, ErrorKind> { + if let Some(attr) = attrs.select(key) { + match strong_coerce_to_string(co, attr.clone()).await? { + Err(cek) => return Ok(Err(cek)), + Ok(str) => return Ok(Ok(Some(str))), + } + } + + Ok(Ok(None)) + } + + for (name, value) in input.clone().into_iter_sorted() { + let value = generators::request_force(&co, value).await; + if ignore_nulls && matches!(value, Value::Null) { + continue; + } + + match strong_coerce_to_string(&co, value.clone()).await? { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(val_str) => { + // handle_derivation_parameters tells us whether the + // argument should be added to the environment; continue + // to the next one otherwise + match handle_derivation_parameters( + &mut drv, + &co, + name.as_str(), + &value, + &val_str, + ) + .await? 
+ { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(false) => continue, + _ => (), + } + + // Most of these are also added to the builder's environment in "raw" form. + if drv + .environment + .insert(name.as_str().to_string(), val_str.into()) + .is_some() + { + return Err(Error::DuplicateEnvVar(name.as_str().to_string()).into()); + } + } + } + } + + let output_hash = match select_string(&co, &input, "outputHash") + .await + .context("evaluating the `outputHash` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + let output_hash_algo = match select_string(&co, &input, "outputHashAlgo") + .await + .context("evaluating the `outputHashAlgo` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + let output_hash_mode = match select_string(&co, &input, "outputHashMode") + .await + .context("evaluating the `outputHashMode` parameter")? + { + Err(cek) => return Ok(Value::Catchable(cek)), + Ok(s) => s, + }; + populate_output_configuration(&mut drv, output_hash, output_hash_algo, output_hash_mode)?; + + // Scan references in relevant attributes to detect any build-references. + let references = { + let state = state.borrow(); + if state.is_empty() { + // skip reference scanning, create an empty result + Default::default() + } else { + let mut refscan = state.reference_scanner(); + drv.arguments.iter().for_each(|s| refscan.scan(s)); + drv.environment.values().for_each(|s| refscan.scan(s)); + refscan.scan(&drv.builder); + refscan.finalise() + } + }; + + // Each output name needs to exist in the environment, at this + // point initialised as an empty string because that is the + // way of Golang ;) + for output in drv.outputs.keys() { + if drv + .environment + .insert(output.to_string(), String::new().into()) + .is_some() + { + emit_warning_kind(&co, WarningKind::ShadowedOutput(output.to_string())).await; + } + } + + let mut known_paths = state.borrow_mut(); + populate_inputs(&mut drv, &known_paths, references); + + // At this point, derivation fields are fully populated from + // eval data structures. + drv.validate(false).map_err(Error::InvalidDerivation)?; + + // Calculate the derivation_or_fod_hash for the current derivation. 
+ // This one is still intermediate (so not added to known_paths) + let derivation_or_fod_hash_tmp = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + // Mutate the Derivation struct and set output paths + drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp) + .map_err(Error::InvalidDerivation)?; + + let derivation_path = drv + .calculate_derivation_path(&name) + .map_err(Error::InvalidDerivation)?; + + // recompute the hash derivation modulo and add to known_paths + let derivation_or_fod_hash_final = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + known_paths.add_hash_derivation_modulo( + derivation_path.to_absolute_path(), + &derivation_or_fod_hash_final, + ); + + // mark all the new paths as known + let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect(); + known_paths.drv(derivation_path.to_absolute_path(), &output_names); + + for (output_name, output) in &drv.outputs { + known_paths.output( + &output.path, + output_name, + derivation_path.to_absolute_path(), + ); + } + + let mut new_attrs: Vec<(String, String)> = drv + .outputs + .into_iter() + .map(|(name, output)| (name, output.path)) + .collect(); + + new_attrs.push(("drvPath".to_string(), derivation_path.to_absolute_path())); + + Ok(Value::Attrs(Box::new(NixAttrs::from_iter( + new_attrs.into_iter(), + )))) + } + + #[builtin("toFile")] + async fn builtin_to_file( + state: Rc<RefCell<KnownPaths>>, + co: GenCo, + name: Value, + content: Value, + ) -> Result<Value, ErrorKind> { + let name = name + .to_str() + .context("evaluating the `name` parameter of builtins.toFile")?; + let content = content + .to_str() + .context("evaluating the `content` parameter of builtins.toFile")?; + + let mut refscan = state.borrow().reference_scanner(); + refscan.scan(content.as_str()); + let refs = { + let paths = state.borrow(); + refscan + .finalise() + .into_iter() + .map(|path| paths[&path].path.to_string()) + .collect::<Vec<_>>() + }; + + // TODO: fail on derivation references (only "plain" is allowed here) + + let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs) + .map_err(|_e| { + nix_compat::derivation::DerivationError::InvalidOutputName( + name.as_str().to_string(), + ) + }) + .map_err(Error::InvalidDerivation)? + .to_absolute_path(); + + state.borrow_mut().plain(&path); + + // TODO: actually persist the file in the store at that path ... + + Ok(Value::String(path.into())) + } +} + +pub use derivation_builtins::builtins as derivation_builtins; + +#[cfg(test)] +mod tests { + use crate::known_paths::KnownPaths; + use nix_compat::store_path::hash_placeholder; + use std::{cell::RefCell, rc::Rc}; + + #[test] + fn derivation() { + let mut eval = tvix_eval::Evaluation::new_impure( + r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#, + None, + ); + + let known_paths: Rc<RefCell<KnownPaths>> = Default::default(); + + eval.builtins + .extend(crate::derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + // TODO: properly compose this + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); + + let result = eval.evaluate(); + + assert!(result.errors.is_empty(), "expect evaluation to succeed"); + let value = result.value.expect("must be some"); + // TODO: test this more reliably, derive Eq? 
+ assert_eq!( + "\"/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo\"", + value.to_string() + ); + } + + #[test] + fn derivation_empty_name() { + let mut eval = tvix_eval::Evaluation::new_impure( + r#"(derivation { name = ""; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#, + None, + ); + + let known_paths: Rc<RefCell<KnownPaths>> = Default::default(); + + eval.builtins + .extend(crate::derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + // TODO: properly compose this + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); + + assert!( + !eval.evaluate().errors.is_empty(), + "expect evaluation to fail" + ); + } + + // TODO: These tests are commented out because we do not have + // scaffolding to drive generators during testing at the moment. + + // static mut OBSERVER: NoOpObserver = NoOpObserver {}; + + // // Creates a fake VM for tests, which can *not* actually be + // // used to force (most) values but can satisfy the type + // // parameter. + // fn fake_vm() -> VM<'static> { + // // safe because accessing the observer doesn't actually do anything + // unsafe { + // VM::new( + // Default::default(), + // Box::new(tvix_eval::DummyIO), + // &mut OBSERVER, + // Default::default(), + // todo!(), + // ) + // } + // } + + // #[test] + // fn populate_outputs_ok() { + // let mut vm = fake_vm(); + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // let outputs = NixList::construct( + // 2, + // vec![Value::String("foo".into()), Value::String("bar".into())], + // ); + + // populate_outputs(&mut vm, &mut drv, outputs).expect("populate_outputs should succeed"); + + // assert_eq!(drv.outputs.len(), 2); + // assert!(drv.outputs.contains_key("bar")); + // assert!(drv.outputs.contains_key("foo")); + // } + + // #[test] + // fn populate_outputs_duplicate() { + // let mut vm = fake_vm(); + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // let outputs = NixList::construct( + // 2, + // vec![Value::String("foo".into()), Value::String("foo".into())], + // ); + + // populate_outputs(&mut vm, &mut drv, outputs) + // .expect_err("supplying duplicate outputs should fail"); + // } + + // #[test] + // fn populate_inputs_empty() { + // let mut drv = Derivation::default(); + // let paths = KnownPaths::default(); + // let inputs = vec![]; + + // populate_inputs(&mut drv, &paths, inputs); + + // assert!(drv.input_sources.is_empty()); + // assert!(drv.input_derivations.is_empty()); + // } + + // #[test] + // fn populate_inputs_all() { + // let mut drv = Derivation::default(); + + // let mut paths = KnownPaths::default(); + // paths.plain("/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo"); + // paths.drv( + // "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv", + // &["out"], + // ); + // paths.output( + // "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar", + // "out", + // "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv", + // ); + + // let inputs = vec![ + // "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo".into(), + // "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv".into(), + // "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar".into(), + // ]; + + // populate_inputs(&mut drv, &paths, inputs); + + // assert_eq!(drv.input_sources.len(), 1); + // assert!(drv + // .input_sources + // .contains("/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo")); + + // assert_eq!(drv.input_derivations.len(), 1); 
+ // assert!(drv + // .input_derivations + // .contains_key("/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv")); + // } + + // #[test] + // fn populate_output_config_std() { + // let mut drv = Derivation::default(); + + // populate_output_configuration(&mut drv, None, None, None) + // .expect("populate_output_configuration() should succeed"); + + // assert_eq!(drv, Derivation::default(), "derivation should be unchanged"); + // } + + // #[test] + // fn populate_output_config_fod() { + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // populate_output_configuration( + // &mut drv, + // Some("0000000000000000000000000000000000000000000000000000000000000000".into()), + // Some("sha256".into()), + // None, + // ) + // .expect("populate_output_configuration() should succeed"); + + // let expected = Hash { + // algo: "sha256".into(), + // digest: "0000000000000000000000000000000000000000000000000000000000000000".into(), + // }; + + // assert_eq!(drv.outputs["out"].hash, Some(expected)); + // } + + // #[test] + // fn populate_output_config_fod_recursive() { + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // populate_output_configuration( + // &mut drv, + // Some("0000000000000000000000000000000000000000000000000000000000000000".into()), + // Some("sha256".into()), + // Some("recursive".into()), + // ) + // .expect("populate_output_configuration() should succeed"); + + // let expected = Hash { + // algo: "r:sha256".into(), + // digest: "0000000000000000000000000000000000000000000000000000000000000000".into(), + // }; + + // assert_eq!(drv.outputs["out"].hash, Some(expected)); + // } + + // #[test] + // /// hash_algo set to sha256, but SRI hash passed + // fn populate_output_config_flat_sri_sha256() { + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // populate_output_configuration( + // &mut drv, + // Some("sha256-swapHA/ZO8QoDPwumMt6s5gf91oYe+oyk4EfRSyJqMg=".into()), + // Some("sha256".into()), + // Some("flat".into()), + // ) + // .expect("populate_output_configuration() should succeed"); + + // let expected = Hash { + // algo: "sha256".into(), + // digest: "b306a91c0fd93bc4280cfc2e98cb7ab3981ff75a187bea3293811f452c89a8c8".into(), // lower hex + // }; + + // assert_eq!(drv.outputs["out"].hash, Some(expected)); + // } + + // #[test] + // /// hash_algo set to empty string, SRI hash passed + // fn populate_output_config_flat_sri() { + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // populate_output_configuration( + // &mut drv, + // Some("sha256-s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA=".into()), + // Some("".into()), + // Some("flat".into()), + // ) + // .expect("populate_output_configuration() should succeed"); + + // let expected = Hash { + // algo: "sha256".into(), + // digest: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30".into(), // lower hex + // }; + + // assert_eq!(drv.outputs["out"].hash, Some(expected)); + // } + + // #[test] + // fn handle_outputs_parameter() { + // let mut vm = fake_vm(); + // let mut drv = Derivation::default(); + // drv.outputs.insert("out".to_string(), Default::default()); + + // let outputs = Value::List(NixList::construct( + // 2, + // vec![Value::String("foo".into()), Value::String("bar".into())], + // )); + // let outputs_str = outputs + // .coerce_to_string(CoercionKind::Strong, &mut vm) + // 
.unwrap(); + + // handle_derivation_parameters(&mut drv, &mut vm, "outputs", &outputs, outputs_str.as_str()) + // .expect("handling 'outputs' parameter should succeed"); + + // assert_eq!(drv.outputs.len(), 2); + // assert!(drv.outputs.contains_key("bar")); + // assert!(drv.outputs.contains_key("foo")); + // } + + // #[test] + // fn handle_args_parameter() { + // let mut vm = fake_vm(); + // let mut drv = Derivation::default(); + + // let args = Value::List(NixList::construct( + // 3, + // vec![ + // Value::String("--foo".into()), + // Value::String("42".into()), + // Value::String("--bar".into()), + // ], + // )); + + // let args_str = args + // .coerce_to_string(CoercionKind::Strong, &mut vm) + // .unwrap(); + + // handle_derivation_parameters(&mut drv, &mut vm, "args", &args, args_str.as_str()) + // .expect("handling 'args' parameter should succeed"); + + // assert_eq!( + // drv.arguments, + // vec!["--foo".to_string(), "42".to_string(), "--bar".to_string()] + // ); + // } + + #[test] + fn builtins_placeholder_hashes() { + assert_eq!( + hash_placeholder("out").as_str(), + "/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9" + ); + + assert_eq!( + hash_placeholder("").as_str(), + "/171rf4jhx57xqz3p7swniwkig249cif71pa08p80mgaf0mqz5bmr" + ); + } +} diff --git a/tvix/cli/src/errors.rs b/tvix/cli/src/errors.rs new file mode 100644 index 000000000000..5cbddcbba811 --- /dev/null +++ b/tvix/cli/src/errors.rs @@ -0,0 +1,26 @@ +use nix_compat::{derivation::DerivationError, nixhash}; +use std::rc::Rc; +use thiserror::Error; + +/// Errors related to derivation construction +#[derive(Debug, Error)] +pub enum Error { + #[error("an output with the name '{0}' is already defined")] + DuplicateOutput(String), + #[error("fixed-output derivations can only have the default `out`-output")] + ConflictingOutputTypes, + #[error("the environment variable '{0}' has already been set in this derivation")] + DuplicateEnvVar(String), + #[error("invalid derivation parameters: {0}")] + InvalidDerivation(DerivationError), + #[error("invalid output hash: {0}")] + InvalidOutputHash(nixhash::Error), + #[error("invalid output hash mode: '{0}', only 'recursive' and 'flat` are supported")] + InvalidOutputHashMode(String), +} + +impl From<Error> for tvix_eval::ErrorKind { + fn from(err: Error) -> Self { + tvix_eval::ErrorKind::TvixError(Rc::new(err)) + } +} diff --git a/tvix/cli/src/fetchurl.nix b/tvix/cli/src/fetchurl.nix new file mode 100644 index 000000000000..3f182a5a319b --- /dev/null +++ b/tvix/cli/src/fetchurl.nix @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: LGPL-2.1 +# +# This file is vendored from C++ Nix, as it needs to be bundled with +# an evaluator to be able to evaluate nixpkgs. +# +# Source: https://github.com/NixOS/nix/blob/2.3.16/corepkgs/fetchurl.nix + +{ system ? "" # obsolete +, url +, hash ? "" # an SRI hash + + # Legacy hash specification +, md5 ? "" +, sha1 ? "" +, sha256 ? "" +, sha512 ? "" +, outputHash ? if hash != "" then hash else if sha512 != "" then sha512 else if sha1 != "" then sha1 else if md5 != "" then md5 else sha256 +, outputHashAlgo ? if hash != "" then "" else if sha512 != "" then "sha512" else if sha1 != "" then "sha1" else if md5 != "" then "md5" else "sha256" + +, executable ? false +, unpack ? false +, name ? baseNameOf (toString url) +}: + +derivation { + builder = "builtin:fetchurl"; + + # New-style output content requirements. 
+ inherit outputHashAlgo outputHash; + outputHashMode = if unpack || executable then "recursive" else "flat"; + + inherit name url executable unpack; + + system = "builtin"; + + # No need to double the amount of network traffic + preferLocalBuild = true; + + impureEnvVars = [ + # We borrow these environment variables from the caller to allow + # easy proxy configuration. This is impure, but a fixed-output + # derivation like fetchurl is allowed to do so since its result is + # by definition pure. + "http_proxy" + "https_proxy" + "ftp_proxy" + "all_proxy" + "no_proxy" + ]; + + # To make "nix-prefetch-url" work. + urls = [ url ]; +} diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs new file mode 100644 index 000000000000..07373ef0da7a --- /dev/null +++ b/tvix/cli/src/known_paths.rs @@ -0,0 +1,186 @@ +//! This module implements logic required for persisting known paths +//! during an evaluation. +//! +//! Tvix needs to be able to keep track of each Nix store path that it +//! knows about during the scope of a single evaluation and its +//! related builds. +//! +//! This data is required to scan derivation inputs for the build +//! references (the "build closure") that they make use of. +//! +//! Please see //tvix/eval/docs/build-references.md for more +//! information. + +use crate::refscan::{ReferenceScanner, STORE_PATH_LEN}; +use nix_compat::nixhash::NixHash; +use std::{ + collections::{hash_map, BTreeSet, HashMap}, + ops::Index, +}; + +#[derive(Debug, PartialEq)] +pub enum PathKind { + /// A literal derivation (`.drv`-file), and the *names* of its outputs. + Derivation { output_names: BTreeSet<String> }, + + /// An output of a derivation, its name, and the path of its derivation. + Output { name: String, derivation: String }, + + /// A plain store path (e.g. source files copied to the store). + Plain, +} + +#[derive(Debug, PartialEq)] +pub struct KnownPath { + pub path: String, + pub kind: PathKind, +} + +impl KnownPath { + fn new(path: String, kind: PathKind) -> Self { + KnownPath { path, kind } + } +} + +/// Internal struct to prevent accidental leaks of the truncated path +/// names. +#[repr(transparent)] +#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)] +pub struct PathName(String); + +impl From<&str> for PathName { + fn from(s: &str) -> Self { + PathName(s[..STORE_PATH_LEN].to_string()) + } +} + +/// This instance is required to pass PathName instances as needles to +/// the reference scanner. +impl AsRef<[u8]> for PathName { + fn as_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +#[derive(Debug, Default)] +pub struct KnownPaths { + /// All known paths, keyed by a truncated version of their store + /// path used for reference scanning. + paths: HashMap<PathName, KnownPath>, + + /// All known derivation or FOD hashes. + /// + /// Keys are derivation paths, values is the NixHash. + derivation_or_fod_hashes: HashMap<String, NixHash>, +} + +impl Index<&PathName> for KnownPaths { + type Output = KnownPath; + + fn index(&self, index: &PathName) -> &Self::Output { + &self.paths[index] + } +} + +impl KnownPaths { + fn insert_path(&mut self, path: String, path_kind: PathKind) { + match self.paths.entry(path.as_str().into()) { + hash_map::Entry::Vacant(entry) => { + entry.insert(KnownPath::new(path, path_kind)); + } + + hash_map::Entry::Occupied(mut entry) => { + match (path_kind, &mut entry.get_mut().kind) { + // These variant combinations require no "merging action". + (PathKind::Plain, PathKind::Plain) => (), + (PathKind::Output { .. 
}, PathKind::Output { .. }) => (), + + ( + PathKind::Derivation { output_names: new }, + PathKind::Derivation { + output_names: ref mut old, + }, + ) => { + old.extend(new); + } + + _ => panic!( + "path '{}' inserted twice with different types", + entry.key().0 + ), + }; + } + }; + } + + /// Mark a plain path as known. + pub fn plain<S: ToString>(&mut self, path: S) { + self.insert_path(path.to_string(), PathKind::Plain); + } + + /// Mark a derivation as known. + pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) { + self.insert_path( + path.to_string(), + PathKind::Derivation { + output_names: outputs.iter().map(ToString::to_string).collect(), + }, + ); + } + + /// Mark a derivation output path as known. + pub fn output<P: ToString, N: ToString, D: ToString>( + &mut self, + output_path: P, + name: N, + drv_path: D, + ) { + self.insert_path( + output_path.to_string(), + PathKind::Output { + name: name.to_string(), + derivation: drv_path.to_string(), + }, + ); + } + + /// Checks whether there are any known paths. If not, a reference + /// scanner can not be created. + pub fn is_empty(&self) -> bool { + self.paths.is_empty() + } + + /// Create a reference scanner from the current set of known paths. + pub fn reference_scanner(&self) -> ReferenceScanner<PathName> { + let candidates = self.paths.keys().map(Clone::clone).collect(); + ReferenceScanner::new(candidates) + } + + /// Fetch the opaque "hash derivation modulo" for a given derivation path. + pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash { + // TODO: we rely on an invariant that things *should* have + // been calculated if we get this far. + self.derivation_or_fod_hashes[drv_path].clone() + } + + pub fn add_hash_derivation_modulo<D: ToString>( + &mut self, + drv: D, + hash_derivation_modulo: &NixHash, + ) { + #[allow(unused_variables)] // assertions on this only compiled in debug builds + let old = self + .derivation_or_fod_hashes + .insert(drv.to_string(), hash_derivation_modulo.to_owned()); + + #[cfg(debug_assertions)] + { + if let Some(old) = old { + debug_assert!( + old == *hash_derivation_modulo, + "hash derivation modulo for a given derivation should always be calculated the same" + ); + } + } + } +} diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs new file mode 100644 index 000000000000..ebcfe4b800b8 --- /dev/null +++ b/tvix/cli/src/main.rs @@ -0,0 +1,287 @@ +mod derivation; +mod errors; +mod known_paths; +mod refscan; +mod tvix_io; +mod tvix_store_io; + +use std::cell::RefCell; +use std::rc::Rc; +use std::sync::Arc; +use std::{fs, path::PathBuf}; + +use clap::Parser; +use known_paths::KnownPaths; +use rustyline::{error::ReadlineError, Editor}; +use tvix_castore::blobservice::MemoryBlobService; +use tvix_castore::directoryservice::MemoryDirectoryService; +use tvix_eval::observer::{DisassemblingObserver, TracingObserver}; +use tvix_eval::Value; +use tvix_store::pathinfoservice::MemoryPathInfoService; +use tvix_store_io::TvixStoreIO; + +#[derive(Parser)] +struct Args { + /// Path to a script to evaluate + script: Option<PathBuf>, + + #[clap(long, short = 'E')] + expr: Option<String>, + + /// Dump the raw AST to stdout before interpreting + #[clap(long, env = "TVIX_DISPLAY_AST")] + display_ast: bool, + + /// Dump the bytecode to stdout before evaluating + #[clap(long, env = "TVIX_DUMP_BYTECODE")] + dump_bytecode: bool, + + /// Trace the runtime of the VM + #[clap(long, env = "TVIX_TRACE_RUNTIME")] + trace_runtime: bool, + + /// Only compile, but do not execute code. 
This will make Tvix act + /// sort of like a linter. + #[clap(long)] + compile_only: bool, + + /// Don't print warnings. + #[clap(long)] + no_warnings: bool, + + /// A colon-separated list of directories to use to resolve `<...>`-style paths + #[clap(long, short = 'I', env = "NIX_PATH")] + nix_search_path: Option<String>, + + /// Print "raw" (unquoted) output. + #[clap(long)] + raw: bool, + + /// Strictly evaluate values, traversing them and forcing e.g. + /// elements of lists and attribute sets before printing the + /// return value. + #[clap(long)] + strict: bool, +} + +/// Interprets the given code snippet, printing out warnings, errors +/// and the result itself. The return value indicates whether +/// evaluation succeeded. +fn interpret(code: &str, path: Option<PathBuf>, args: &Args, explain: bool) -> bool { + let mut eval = tvix_eval::Evaluation::new_impure(code, path); + let known_paths: Rc<RefCell<KnownPaths>> = Default::default(); + + eval.strict = args.strict; + + let blob_service = Arc::new(MemoryBlobService::default()); + let directory_service = Arc::new(MemoryDirectoryService::default()); + let path_info_service = Arc::new(MemoryPathInfoService::new( + blob_service.clone(), + directory_service.clone(), + )); + + let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); + + eval.io_handle = Box::new(tvix_io::TvixIO::new( + known_paths.clone(), + TvixStoreIO::new( + blob_service, + directory_service, + path_info_service, + tokio_runtime.handle().clone(), + ), + )); + + // bundle fetchurl.nix (used in nixpkgs) by resolving <nix> to + // `/__corepkgs__`, which has special handling in [`nix_compat`]. + eval.nix_path = args + .nix_search_path + .as_ref() + .map(|p| format!("nix=/__corepkgs__:{}", p)) + .or_else(|| Some("nix=/__corepkgs__".to_string())); + + eval.builtins + .extend(derivation::derivation_builtins(known_paths)); + + // Add the actual `builtins.derivation` from compiled Nix code + eval.src_builtins + .push(("derivation", include_str!("derivation.nix"))); + + let source_map = eval.source_map(); + let result = { + let mut compiler_observer = + DisassemblingObserver::new(source_map.clone(), std::io::stderr()); + if args.dump_bytecode { + eval.compiler_observer = Some(&mut compiler_observer); + } + + let mut runtime_observer = TracingObserver::new(std::io::stderr()); + if args.trace_runtime { + eval.runtime_observer = Some(&mut runtime_observer); + } + + eval.evaluate() + }; + + if args.display_ast { + if let Some(ref expr) = result.expr { + eprintln!("AST: {}", tvix_eval::pretty_print_expr(expr)); + } + } + + for error in &result.errors { + error.fancy_format_stderr(&source_map); + } + + if !args.no_warnings { + for warning in &result.warnings { + warning.fancy_format_stderr(&source_map); + } + } + + if let Some(value) = result.value.as_ref() { + if explain { + println!("=> {}", value.explain()); + } else { + println_result(value, args.raw); + } + } + + // inform the caller about any errors + result.errors.is_empty() +} + +/// Interpret the given code snippet, but only run the Tvix compiler +/// on it and return errors and warnings. 
+fn lint(code: &str, path: Option<PathBuf>, args: &Args) -> bool { + let mut eval = tvix_eval::Evaluation::new_impure(code, path); + eval.strict = args.strict; + + let source_map = eval.source_map(); + + let mut compiler_observer = DisassemblingObserver::new(source_map.clone(), std::io::stderr()); + + if args.dump_bytecode { + eval.compiler_observer = Some(&mut compiler_observer); + } + + if args.trace_runtime { + eprintln!("warning: --trace-runtime has no effect with --compile-only!"); + } + + let result = eval.compile_only(); + + if args.display_ast { + if let Some(ref expr) = result.expr { + eprintln!("AST: {}", tvix_eval::pretty_print_expr(expr)); + } + } + + for error in &result.errors { + error.fancy_format_stderr(&source_map); + } + + for warning in &result.warnings { + warning.fancy_format_stderr(&source_map); + } + + // inform the caller about any errors + result.errors.is_empty() +} + +fn main() { + let args = Args::parse(); + + if let Some(file) = &args.script { + run_file(file.clone(), &args) + } else if let Some(expr) = &args.expr { + if !interpret(expr, None, &args, false) { + std::process::exit(1); + } + } else { + run_prompt(&args) + } +} + +fn run_file(mut path: PathBuf, args: &Args) { + if path.is_dir() { + path.push("default.nix"); + } + let contents = fs::read_to_string(&path).expect("failed to read the input file"); + + let success = if args.compile_only { + lint(&contents, Some(path), args) + } else { + interpret(&contents, Some(path), args, false) + }; + + if !success { + std::process::exit(1); + } +} + +fn println_result(result: &Value, raw: bool) { + if raw { + println!("{}", result.to_str().unwrap().as_str()) + } else { + println!("=> {} :: {}", result, result.type_of()) + } +} + +fn state_dir() -> Option<PathBuf> { + let mut path = dirs::data_dir(); + if let Some(p) = path.as_mut() { + p.push("tvix") + } + path +} + +fn run_prompt(args: &Args) { + let mut rl = Editor::<()>::new().expect("should be able to launch rustyline"); + + if args.compile_only { + eprintln!("warning: `--compile-only` has no effect on REPL usage!"); + } + + let history_path = match state_dir() { + // Attempt to set up these paths, but do not hard fail if it + // doesn't work. + Some(mut path) => { + let _ = std::fs::create_dir_all(&path); + path.push("history.txt"); + let _ = rl.load_history(&path); + Some(path) + } + + None => None, + }; + + loop { + let readline = rl.readline("tvix-repl> "); + match readline { + Ok(line) => { + if line.is_empty() { + continue; + } + + rl.add_history_entry(&line); + + if let Some(without_prefix) = line.strip_prefix(":d ") { + interpret(without_prefix, None, args, true); + } else { + interpret(&line, None, args, false); + } + } + Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break, + + Err(err) => { + eprintln!("error: {}", err); + break; + } + } + } + + if let Some(path) = history_path { + rl.save_history(&path).unwrap(); + } +} diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs new file mode 100644 index 000000000000..0e0bb6c77828 --- /dev/null +++ b/tvix/cli/src/refscan.rs @@ -0,0 +1,115 @@ +//! Simple scanner for non-overlapping, known references of Nix store paths in a +//! given string. +//! +//! This is used for determining build references (see +//! //tvix/eval/docs/build-references.md for more details). +//! +//! The scanner itself is using the Wu-Manber string-matching algorithm, using +//! our fork of the `wu-mamber` crate. 
+ +use std::collections::BTreeSet; +use wu_manber::TwoByteWM; + +pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len(); + +/// Represents a "primed" reference scanner with an automaton that knows the set +/// of store paths to scan for. +pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> { + candidates: Vec<P>, + searcher: Option<TwoByteWM>, + matches: Vec<usize>, +} + +impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> { + /// Construct a new `ReferenceScanner` that knows how to scan for the given + /// candidate store paths. + pub fn new(candidates: Vec<P>) -> Self { + let searcher = if candidates.is_empty() { + None + } else { + Some(TwoByteWM::new(&candidates)) + }; + + ReferenceScanner { + searcher, + candidates, + matches: Default::default(), + } + } + + /// Scan the given str for all non-overlapping matches and collect them + /// in the scanner. + pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) { + if haystack.as_ref().len() < STORE_PATH_LEN { + return; + } + + if let Some(searcher) = &self.searcher { + for m in searcher.find(haystack) { + self.matches.push(m.pat_idx); + } + } + } + + /// Finalise the reference scanner and return the resulting matches. + pub fn finalise(self) -> BTreeSet<P> { + self.matches + .into_iter() + .map(|idx| self.candidates[idx].clone()) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // The actual derivation of `nixpkgs.hello`. + const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; + + #[test] + fn test_no_patterns() { + let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]); + + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn test_single_match() { + let mut scanner = ReferenceScanner::new(vec![ + "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(), + ]); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 1); + assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16")); + } + + #[test] + fn test_multiple_matches() { + let candidates = vec![ + // these exist in the drv: + 
"/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(), + "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(), + "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(), + // this doesn't: + "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(), + ]; + + let mut scanner = ReferenceScanner::new(candidates.clone()); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + assert_eq!(result.len(), 3); + + for c in candidates[..3].iter() { + assert!(result.contains(c)); + } + } +} diff --git a/tvix/cli/src/tvix_io.rs b/tvix/cli/src/tvix_io.rs new file mode 100644 index 000000000000..74f91138bff8 --- /dev/null +++ b/tvix/cli/src/tvix_io.rs @@ -0,0 +1,79 @@ +//! This module implements a wrapper around tvix-eval's [EvalIO] type, +//! adding functionality which is required by tvix-cli: +//! +//! 1. Marking plain paths known to the reference scanner. +//! 2. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping. +//! +//! All uses of [EvalIO] in tvix-cli must make use of this wrapper, +//! otherwise fundamental features like nixpkgs bootstrapping and hash +//! calculation will not work. + +use crate::KnownPaths; +use std::cell::RefCell; +use std::io; +use std::path::{Path, PathBuf}; +use std::rc::Rc; +use tvix_eval::{EvalIO, FileType}; + +// TODO: Merge this together with TvixStoreIO? +pub(crate) struct TvixIO<T: EvalIO> { + /// Ingested paths must be reported to this known paths tracker + /// for accurate build reference scanning. + known_paths: Rc<RefCell<KnownPaths>>, + + // Actual underlying [EvalIO] implementation. + actual: T, +} + +impl<T: EvalIO> TvixIO<T> { + pub(crate) fn new(known_paths: Rc<RefCell<KnownPaths>>, actual: T) -> Self { + Self { + known_paths, + actual, + } + } +} + +impl<T: EvalIO> EvalIO for TvixIO<T> { + fn store_dir(&self) -> Option<String> { + self.actual.store_dir() + } + + fn import_path(&self, path: &Path) -> Result<PathBuf, io::Error> { + let imported_path = self.actual.import_path(path)?; + self.known_paths + .borrow_mut() + .plain(imported_path.to_string_lossy()); + + Ok(imported_path) + } + + fn path_exists(&self, path: &Path) -> Result<bool, io::Error> { + if path.starts_with("/__corepkgs__") { + return Ok(true); + } + + self.actual.path_exists(path) + } + + fn read_to_string(&self, path: &Path) -> Result<String, io::Error> { + // Bundled version of corepkgs/fetchurl.nix. The counterpart + // of this happens in `main`, where the `nix_path` of the + // evaluation has `nix=/__corepkgs__` added to it. + // + // This workaround is similar to what cppnix does for passing + // the path through. + // + // TODO: this comparison is bad and allocates, we should use + // the sane path library. + if path.starts_with("/__corepkgs__/fetchurl.nix") { + return Ok(include_str!("fetchurl.nix").to_string()); + } + + self.actual.read_to_string(path) + } + + fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> { + self.actual.read_dir(path) + } +} diff --git a/tvix/cli/src/tvix_store_io.rs b/tvix/cli/src/tvix_store_io.rs new file mode 100644 index 000000000000..cc69357282fd --- /dev/null +++ b/tvix/cli/src/tvix_store_io.rs @@ -0,0 +1,356 @@ +//! This module provides an implementation of EvalIO talking to tvix-store. 
+ +use nix_compat::store_path::{self, StorePath}; +use std::{io, path::Path, path::PathBuf, sync::Arc}; +use tokio::io::AsyncReadExt; +use tracing::{error, instrument, warn}; +use tvix_eval::{EvalIO, FileType, StdIO}; + +use tvix_castore::{ + blobservice::BlobService, + directoryservice::{self, DirectoryService}, + import, + proto::{node::Node, NamedNode}, + B3Digest, +}; +use tvix_store::{ + nar::calculate_size_and_sha256, + pathinfoservice::PathInfoService, + proto::{NarInfo, PathInfo}, +}; + +/// Implements [EvalIO], asking given [PathInfoService], [DirectoryService] +/// and [BlobService]. +/// +/// In case the given path does not exist in these stores, we ask StdIO. +/// This is to both cover cases of syntactically valid store paths, that exist +/// on the filesystem (still managed by Nix), as well as being able to read +/// files outside store paths. +pub struct TvixStoreIO { + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + std_io: StdIO, + tokio_handle: tokio::runtime::Handle, +} + +impl TvixStoreIO { + pub fn new( + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + tokio_handle: tokio::runtime::Handle, + ) -> Self { + Self { + blob_service, + directory_service, + path_info_service, + std_io: StdIO {}, + tokio_handle, + } + } + + /// for a given [StorePath] and additional [Path] inside the store path, + /// look up the [PathInfo], and if it exists, and then use + /// [directoryservice::traverse_to] to return the + /// [Node] specified by `sub_path`. + #[instrument(skip(self), ret, err)] + fn store_path_to_root_node( + &self, + store_path: &StorePath, + sub_path: &Path, + ) -> Result<Option<Node>, io::Error> { + let path_info_service = self.path_info_service.clone(); + let digest = store_path.digest.clone(); + let task = self + .tokio_handle + .spawn(async move { path_info_service.get(digest).await }); + let path_info = match self.tokio_handle.block_on(task).unwrap()? { + // If there's no PathInfo found, early exit + None => return Ok(None), + Some(path_info) => path_info, + }; + + let root_node = { + match path_info.node { + None => { + warn!( + "returned PathInfo {:?} node is None, this shouldn't happen.", + &path_info + ); + return Ok(None); + } + Some(root_node) => match root_node.node { + None => { + warn!("node for {:?} is None, this shouldn't happen.", &root_node); + return Ok(None); + } + Some(root_node) => root_node, + }, + } + }; + + let directory_service = self.directory_service.clone(); + let sub_path = sub_path.to_owned(); + let task = self.tokio_handle.spawn(async move { + directoryservice::descend_to(directory_service, root_node, &sub_path).await + }); + + Ok(self.tokio_handle.block_on(task).unwrap()?) + } +} + +impl EvalIO for TvixStoreIO { + #[instrument(skip(self), ret, err)] + fn path_exists(&self, path: &Path) -> Result<bool, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if self + .store_path_to_root_node(&store_path, &sub_path)? + .is_some() + { + Ok(true) + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.path_exists(path) + } + } else { + // The store path is no store path, so do regular StdIO. 
+ self.std_io.path_exists(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_to_string(&self, path: &Path) -> Result<String, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + // depending on the node type, treat read_to_string differently + match node { + Node::Directory(_) => { + // This would normally be a io::ErrorKind::IsADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("tried to read directory at {:?} to string", path), + )) + } + Node::File(file_node) => { + let digest: B3Digest = + file_node.digest.clone().try_into().map_err(|_e| { + error!( + file_node = ?file_node, + "invalid digest" + ); + io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid digest length in file node: {:?}", file_node), + ) + })?; + + let blob_service = self.blob_service.clone(); + + let task = self.tokio_handle.spawn(async move { + let mut reader = { + let resp = blob_service.open_read(&digest).await?; + match resp { + Some(blob_reader) => blob_reader, + None => { + error!( + blob.digest = %digest, + "blob not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("blob {} not found", &digest), + ))? + } + } + }; + + let mut buf = String::new(); + + reader.read_to_string(&mut buf).await?; + Ok(buf) + }); + + self.tokio_handle.block_on(task).unwrap() + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_to_string for symlinks is unsupported", + ))?, + } + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.read_to_string(path) + } + } else { + // The store path is no store path, so do regular StdIO. + self.std_io.read_to_string(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + match node { + Node::Directory(directory_node) => { + // fetch the Directory itself. + let digest: B3Digest = + directory_node.digest.clone().try_into().map_err(|_e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!( + "invalid digest length in directory node: {:?}", + directory_node + ), + ) + })?; + + let directory_service = self.directory_service.clone(); + let digest_clone = digest.clone(); + let task = self + .tokio_handle + .spawn(async move { directory_service.get(&digest_clone).await }); + if let Some(directory) = self.tokio_handle.block_on(task).unwrap()? { + let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new(); + for node in directory.nodes() { + children.push(match node { + Node::Directory(e) => (e.name, FileType::Directory), + Node::File(e) => (e.name, FileType::Regular), + Node::Symlink(e) => (e.name, FileType::Symlink), + }) + } + Ok(children) + } else { + // If we didn't get the directory node that's linked, that's a store inconsistency! + error!( + directory.digest = %digest, + path = ?path, + "directory not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("directory {digest} does not exist"), + ))? 
+ } + } + Node::File(_file_node) => { + // This would normally be a io::ErrorKind::NotADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + "tried to readdir path {:?}, which is a file", + ))? + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_dir for symlinks is unsupported", + ))?, + } + } else { + self.std_io.read_dir(path) + } + } else { + self.std_io.read_dir(path) + } + } + + #[instrument(skip(self), ret, err)] + fn import_path(&self, path: &std::path::Path) -> Result<PathBuf, std::io::Error> { + let p = path.to_owned(); + let blob_service = self.blob_service.clone(); + let directory_service = self.directory_service.clone(); + let path_info_service = self.path_info_service.clone(); + + let task = self.tokio_handle.spawn(async move { + import_path_with_pathinfo(blob_service, directory_service, path_info_service, &p).await + }); + + let path_info = self.tokio_handle.block_on(task).unwrap()?; + + // from the [PathInfo], extract the store path (as string). + Ok({ + let mut path = PathBuf::from(nix_compat::store_path::STORE_DIR_WITH_SLASH); + + let root_node_name = path_info.node.unwrap().node.unwrap().get_name().to_vec(); + + // This must be a string, otherwise it would have failed validation. + let root_node_name = String::from_utf8(root_node_name).unwrap(); + + // append to the PathBuf + path.push(root_node_name); + + // and return it + path + }) + } + + #[instrument(skip(self), ret)] + fn store_dir(&self) -> Option<String> { + Some("/nix/store".to_string()) + } +} + +/// Imports a given path on the filesystem into the store, and returns the +/// [PathInfo] describing the path, that was sent to +/// [PathInfoService]. +#[instrument(skip(blob_service, directory_service, path_info_service), ret, err)] +async fn import_path_with_pathinfo( + blob_service: Arc<dyn BlobService>, + directory_service: Arc<dyn DirectoryService>, + path_info_service: Arc<dyn PathInfoService>, + path: &std::path::Path, +) -> Result<PathInfo, io::Error> { + // Call [import::ingest_path], which will walk over the given path and return a root_node. + let root_node = import::ingest_path(blob_service.clone(), directory_service.clone(), path) + .await + .expect("error during import_path"); + + // Render the NAR. + let (nar_size, nar_sha256) = + calculate_size_and_sha256(&root_node, blob_service.clone(), directory_service.clone()) + .await + .expect("error during nar calculation"); // TODO: handle error + + // TODO: make a path_to_name helper function? + let name = path + .file_name() + .expect("path must not be ..") + .to_str() + .expect("path must be valid unicode"); + + let output_path = store_path::build_nar_based_store_path(&nar_sha256, name); + + // assemble a new root_node with a name that is derived from the nar hash. + let root_node = root_node.rename(output_path.to_string().into_bytes().into()); + + // assemble the [PathInfo] object. + let path_info = PathInfo { + node: Some(tvix_castore::proto::Node { + node: Some(root_node), + }), + // There's no reference scanning on path contents ingested like this. + references: vec![], + narinfo: Some(NarInfo { + nar_size, + nar_sha256: nar_sha256.to_vec().into(), + signatures: vec![], + reference_names: vec![], + // TODO: narinfo for talosctl.src contains `CA: fixed:r:sha256:1x13j5hy75221bf6kz7cpgld9vgic6bqx07w5xjs4pxnksj6lxb6` + // do we need this anywhere? 
+ }), + }; + + // put into [PathInfoService], and return the [PathInfo] that we get + // back from there (it might contain additional signatures). + let path_info = path_info_service.put(path_info).await?; + + Ok(path_info) +} |
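
As a closing illustration, here is another hypothetical sketch (also not part of this commit) of the reference-scanning flow that derivationStrict relies on, using only the KnownPaths and ReferenceScanner APIs introduced above; the store paths are the ones used by the commented-out tests in derivation.rs.

// Hypothetical sketch of the reference-scanning flow in derivationStrict:
// paths become known first, then every derivation string is scanned for them.
use crate::known_paths::KnownPaths;

fn scan_for_build_references(drv_strings: &[String]) {
    let mut known_paths = KnownPaths::default();

    // Paths are registered as they are imported into the store or produced
    // by earlier derivations during the same evaluation.
    known_paths.plain("/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo");
    known_paths.drv(
        "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv",
        &["out"],
    );
    known_paths.output(
        "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar",
        "out",
        "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv",
    );

    // A scanner is only built when there is something to scan for, mirroring
    // the is_empty() check in builtin_derivation_strict.
    if !known_paths.is_empty() {
        let mut scanner = known_paths.reference_scanner();
        for s in drv_strings {
            scanner.scan(s);
        }

        // finalise() yields the truncated PathName matches; derivation.rs
        // resolves them back to full KnownPath entries by indexing KnownPaths.
        for path_name in scanner.finalise() {
            let known = &known_paths[&path_name];
            println!("build reference: {}", known.path);
        }
    }
}

Note that the scanner only ever sees the truncated PathName keys; as the comment in known_paths.rs explains, this is deliberate, so shortened store paths cannot accidentally leak into derivation fields.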