From f27f5ef0c990c3cab9182437bb76593be9b0a0fd Mon Sep 17 00:00:00 2001
From: Vincent Ambo
Date: Thu, 12 Jan 2023 13:59:58 +0300
Subject: feat(tvix/cli): add known_paths module

This module implements types used to track the set of known paths in
the context of an evaluation. These are used to determine the build
references of a derivation.

Change-Id: I81e15ae33632784e699128916485751613b231a3
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7816
Tested-by: BuildkiteCI
Reviewed-by: flokli
---
 tvix/cli/src/known_paths.rs | 114 ++++++++++++++++++++++++++++++++++++++++++++
 tvix/cli/src/main.rs        |   1 +
 tvix/cli/src/refscan.rs     |  33 +++++++++++--
 3 files changed, 143 insertions(+), 5 deletions(-)
 create mode 100644 tvix/cli/src/known_paths.rs

(limited to 'tvix/cli/src')

diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
new file mode 100644
index 0000000000..10191c967e
--- /dev/null
+++ b/tvix/cli/src/known_paths.rs
@@ -0,0 +1,114 @@
+//! This module implements logic required for persisting known paths
+//! during an evaluation.
+//!
+//! Tvix needs to be able to keep track of each Nix store path that it
+//! knows about during the scope of a single evaluation and its
+//! related builds.
+//!
+//! This data is required to scan derivation inputs for the build
+//! references (the "build closure") that they make use of.
+//!
+//! Please see //tvix/eval/docs/build-references.md for more
+//! information.
+
+use crate::refscan::ReferenceScanner;
+use std::{
+    collections::{hash_map, BTreeSet, HashMap},
+    ops::Index,
+};
+
+#[derive(Debug, PartialEq)]
+pub enum PathType {
+    /// A literal derivation (`.drv`-file), and the *names* of its outputs.
+    Derivation { output_names: BTreeSet<String> },
+
+    /// An output of a derivation, its name, and the path of its derivation.
+    Output { name: String, derivation: String },
+
+    /// A plain store path (e.g. source files copied to the store).
+    Plain,
+}
+
+pub struct KnownPaths {
+    /// All known paths, and their associated [`PathType`].
+    paths: HashMap<String, PathType>,
+}
+
+impl Index<&str> for KnownPaths {
+    type Output = PathType;
+
+    fn index(&self, index: &str) -> &Self::Output {
+        &self.paths[index]
+    }
+}
+
+impl KnownPaths {
+    /// Mark a plain path as known.
+    pub fn plain<S: ToString>(&mut self, path: S) {
+        self.paths.insert(path.to_string(), PathType::Plain);
+    }
+
+    /// Mark a derivation as known.
+    pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) {
+        match self.paths.entry(path.to_string()) {
+            hash_map::Entry::Occupied(mut entry) => {
+                for output in outputs {
+                    match entry.get_mut() {
+                        PathType::Derivation {
+                            ref mut output_names,
+                        } => {
+                            output_names.insert(output.to_string());
+                        }
+
+                        // Branches like this explicitly panic right now to find odd
+                        // situations where something unexpected is done with the
+                        // same path being inserted twice as different types.
+                        _ => panic!(
+                            "bug: {} is already a known path, but not a derivation!",
+                            path.to_string()
+                        ),
+                    }
+                }
+            }
+
+            hash_map::Entry::Vacant(entry) => {
+                let output_names = outputs.iter().map(|o| o.to_string()).collect();
+                entry.insert(PathType::Derivation { output_names });
+            }
+        }
+    }
+
+    /// Mark a derivation output path as known.
+    pub fn output<P: ToString, N: ToString, D: ToString>(
+        &mut self,
+        output_path: P,
+        name: N,
+        drv_path: D,
+    ) {
+        match self.paths.entry(output_path.to_string()) {
+            hash_map::Entry::Occupied(entry) => {
+                /* nothing to do, really! */
+                debug_assert!(
+                    *entry.get()
+                        == PathType::Output {
+                            name: name.to_string(),
+                            derivation: drv_path.to_string(),
+                        }
+                );
+            }
+
+            hash_map::Entry::Vacant(entry) => {
+                entry.insert(PathType::Output {
+                    name: name.to_string(),
+                    derivation: drv_path.to_string(),
+                });
+            }
+        }
+    }
+
+    /// Create a reference scanner from the current set of known paths.
+    pub fn reference_scanner<'a>(&'a self) -> ReferenceScanner<'a> {
+        let candidates: Vec<&'a str> = self.paths.keys().map(|s| s.as_str()).collect();
+        ReferenceScanner::new(candidates)
+    }
+}
diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs
index 0f837b346c..42297d365b 100644
--- a/tvix/cli/src/main.rs
+++ b/tvix/cli/src/main.rs
@@ -1,3 +1,4 @@
+mod known_paths;
 mod nix_compat;
 mod refscan;
 
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
index 31fccb797a..6ef486385d 100644
--- a/tvix/cli/src/refscan.rs
+++ b/tvix/cli/src/refscan.rs
@@ -13,17 +13,34 @@ use std::io;
 
 /// Represents a "primed" reference scanner with an automaton that knows the set
 /// of store paths to scan for.
-pub struct ReferenceScanner<'c, 's> {
-    candidates: &'c [&'s str],
+pub struct ReferenceScanner<'s> {
+    candidates: Vec<&'s str>,
     searcher: AhoCorasick,
     matches: BTreeSet<&'s str>,
 }
 
-impl<'c, 's> ReferenceScanner<'c, 's> {
+pub trait ToOwnedVec<T> {
+    fn to_owned_vec(self) -> Vec<T>;
+}
+
+impl<T: Clone> ToOwnedVec<T> for &[T] {
+    fn to_owned_vec(self) -> Vec<T> {
+        self.to_vec()
+    }
+}
+
+impl<T> ToOwnedVec<T> for Vec<T> {
+    fn to_owned_vec(self) -> Vec<T> {
+        self
+    }
+}
+
+impl<'s> ReferenceScanner<'s> {
     /// Construct a new `ReferenceScanner` that knows how to scan for the given
     /// candidate store paths.
-    pub fn new(candidates: &'c [&'s str]) -> Self {
-        let searcher = AhoCorasick::new_auto_configured(candidates);
+    pub fn new<V: ToOwnedVec<&'s str>>(candidates: V) -> Self {
+        let candidates = candidates.to_owned_vec();
+        let searcher = AhoCorasick::new_auto_configured(&candidates);
 
         ReferenceScanner {
             searcher,
@@ -70,6 +87,12 @@ mod tests {
     // The actual derivation of `nixpkgs.hello`.
     const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
 
+    impl<T: Clone, const N: usize> ToOwnedVec<T> for &[T; N] {
+        fn to_owned_vec(self) -> Vec<T> {
+            self.to_vec()
+        }
+    }
+
     #[test]
     fn test_empty() {
         let mut scanner = ReferenceScanner::new(&[]);
-- 
cgit 1.4.1