From 38e8c2e95931673deb7cb939a05ac9bdaf305340 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Fri, 3 Feb 2023 00:20:02 +0300 Subject: fix(tvix/cli): keep tracking full paths in known_paths We need to distinguish explicitly between the paths used for the scanner, and the paths that populate the derivation inputs. The full paths must be accessible from the result of the refscanner to populate drv fields correctly. This was previously hidden by debug changes that masked actual IO operations with no-ops. Change-Id: I037af6e6bbe2b573034d695f8779bee1b56bc125 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8022 Reviewed-by: flokli Tested-by: BuildkiteCI --- tvix/cli/src/derivation.rs | 28 ++++++++++------ tvix/cli/src/known_paths.rs | 78 ++++++++++++++++++++++++++++++++------------- tvix/cli/src/refscan.rs | 20 ++++++------ 3 files changed, 84 insertions(+), 42 deletions(-) diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index 6af3d24a2487..88c5e522966d 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -8,7 +8,7 @@ use tvix_eval::builtin_macros::builtins; use tvix_eval::{AddContext, CoercionKind, ErrorKind, NixAttrs, NixList, Value, VM}; use crate::errors::Error; -use crate::known_paths::{KnownPaths, PathType}; +use crate::known_paths::{KnownPaths, PathKind, PathName}; // Constants used for strangely named fields in derivation inputs. const STRUCTURED_ATTRS: &str = "__structuredAttrs"; @@ -41,18 +41,19 @@ fn populate_outputs(vm: &mut VM, drv: &mut Derivation, outputs: NixList) -> Resu /// Populate the inputs of a derivation from the build references /// found when scanning the derivation's parameters. -fn populate_inputs>( +fn populate_inputs>( drv: &mut Derivation, known_paths: &KnownPaths, references: I, ) { for reference in references.into_iter() { - match &known_paths[&reference] { - PathType::Plain => { - drv.input_sources.insert(reference.to_string()); + let reference = &known_paths[&reference]; + match &reference.kind { + PathKind::Plain => { + drv.input_sources.insert(reference.path.clone()); } - PathType::Output { name, derivation } => { + PathKind::Output { name, derivation } => { match drv.input_derivations.entry(derivation.clone()) { btree_map::Entry::Vacant(entry) => { entry.insert(BTreeSet::from([name.clone()])); @@ -64,8 +65,8 @@ fn populate_inputs>( } } - PathType::Derivation { output_names } => { - match drv.input_derivations.entry(reference.to_string()) { + PathKind::Derivation { output_names } => { + match drv.input_derivations.entry(reference.path.clone()) { btree_map::Entry::Vacant(entry) => { entry.insert(output_names.clone()); } @@ -389,7 +390,14 @@ mod derivation_builtins { let mut refscan = state.borrow().reference_scanner(); refscan.scan_str(content.as_str()); - let refs = refscan.finalise(); + let refs = { + let paths = state.borrow(); + refscan + .finalise() + .into_iter() + .map(|path| paths[&path].path.to_string()) + .collect::>() + }; // TODO: fail on derivation references (only "plain" is allowed here) @@ -491,7 +499,7 @@ mod tests { "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv", ); - let inputs: Vec = vec![ + let inputs = vec![ "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo".into(), "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv".into(), "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar".into(), diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs index 69651d418069..251366b53c20 100644 --- a/tvix/cli/src/known_paths.rs +++ b/tvix/cli/src/known_paths.rs @@ -18,7 +18,7 @@ use std::{ }; #[derive(Debug, PartialEq)] -pub enum PathType { +pub enum PathKind { /// A literal derivation (`.drv`-file), and the *names* of its outputs. Derivation { output_names: BTreeSet }, @@ -29,10 +29,43 @@ pub enum PathType { Plain, } +#[derive(Debug, PartialEq)] +pub struct KnownPath { + pub path: String, + pub kind: PathKind, +} + +impl KnownPath { + fn new(path: String, kind: PathKind) -> Self { + KnownPath { path, kind } + } +} + +/// Internal struct to prevent accidental leaks of the truncated path +/// names. +#[repr(transparent)] +#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)] +pub struct PathName(String); + +impl From<&str> for PathName { + fn from(s: &str) -> Self { + PathName(s[..STORE_PATH_LEN].to_string()) + } +} + +/// This instance is required to pass PathName instances as needles to +/// the reference scanner. +impl AsRef<[u8]> for PathName { + fn as_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + #[derive(Debug, Default)] pub struct KnownPaths { - /// All known paths, and their associated [`PathType`]. - paths: HashMap, + /// All known paths, keyed by a truncated version of their store + /// path used for reference scanning. + paths: HashMap, /// All known replacement strings for derivations. /// @@ -41,39 +74,40 @@ pub struct KnownPaths { replacements: HashMap, } -impl Index<&str> for KnownPaths { - type Output = PathType; +impl Index<&PathName> for KnownPaths { + type Output = KnownPath; - fn index(&self, index: &str) -> &Self::Output { - &self.paths[&index[..STORE_PATH_LEN]] + fn index(&self, index: &PathName) -> &Self::Output { + &self.paths[index] } } impl KnownPaths { - fn insert_path(&mut self, path: String, path_type: PathType) { - let path = path[..STORE_PATH_LEN].to_owned(); - assert_eq!(path.len(), STORE_PATH_LEN, "should match"); - match self.paths.entry(path) { + fn insert_path(&mut self, path: String, path_kind: PathKind) { + match self.paths.entry(path.as_str().into()) { hash_map::Entry::Vacant(entry) => { - entry.insert(path_type); + entry.insert(KnownPath::new(path, path_kind)); } hash_map::Entry::Occupied(mut entry) => { - match (path_type, entry.get_mut()) { + match (path_kind, &mut entry.get_mut().kind) { // These variant combinations require no "merging action". - (PathType::Plain, PathType::Plain) => (), - (PathType::Output { .. }, PathType::Output { .. }) => (), + (PathKind::Plain, PathKind::Plain) => (), + (PathKind::Output { .. }, PathKind::Output { .. }) => (), ( - PathType::Derivation { output_names: new }, - PathType::Derivation { + PathKind::Derivation { output_names: new }, + PathKind::Derivation { output_names: ref mut old, }, ) => { old.extend(new); } - _ => panic!("path '{}' inserted twice with different types", entry.key()), + _ => panic!( + "path '{}' inserted twice with different types", + entry.key().0 + ), }; } }; @@ -81,14 +115,14 @@ impl KnownPaths { /// Mark a plain path as known. pub fn plain(&mut self, path: S) { - self.insert_path(path.to_string(), PathType::Plain); + self.insert_path(path.to_string(), PathKind::Plain); } /// Mark a derivation as known. pub fn drv(&mut self, path: P, outputs: &[O]) { self.insert_path( path.to_string(), - PathType::Derivation { + PathKind::Derivation { output_names: outputs.into_iter().map(ToString::to_string).collect(), }, ); @@ -103,7 +137,7 @@ impl KnownPaths { ) { self.insert_path( output_path.to_string(), - PathType::Output { + PathKind::Output { name: name.to_string(), derivation: drv_path.to_string(), }, @@ -117,7 +151,7 @@ impl KnownPaths { } /// Create a reference scanner from the current set of known paths. - pub fn reference_scanner(&self) -> ReferenceScanner { + pub fn reference_scanner(&self) -> ReferenceScanner { let candidates = self.paths.keys().map(Clone::clone).collect(); ReferenceScanner::new(candidates) } diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs index 4314e0164429..567a677ce15d 100644 --- a/tvix/cli/src/refscan.rs +++ b/tvix/cli/src/refscan.rs @@ -14,16 +14,16 @@ pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000". /// Represents a "primed" reference scanner with an automaton that knows the set /// of store paths to scan for. -pub struct ReferenceScanner { - candidates: Vec, +pub struct ReferenceScanner> { + candidates: Vec

, searcher: TwoByteWM, matches: Vec, } -impl ReferenceScanner { +impl> ReferenceScanner

{ /// Construct a new `ReferenceScanner` that knows how to scan for the given /// candidate store paths. - pub fn new(candidates: Vec) -> Self { + pub fn new(candidates: Vec

) -> Self { let searcher = TwoByteWM::new(&candidates); ReferenceScanner { @@ -46,7 +46,7 @@ impl ReferenceScanner { } /// Finalise the reference scanner and return the resulting matches. - pub fn finalise(self) -> BTreeSet { + pub fn finalise(self) -> BTreeSet

{ self.matches .into_iter() .map(|idx| self.candidates[idx].clone()) @@ -64,7 +64,7 @@ mod tests { #[test] fn test_single_match() { let mut scanner = ReferenceScanner::new(vec![ - "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(), + "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(), ]); scanner.scan_str(HELLO_DRV); @@ -78,11 +78,11 @@ mod tests { fn test_multiple_matches() { let candidates = vec![ // these exist in the drv: - "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(), - "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(), - "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(), + "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(), + "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(), + "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(), // this doesn't: - "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(), + "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(), ]; let mut scanner = ReferenceScanner::new(candidates.clone()); -- cgit 1.4.1