about summary refs log tree commit diff
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2023-02-02T21·20+0300
committertazjin <tazjin@tvl.su>2023-02-02T23·37+0000
commit38e8c2e95931673deb7cb939a05ac9bdaf305340 (patch)
tree42acefda078647e1b8dfcd9173b43a42beff85ee
parente6235e2932cc76b18fe8cc8acf209c5fe2e8b79f (diff)
fix(tvix/cli): keep tracking full paths in known_paths r/5827
We need to distinguish explicitly between the paths used for the
scanner, and the paths that populate the derivation inputs. The full
paths must be accessible from the result of the refscanner to populate
drv fields correctly.

This was previously hidden by debug changes that masked actual IO
operations with no-ops.

Change-Id: I037af6e6bbe2b573034d695f8779bee1b56bc125
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8022
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
-rw-r--r--tvix/cli/src/derivation.rs28
-rw-r--r--tvix/cli/src/known_paths.rs78
-rw-r--r--tvix/cli/src/refscan.rs20
3 files changed, 84 insertions, 42 deletions
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
index 6af3d24a24..88c5e52296 100644
--- a/tvix/cli/src/derivation.rs
+++ b/tvix/cli/src/derivation.rs
@@ -8,7 +8,7 @@ use tvix_eval::builtin_macros::builtins;
 use tvix_eval::{AddContext, CoercionKind, ErrorKind, NixAttrs, NixList, Value, VM};
 
 use crate::errors::Error;
-use crate::known_paths::{KnownPaths, PathType};
+use crate::known_paths::{KnownPaths, PathKind, PathName};
 
 // Constants used for strangely named fields in derivation inputs.
 const STRUCTURED_ATTRS: &str = "__structuredAttrs";
@@ -41,18 +41,19 @@ fn populate_outputs(vm: &mut VM, drv: &mut Derivation, outputs: NixList) -> Resu
 
 /// Populate the inputs of a derivation from the build references
 /// found when scanning the derivation's parameters.
-fn populate_inputs<I: IntoIterator<Item = String>>(
+fn populate_inputs<I: IntoIterator<Item = PathName>>(
     drv: &mut Derivation,
     known_paths: &KnownPaths,
     references: I,
 ) {
     for reference in references.into_iter() {
-        match &known_paths[&reference] {
-            PathType::Plain => {
-                drv.input_sources.insert(reference.to_string());
+        let reference = &known_paths[&reference];
+        match &reference.kind {
+            PathKind::Plain => {
+                drv.input_sources.insert(reference.path.clone());
             }
 
-            PathType::Output { name, derivation } => {
+            PathKind::Output { name, derivation } => {
                 match drv.input_derivations.entry(derivation.clone()) {
                     btree_map::Entry::Vacant(entry) => {
                         entry.insert(BTreeSet::from([name.clone()]));
@@ -64,8 +65,8 @@ fn populate_inputs<I: IntoIterator<Item = String>>(
                 }
             }
 
-            PathType::Derivation { output_names } => {
-                match drv.input_derivations.entry(reference.to_string()) {
+            PathKind::Derivation { output_names } => {
+                match drv.input_derivations.entry(reference.path.clone()) {
                     btree_map::Entry::Vacant(entry) => {
                         entry.insert(output_names.clone());
                     }
@@ -389,7 +390,14 @@ mod derivation_builtins {
 
         let mut refscan = state.borrow().reference_scanner();
         refscan.scan_str(content.as_str());
-        let refs = refscan.finalise();
+        let refs = {
+            let paths = state.borrow();
+            refscan
+                .finalise()
+                .into_iter()
+                .map(|path| paths[&path].path.to_string())
+                .collect::<Vec<_>>()
+        };
 
         // TODO: fail on derivation references (only "plain" is allowed here)
 
@@ -491,7 +499,7 @@ mod tests {
             "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv",
         );
 
-        let inputs: Vec<String> = vec![
+        let inputs = vec![
             "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo".into(),
             "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv".into(),
             "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar".into(),
diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
index 69651d4180..251366b53c 100644
--- a/tvix/cli/src/known_paths.rs
+++ b/tvix/cli/src/known_paths.rs
@@ -18,7 +18,7 @@ use std::{
 };
 
 #[derive(Debug, PartialEq)]
-pub enum PathType {
+pub enum PathKind {
     /// A literal derivation (`.drv`-file), and the *names* of its outputs.
     Derivation { output_names: BTreeSet<String> },
 
@@ -29,10 +29,43 @@ pub enum PathType {
     Plain,
 }
 
+#[derive(Debug, PartialEq)]
+pub struct KnownPath {
+    pub path: String,
+    pub kind: PathKind,
+}
+
+impl KnownPath {
+    fn new(path: String, kind: PathKind) -> Self {
+        KnownPath { path, kind }
+    }
+}
+
+/// Internal struct to prevent accidental leaks of the truncated path
+/// names.
+#[repr(transparent)]
+#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)]
+pub struct PathName(String);
+
+impl From<&str> for PathName {
+    fn from(s: &str) -> Self {
+        PathName(s[..STORE_PATH_LEN].to_string())
+    }
+}
+
+/// This instance is required to pass PathName instances as needles to
+/// the reference scanner.
+impl AsRef<[u8]> for PathName {
+    fn as_ref(&self) -> &[u8] {
+        self.0.as_ref()
+    }
+}
+
 #[derive(Debug, Default)]
 pub struct KnownPaths {
-    /// All known paths, and their associated [`PathType`].
-    paths: HashMap<String, PathType>,
+    /// All known paths, keyed by a truncated version of their store
+    /// path used for reference scanning.
+    paths: HashMap<PathName, KnownPath>,
 
     /// All known replacement strings for derivations.
     ///
@@ -41,39 +74,40 @@ pub struct KnownPaths {
     replacements: HashMap<String, String>,
 }
 
-impl Index<&str> for KnownPaths {
-    type Output = PathType;
+impl Index<&PathName> for KnownPaths {
+    type Output = KnownPath;
 
-    fn index(&self, index: &str) -> &Self::Output {
-        &self.paths[&index[..STORE_PATH_LEN]]
+    fn index(&self, index: &PathName) -> &Self::Output {
+        &self.paths[index]
     }
 }
 
 impl KnownPaths {
-    fn insert_path(&mut self, path: String, path_type: PathType) {
-        let path = path[..STORE_PATH_LEN].to_owned();
-        assert_eq!(path.len(), STORE_PATH_LEN, "should match");
-        match self.paths.entry(path) {
+    fn insert_path(&mut self, path: String, path_kind: PathKind) {
+        match self.paths.entry(path.as_str().into()) {
             hash_map::Entry::Vacant(entry) => {
-                entry.insert(path_type);
+                entry.insert(KnownPath::new(path, path_kind));
             }
 
             hash_map::Entry::Occupied(mut entry) => {
-                match (path_type, entry.get_mut()) {
+                match (path_kind, &mut entry.get_mut().kind) {
                     // These variant combinations require no "merging action".
-                    (PathType::Plain, PathType::Plain) => (),
-                    (PathType::Output { .. }, PathType::Output { .. }) => (),
+                    (PathKind::Plain, PathKind::Plain) => (),
+                    (PathKind::Output { .. }, PathKind::Output { .. }) => (),
 
                     (
-                        PathType::Derivation { output_names: new },
-                        PathType::Derivation {
+                        PathKind::Derivation { output_names: new },
+                        PathKind::Derivation {
                             output_names: ref mut old,
                         },
                     ) => {
                         old.extend(new);
                     }
 
-                    _ => panic!("path '{}' inserted twice with different types", entry.key()),
+                    _ => panic!(
+                        "path '{}' inserted twice with different types",
+                        entry.key().0
+                    ),
                 };
             }
         };
@@ -81,14 +115,14 @@ impl KnownPaths {
 
     /// Mark a plain path as known.
     pub fn plain<S: ToString>(&mut self, path: S) {
-        self.insert_path(path.to_string(), PathType::Plain);
+        self.insert_path(path.to_string(), PathKind::Plain);
     }
 
     /// Mark a derivation as known.
     pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) {
         self.insert_path(
             path.to_string(),
-            PathType::Derivation {
+            PathKind::Derivation {
                 output_names: outputs.into_iter().map(ToString::to_string).collect(),
             },
         );
@@ -103,7 +137,7 @@ impl KnownPaths {
     ) {
         self.insert_path(
             output_path.to_string(),
-            PathType::Output {
+            PathKind::Output {
                 name: name.to_string(),
                 derivation: drv_path.to_string(),
             },
@@ -117,7 +151,7 @@ impl KnownPaths {
     }
 
     /// Create a reference scanner from the current set of known paths.
-    pub fn reference_scanner(&self) -> ReferenceScanner {
+    pub fn reference_scanner(&self) -> ReferenceScanner<PathName> {
         let candidates = self.paths.keys().map(Clone::clone).collect();
         ReferenceScanner::new(candidates)
     }
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
index 4314e01644..567a677ce1 100644
--- a/tvix/cli/src/refscan.rs
+++ b/tvix/cli/src/refscan.rs
@@ -14,16 +14,16 @@ pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".
 
 /// Represents a "primed" reference scanner with an automaton that knows the set
 /// of store paths to scan for.
-pub struct ReferenceScanner {
-    candidates: Vec<String>,
+pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> {
+    candidates: Vec<P>,
     searcher: TwoByteWM,
     matches: Vec<usize>,
 }
 
-impl ReferenceScanner {
+impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> {
     /// Construct a new `ReferenceScanner` that knows how to scan for the given
     /// candidate store paths.
-    pub fn new(candidates: Vec<String>) -> Self {
+    pub fn new(candidates: Vec<P>) -> Self {
         let searcher = TwoByteWM::new(&candidates);
 
         ReferenceScanner {
@@ -46,7 +46,7 @@ impl ReferenceScanner {
     }
 
     /// Finalise the reference scanner and return the resulting matches.
-    pub fn finalise(self) -> BTreeSet<String> {
+    pub fn finalise(self) -> BTreeSet<P> {
         self.matches
             .into_iter()
             .map(|idx| self.candidates[idx].clone())
@@ -64,7 +64,7 @@ mod tests {
     #[test]
     fn test_single_match() {
         let mut scanner = ReferenceScanner::new(vec![
-            "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(),
+            "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(),
         ]);
         scanner.scan_str(HELLO_DRV);
 
@@ -78,11 +78,11 @@ mod tests {
     fn test_multiple_matches() {
         let candidates = vec![
             // these exist in the drv:
-            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
-            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
-            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
+            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(),
+            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(),
+            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(),
             // this doesn't:
-            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
+            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(),
         ];
 
         let mut scanner = ReferenceScanner::new(candidates.clone());