about summary refs log tree commit diff
path: root/tvix/cli/src
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/cli/src')
-rw-r--r-- tvix/cli/src/derivation.rs 19
-rw-r--r-- tvix/cli/src/known_paths.rs 12
-rw-r--r-- tvix/cli/src/refscan.rs 61
3 files changed, 34 insertions, 58 deletions
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
index fce9a628598d..6af3d24a2487 100644
--- a/tvix/cli/src/derivation.rs
+++ b/tvix/cli/src/derivation.rs
@@ -298,10 +298,19 @@ mod derivation_builtins {
         }
 
         // Scan references in relevant attributes to detect any build-references.
-        let mut refscan = state.borrow().reference_scanner();
-        drv.arguments.iter().for_each(|s| refscan.scan_str(s));
-        drv.environment.values().for_each(|s| refscan.scan_str(s));
-        refscan.scan_str(&drv.builder);
+        let references = {
+            let state = state.borrow();
+            if state.is_empty() {
+                // skip reference scanning, create an empty result
+                Default::default()
+            } else {
+                let mut refscan = state.reference_scanner();
+                drv.arguments.iter().for_each(|s| refscan.scan_str(s));
+                drv.environment.values().for_each(|s| refscan.scan_str(s));
+                refscan.scan_str(&drv.builder);
+                refscan.finalise()
+            }
+        };
 
         // Each output name needs to exist in the environment, at this
         // point initialised as an empty string because that is the
@@ -317,7 +326,7 @@ mod derivation_builtins {
         }
 
         let mut known_paths = state.borrow_mut();
-        populate_inputs(&mut drv, &known_paths, refscan.finalise());
+        populate_inputs(&mut drv, &known_paths, references);
 
         // At this point, derivation fields are fully populated from
         // eval data structures.
diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
index 165bc3ea4161..69651d418069 100644
--- a/tvix/cli/src/known_paths.rs
+++ b/tvix/cli/src/known_paths.rs
@@ -11,7 +11,7 @@
 //! Please see //tvix/eval/docs/build-references.md for more
 //! information.
 
-use crate::refscan::ReferenceScanner;
+use crate::refscan::{ReferenceScanner, STORE_PATH_LEN};
 use std::{
     collections::{hash_map, BTreeSet, HashMap},
     ops::Index,
@@ -45,12 +45,14 @@ impl Index<&str> for KnownPaths {
     type Output = PathType;
 
     fn index(&self, index: &str) -> &Self::Output {
-        &self.paths[index]
+        &self.paths[&index[..STORE_PATH_LEN]]
     }
 }
 
 impl KnownPaths {
     fn insert_path(&mut self, path: String, path_type: PathType) {
+        let path = path[..STORE_PATH_LEN].to_owned();
+        assert_eq!(path.len(), STORE_PATH_LEN, "should match");
         match self.paths.entry(path) {
             hash_map::Entry::Vacant(entry) => {
                 entry.insert(path_type);
@@ -108,6 +110,12 @@ impl KnownPaths {
         );
     }
 
+    /// Returns `true` if there are no known paths. In that case a
+    /// reference scanner cannot be created and scanning must be skipped.
+    pub fn is_empty(&self) -> bool {
+        self.paths.is_empty()
+    }
+
     /// Create a reference scanner from the current set of known paths.
     pub fn reference_scanner(&self) -> ReferenceScanner {
         let candidates = self.paths.keys().map(Clone::clone).collect();
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
index 74110e108809..4314e0164429 100644
--- a/tvix/cli/src/refscan.rs
+++ b/tvix/cli/src/refscan.rs
@@ -7,15 +7,16 @@
-//! The scanner itself is an Aho-Corasick automaton, using the `aho-corasick`
-//! crate.
+//! The scanner itself is a Wu-Manber multi-pattern matcher, using the
+//! `wu-manber` crate.
 
-use aho_corasick::AhoCorasick;
 use std::collections::BTreeSet;
-use std::io;
+use wu_manber::TwoByteWM;
+
+pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len();
 
 /// Represents a "primed" reference scanner with an automaton that knows the set
 /// of store paths to scan for.
 pub struct ReferenceScanner {
     candidates: Vec<String>,
-    searcher: AhoCorasick,
+    searcher: TwoByteWM,
     matches: Vec<usize>,
 }
 
@@ -23,7 +24,7 @@ impl ReferenceScanner {
     /// Construct a new `ReferenceScanner` that knows how to scan for the given
     /// candidate store paths.
     pub fn new(candidates: Vec<String>) -> Self {
-        let searcher = AhoCorasick::new_auto_configured(&candidates);
+        let searcher = TwoByteWM::new(&candidates);
 
         ReferenceScanner {
             searcher,
@@ -34,25 +35,14 @@ impl ReferenceScanner {
 
     /// Scan the given string for all non-overlapping matches and collect them
     /// in the scanner.
-    pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) {
-        for m in self.searcher.find_iter(&haystack) {
-            self.matches.push(m.pattern());
+    pub fn scan_str(&mut self, haystack: &str) {
+        if haystack.len() < STORE_PATH_LEN {
+            return;
         }
-    }
 
-    /// Scan the given reader for all non-overlapping matches, and collect them
-    /// in the scanner. On read failures, this method aborts and returns an
-    /// error to the caller.
-    ///
-    /// Please note that the internal machinery has its own buffering mechanism,
-    /// and where possible the given reader should be unbuffered. See
-    /// [`AhoCorasick::stream_find_iter`] for details on this.
-    pub fn scan_stream<R: io::Read>(&mut self, stream: R) -> io::Result<()> {
-        for m in self.searcher.stream_find_iter(stream) {
-            self.matches.push(m?.pattern());
+        for m in self.searcher.find(&haystack) {
+            self.matches.push(m.pat_idx);
         }
-
-        Ok(())
     }
 
     /// Finalise the reference scanner and return the resulting matches.
@@ -72,13 +62,6 @@ mod tests {
     const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
 
     #[test]
-    fn test_empty() {
-        let mut scanner = ReferenceScanner::new(vec![]);
-        scanner.scan_str("hello world");
-        assert!(scanner.finalise().is_empty());
-    }
-
-    #[test]
     fn test_single_match() {
         let mut scanner = ReferenceScanner::new(vec![
             "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(),
@@ -112,28 +95,4 @@ mod tests {
             assert!(result.contains(c));
         }
     }
-
-    #[test]
-    fn test_multiple_stream() {
-        let candidates = vec![
-            // these exist in the drv:
-            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
-            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
-            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
-            // this doesn't:
-            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
-        ];
-
-        let mut scanner = ReferenceScanner::new(candidates.clone());
-        scanner
-            .scan_stream(HELLO_DRV.as_bytes())
-            .expect("scanning should succeed");
-
-        let result = scanner.finalise();
-        assert_eq!(result.len(), 3);
-
-        for c in candidates[..3].iter() {
-            assert!(result.contains(c));
-        }
-    }
 }