about summary refs log tree commit diff
path: root/tvix/cli/src/refscan.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/cli/src/refscan.rs')
-rw-r--r-- tvix/cli/src/refscan.rs 61
1 file changed, 10 insertions, 51 deletions
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
index 74110e108809..4314e0164429 100644
--- a/tvix/cli/src/refscan.rs
+++ b/tvix/cli/src/refscan.rs
@@ -7,15 +7,16 @@
 //! The scanner itself is an Aho-Corasick automaton, using the `aho-corasick`
 //! crate.
 
-use aho_corasick::AhoCorasick;
 use std::collections::BTreeSet;
-use std::io;
+use wu_manber::TwoByteWM;
+
+pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len();
 
 /// Represents a "primed" reference scanner with an automaton that knows the set
 /// of store paths to scan for.
 pub struct ReferenceScanner {
     candidates: Vec<String>,
-    searcher: AhoCorasick,
+    searcher: TwoByteWM,
     matches: Vec<usize>,
 }
 
@@ -23,7 +24,7 @@ impl ReferenceScanner {
     /// Construct a new `ReferenceScanner` that knows how to scan for the given
     /// candidate store paths.
     pub fn new(candidates: Vec<String>) -> Self {
-        let searcher = AhoCorasick::new_auto_configured(&candidates);
+        let searcher = TwoByteWM::new(&candidates);
 
         ReferenceScanner {
             searcher,
@@ -34,25 +35,14 @@ impl ReferenceScanner {
 
     /// Scan the given string for all non-overlapping matches and collect them
     /// in the scanner.
-    pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) {
-        for m in self.searcher.find_iter(&haystack) {
-            self.matches.push(m.pattern());
+    pub fn scan_str(&mut self, haystack: &str) {
+        if haystack.len() < STORE_PATH_LEN {
+            return;
         }
-    }
 
-    /// Scan the given reader for all non-overlapping matches, and collect them
-    /// in the scanner. On read failures, this method aborts and returns an
-    /// error to the caller.
-    ///
-    /// Please note that the internal machinery has its own buffering mechanism,
-    /// and where possible the given reader should be unbuffered. See
-    /// [`AhoCorasick::stream_find_iter`] for details on this.
-    pub fn scan_stream<R: io::Read>(&mut self, stream: R) -> io::Result<()> {
-        for m in self.searcher.stream_find_iter(stream) {
-            self.matches.push(m?.pattern());
+        for m in self.searcher.find(&haystack) {
+            self.matches.push(m.pat_idx);
         }
-
-        Ok(())
     }
 
     /// Finalise the reference scanner and return the resulting matches.
@@ -72,13 +62,6 @@ mod tests {
     const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
 
     #[test]
-    fn test_empty() {
-        let mut scanner = ReferenceScanner::new(vec![]);
-        scanner.scan_str("hello world");
-        assert!(scanner.finalise().is_empty());
-    }
-
-    #[test]
     fn test_single_match() {
         let mut scanner = ReferenceScanner::new(vec![
             "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(),
@@ -112,28 +95,4 @@ mod tests {
             assert!(result.contains(c));
         }
     }
-
-    #[test]
-    fn test_multiple_stream() {
-        let candidates = vec![
-            // these exist in the drv:
-            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
-            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
-            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
-            // this doesn't:
-            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
-        ];
-
-        let mut scanner = ReferenceScanner::new(candidates.clone());
-        scanner
-            .scan_stream(HELLO_DRV.as_bytes())
-            .expect("scanning should succeed");
-
-        let result = scanner.finalise();
-        assert_eq!(result.len(), 3);
-
-        for c in candidates[..3].iter() {
-            assert!(result.contains(c));
-        }
-    }
 }