about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--tvix/Cargo.lock7
-rw-r--r--tvix/Cargo.nix19
-rw-r--r--tvix/cli/Cargo.toml4
-rw-r--r--tvix/cli/src/derivation.rs19
-rw-r--r--tvix/cli/src/known_paths.rs12
-rw-r--r--tvix/cli/src/refscan.rs61
-rw-r--r--tvix/crate-hashes.json3
7 files changed, 64 insertions, 61 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index dd7d71906a..131b868608 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -2622,7 +2622,6 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
 name = "tvix-cli"
 version = "0.1.0"
 dependencies = [
- "aho-corasick",
  "clap 4.1.4",
  "data-encoding",
  "dirs",
@@ -2632,6 +2631,7 @@ dependencies = [
  "ssri",
  "thiserror",
  "tvix-eval",
+ "wu-manber",
 ]
 
 [[package]]
@@ -2958,6 +2958,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"
 
 [[package]]
+name = "wu-manber"
+version = "0.1.0"
+source = "git+https://github.com/jneem/wu-manber.git#acff1ffc303935ce55a6c36bd89b0819688b27b7"
+
+[[package]]
 name = "xml-rs"
 version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 287e209557..1b566b5417 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -7809,6 +7809,10 @@ rec {
             name = "tvix-eval";
             packageId = "tvix-eval";
           }
+          {
+            name = "wu-manber";
+            packageId = "wu-manber";
+          }
         ];
 
       };
@@ -9464,6 +9468,21 @@ rec {
         ];
 
       };
+      "wu-manber" = rec {
+        crateName = "wu-manber";
+        version = "0.1.0";
+        edition = "2015";
+        workspace_member = null;
+        src = pkgs.fetchgit {
+          url = "https://github.com/jneem/wu-manber.git";
+          rev = "acff1ffc303935ce55a6c36bd89b0819688b27b7";
+          sha256 = "1r6r87746qry48rmsdk7x3l4w5wlhkny8al9mz2ic83j7vm1wivz";
+        };
+        authors = [
+          "Joe Neeman <joeneeman@gmail.com>"
+        ];
+
+      };
       "xml-rs" = rec {
         crateName = "xml-rs";
         version = "0.8.4";
diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml
index 69b54bd299..2cda1c6c15 100644
--- a/tvix/cli/Cargo.toml
+++ b/tvix/cli/Cargo.toml
@@ -14,7 +14,9 @@ rustyline = "10.0.0"
 clap = { version = "4.0", features = ["derive", "env"] }
 dirs = "4.0.0"
 smol_str = "0.1"
-aho-corasick = "0.7"
 ssri = "7.0.0"
 data-encoding = "2.3.3"
 thiserror = "1.0.38"
+
+[dependencies.wu-manber]
+git = "https://github.com/jneem/wu-manber.git"
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
index fce9a62859..6af3d24a24 100644
--- a/tvix/cli/src/derivation.rs
+++ b/tvix/cli/src/derivation.rs
@@ -298,10 +298,19 @@ mod derivation_builtins {
         }
 
         // Scan references in relevant attributes to detect any build-references.
-        let mut refscan = state.borrow().reference_scanner();
-        drv.arguments.iter().for_each(|s| refscan.scan_str(s));
-        drv.environment.values().for_each(|s| refscan.scan_str(s));
-        refscan.scan_str(&drv.builder);
+        let references = {
+            let state = state.borrow();
+            if state.is_empty() {
+                // skip reference scanning, create an empty result
+                Default::default()
+            } else {
+                let mut refscan = state.reference_scanner();
+                drv.arguments.iter().for_each(|s| refscan.scan_str(s));
+                drv.environment.values().for_each(|s| refscan.scan_str(s));
+                refscan.scan_str(&drv.builder);
+                refscan.finalise()
+            }
+        };
 
         // Each output name needs to exist in the environment, at this
         // point initialised as an empty string because that is the
@@ -317,7 +326,7 @@ mod derivation_builtins {
         }
 
         let mut known_paths = state.borrow_mut();
-        populate_inputs(&mut drv, &known_paths, refscan.finalise());
+        populate_inputs(&mut drv, &known_paths, references);
 
         // At this point, derivation fields are fully populated from
         // eval data structures.
diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
index 165bc3ea41..69651d4180 100644
--- a/tvix/cli/src/known_paths.rs
+++ b/tvix/cli/src/known_paths.rs
@@ -11,7 +11,7 @@
 //! Please see //tvix/eval/docs/build-references.md for more
 //! information.
 
-use crate::refscan::ReferenceScanner;
+use crate::refscan::{ReferenceScanner, STORE_PATH_LEN};
 use std::{
     collections::{hash_map, BTreeSet, HashMap},
     ops::Index,
@@ -45,12 +45,14 @@ impl Index<&str> for KnownPaths {
     type Output = PathType;
 
     fn index(&self, index: &str) -> &Self::Output {
-        &self.paths[index]
+        &self.paths[&index[..STORE_PATH_LEN]]
     }
 }
 
 impl KnownPaths {
     fn insert_path(&mut self, path: String, path_type: PathType) {
+        let path = path[..STORE_PATH_LEN].to_owned();
+        assert_eq!(path.len(), STORE_PATH_LEN, "should match");
         match self.paths.entry(path) {
             hash_map::Entry::Vacant(entry) => {
                 entry.insert(path_type);
@@ -108,6 +110,12 @@ impl KnownPaths {
         );
     }
 
+    /// Checks whether there are any known paths. If not, a reference
+    /// scanner can not be created.
+    pub fn is_empty(&self) -> bool {
+        self.paths.is_empty()
+    }
+
     /// Create a reference scanner from the current set of known paths.
     pub fn reference_scanner(&self) -> ReferenceScanner {
         let candidates = self.paths.keys().map(Clone::clone).collect();
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
index 74110e1088..4314e01644 100644
--- a/tvix/cli/src/refscan.rs
+++ b/tvix/cli/src/refscan.rs
@@ -7,15 +7,16 @@
 //! The scanner itself is an Aho-Corasick automaton, using the `aho-corasick`
 //! crate.
 
-use aho_corasick::AhoCorasick;
 use std::collections::BTreeSet;
-use std::io;
+use wu_manber::TwoByteWM;
+
+pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len();
 
 /// Represents a "primed" reference scanner with an automaton that knows the set
 /// of store paths to scan for.
 pub struct ReferenceScanner {
     candidates: Vec<String>,
-    searcher: AhoCorasick,
+    searcher: TwoByteWM,
     matches: Vec<usize>,
 }
 
@@ -23,7 +24,7 @@ impl ReferenceScanner {
     /// Construct a new `ReferenceScanner` that knows how to scan for the given
     /// candidate store paths.
     pub fn new(candidates: Vec<String>) -> Self {
-        let searcher = AhoCorasick::new_auto_configured(&candidates);
+        let searcher = TwoByteWM::new(&candidates);
 
         ReferenceScanner {
             searcher,
@@ -34,25 +35,14 @@ impl ReferenceScanner {
 
     /// Scan the given string for all non-overlapping matches and collect them
     /// in the scanner.
-    pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) {
-        for m in self.searcher.find_iter(&haystack) {
-            self.matches.push(m.pattern());
+    pub fn scan_str(&mut self, haystack: &str) {
+        if haystack.len() < STORE_PATH_LEN {
+            return;
         }
-    }
 
-    /// Scan the given reader for all non-overlapping matches, and collect them
-    /// in the scanner. On read failures, this method aborts and returns an
-    /// error to the caller.
-    ///
-    /// Please note that the internal machinery has its own buffering mechanism,
-    /// and where possible the given reader should be unbuffered. See
-    /// [`AhoCorasick::stream_find_iter`] for details on this.
-    pub fn scan_stream<R: io::Read>(&mut self, stream: R) -> io::Result<()> {
-        for m in self.searcher.stream_find_iter(stream) {
-            self.matches.push(m?.pattern());
+        for m in self.searcher.find(&haystack) {
+            self.matches.push(m.pat_idx);
         }
-
-        Ok(())
     }
 
     /// Finalise the reference scanner and return the resulting matches.
@@ -72,13 +62,6 @@ mod tests {
     const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
 
     #[test]
-    fn test_empty() {
-        let mut scanner = ReferenceScanner::new(vec![]);
-        scanner.scan_str("hello world");
-        assert!(scanner.finalise().is_empty());
-    }
-
-    #[test]
     fn test_single_match() {
         let mut scanner = ReferenceScanner::new(vec![
             "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(),
@@ -112,28 +95,4 @@ mod tests {
             assert!(result.contains(c));
         }
     }
-
-    #[test]
-    fn test_multiple_stream() {
-        let candidates = vec![
-            // these exist in the drv:
-            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
-            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
-            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
-            // this doesn't:
-            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
-        ];
-
-        let mut scanner = ReferenceScanner::new(candidates.clone());
-        scanner
-            .scan_stream(HELLO_DRV.as_bytes())
-            .expect("scanning should succeed");
-
-        let result = scanner.finalise();
-        assert_eq!(result.len(), 3);
-
-        for c in candidates[..3].iter() {
-            assert!(result.contains(c));
-        }
-    }
 }
diff --git a/tvix/crate-hashes.json b/tvix/crate-hashes.json
index 5b07633342..2528588b18 100644
--- a/tvix/crate-hashes.json
+++ b/tvix/crate-hashes.json
@@ -1,4 +1,5 @@
 {
   "test-generator 0.3.0 (git+https://github.com/JamesGuthrie/test-generator.git?rev=82e799979980962aec1aa324ec6e0e4cad781f41#82e799979980962aec1aa324ec6e0e4cad781f41)": "08brp3qqa55hijc7xby3lam2cc84hvx1zzfqv6lj7smlczh8k32y",
-  "tonic-mock 0.1.0 (git+https://github.com/brainrake/tonic-mock?branch=bump-dependencies#ec1a15510875de99d709d684190db5d9beab175e)": "0lwa03hpp0mxa6aa1zv5w68k61y4hccfm0q2ykyq392fwal8vb50"
+  "tonic-mock 0.1.0 (git+https://github.com/brainrake/tonic-mock?branch=bump-dependencies#ec1a15510875de99d709d684190db5d9beab175e)": "0lwa03hpp0mxa6aa1zv5w68k61y4hccfm0q2ykyq392fwal8vb50",
+  "wu-manber 0.1.0 (git+https://github.com/jneem/wu-manber.git#acff1ffc303935ce55a6c36bd89b0819688b27b7)": "1r6r87746qry48rmsdk7x3l4w5wlhkny8al9mz2ic83j7vm1wivz"
 }
\ No newline at end of file