about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- tvix/Cargo.lock 1
-rw-r--r-- tvix/Cargo.nix 4
-rw-r--r-- tvix/cli/Cargo.toml 1
-rw-r--r-- tvix/cli/src/main.rs 1
-rw-r--r-- tvix/cli/src/refscan.rs 97
5 files changed, 104 insertions, 0 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 197bee75e6c8..a5ea4b82e85c 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -2177,6 +2177,7 @@ checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
 name = "tvix-cli"
 version = "0.1.0"
 dependencies = [
+ "aho-corasick",
  "clap 4.0.32",
  "dirs",
  "rustyline",
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 9f13a1607336..0f2b6bdd98b4 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -6478,6 +6478,10 @@ rec {
           else ./cli;
         dependencies = [
           {
+            name = "aho-corasick";
+            packageId = "aho-corasick";
+          }
+          {
             name = "clap";
             packageId = "clap 4.0.32";
             features = [ "derive" "env" ];
diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml
index 099002353dc2..f3324f2611ea 100644
--- a/tvix/cli/Cargo.toml
+++ b/tvix/cli/Cargo.toml
@@ -13,3 +13,4 @@ rustyline = "10.0.0"
 clap = { version = "4.0", features = ["derive", "env"] }
 dirs = "4.0.0"
 smol_str = "0.1"
+aho-corasick = "0.7"
diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs
index eec4d8bbb2ac..0f837b346c2d 100644
--- a/tvix/cli/src/main.rs
+++ b/tvix/cli/src/main.rs
@@ -1,4 +1,5 @@
 mod nix_compat;
+mod refscan;
 
 use std::{fs, path::PathBuf};
 
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
new file mode 100644
index 000000000000..76857142e87e
--- /dev/null
+++ b/tvix/cli/src/refscan.rs
@@ -0,0 +1,97 @@
+//! Simple scanner for non-overlapping, known references of Nix store paths in a
+//! given string.
+//!
+//! This is used for determining build references (see
+//! //tvix/eval/docs/build-references.md for more details).
+//!
+//! The scanner itself is an Aho-Corasick automaton, using the `aho-corasick`
+//! crate.
+
+use aho_corasick::AhoCorasick;
+use std::collections::BTreeSet;
+
+/// A reference scanner "primed" with a fixed list of candidate store paths; it
+/// holds the automaton built from them and the candidates matched so far.
+pub struct ReferenceScanner<'c, 's> {
+    searcher: AhoCorasick,
+    candidates: &'c [&'s str],
+    matches: BTreeSet<&'s str>,
+}
+
+impl<'c, 's> ReferenceScanner<'c, 's> {
+    /// Build a scanner whose automaton recognises every one of the given
+    /// `candidates`; the set of recorded matches starts out empty.
+    pub fn new(candidates: &'c [&'s str]) -> Self {
+        Self {
+            candidates,
+            searcher: AhoCorasick::new_auto_configured(candidates),
+            matches: BTreeSet::new(),
+        }
+    }
+
+    /// Search `haystack` for non-overlapping occurrences of the candidates and
+    /// record each candidate that was found. Matches accumulate across calls.
+    pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) {
+        // The automaton reports a pattern ID per match, which is used as the
+        // index of the corresponding entry in `candidates`.
+        let candidates = self.candidates;
+        let hits = self.searcher.find_iter(&haystack);
+        self.matches.extend(hits.map(|hit| candidates[hit.pattern()]));
+    }
+
+    /// Consume the scanner and hand back the set of candidates it matched.
+    pub fn finalise(self) -> BTreeSet<&'s str> {
+        let ReferenceScanner { matches, .. } = self;
+        matches
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The actual derivation of `nixpkgs.hello`, used as the haystack below.
+    const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
+
+    /// A scanner without any candidates never reports a match.
+    #[test]
+    fn test_empty() {
+        let mut scanner = ReferenceScanner::new(&[]);
+        scanner.scan_str("hello world");
+        assert!(scanner.finalise().is_empty());
+    }
+
+    /// A candidate that occurs several times in the input is reported once.
+    #[test]
+    fn test_single_match() {
+        let bash = "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16";
+        let candidates = [bash];
+        let mut scanner = ReferenceScanner::new(&candidates);
+        scanner.scan_str(HELLO_DRV);
+
+        let result = scanner.finalise();
+
+        assert_eq!(result, BTreeSet::from([bash]));
+    }
+
+    /// Exactly the candidates present in the input are reported.
+    #[test]
+    fn test_multiple_matches() {
+        // These exist in the drv:
+        let present = [
+            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1",
+            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",
+            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux",
+        ];
+        // This one doesn't:
+        let absent = "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv";
+
+        let candidates = [present[0], present[1], present[2], absent];
+        let mut scanner = ReferenceScanner::new(&candidates);
+        scanner.scan_str(HELLO_DRV);
+
+        let result = scanner.finalise();
+        assert!(!result.contains(absent), "absent candidate was reported");
+        assert_eq!(result, BTreeSet::from(present));
+    }
+}