diff options
-rw-r--r-- | tvix/Cargo.lock | 1 | ||||
-rw-r--r-- | tvix/Cargo.nix | 4 | ||||
-rw-r--r-- | tvix/cli/Cargo.toml | 1 | ||||
-rw-r--r-- | tvix/cli/src/main.rs | 1 | ||||
-rw-r--r-- | tvix/cli/src/refscan.rs | 97 |
5 files changed, 104 insertions, 0 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 197bee75e6c8..a5ea4b82e85c 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -2177,6 +2177,7 @@ checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" name = "tvix-cli" version = "0.1.0" dependencies = [ + "aho-corasick", "clap 4.0.32", "dirs", "rustyline", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 9f13a1607336..0f2b6bdd98b4 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -6478,6 +6478,10 @@ rec { else ./cli; dependencies = [ { + name = "aho-corasick"; + packageId = "aho-corasick"; + } + { name = "clap"; packageId = "clap 4.0.32"; features = [ "derive" "env" ]; diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml index 099002353dc2..f3324f2611ea 100644 --- a/tvix/cli/Cargo.toml +++ b/tvix/cli/Cargo.toml @@ -13,3 +13,4 @@ rustyline = "10.0.0" clap = { version = "4.0", features = ["derive", "env"] } dirs = "4.0.0" smol_str = "0.1" +aho-corasick = "0.7" diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs index eec4d8bbb2ac..0f837b346c2d 100644 --- a/tvix/cli/src/main.rs +++ b/tvix/cli/src/main.rs @@ -1,4 +1,5 @@ mod nix_compat; +mod refscan; use std::{fs, path::PathBuf}; diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs new file mode 100644 index 000000000000..76857142e87e --- /dev/null +++ b/tvix/cli/src/refscan.rs @@ -0,0 +1,97 @@ +//! Simple scanner for non-overlapping, known references of Nix store paths in a +//! given string. +//! +//! This is used for determining build references (see +//! //tvix/eval/docs/build-references.md for more details). +//! +//! The scanner itself is an Aho-Corasick automaton, using the `aho-corasick` +//! crate. + +use aho_corasick::AhoCorasick; +use std::collections::BTreeSet; + +/// Represents a "primed" reference scanner with an automaton that knows the set +/// of store paths to scan for. +pub struct ReferenceScanner<'c, 's> { + candidates: &'c [&'s str], + searcher: AhoCorasick, + matches: BTreeSet<&'s str>, +} + +impl<'c, 's> ReferenceScanner<'c, 's> { + /// Construct a new `ReferenceScanner` that knows how to scan for the given + /// candidate store paths. + pub fn new(candidates: &'c [&'s str]) -> Self { + let searcher = AhoCorasick::new_auto_configured(candidates); + + ReferenceScanner { + searcher, + candidates, + matches: Default::default(), + } + } + + /// Scan the given string for all non-overlapping matches and collect them + /// in the scanner. + pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) { + for m in self.searcher.find_iter(&haystack) { + let needle = self.candidates[m.pattern()]; + self.matches.insert(needle); + } + } + + /// Finalise the reference scanner and return the resulting matches. + pub fn finalise(self) -> BTreeSet<&'s str> { + self.matches + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // The actual derivation of `nixpkgs.hello`. + const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; + + #[test] + fn test_empty() { + let mut scanner = ReferenceScanner::new(&[]); + scanner.scan_str("hello world"); + assert!(scanner.finalise().is_empty()); + } + + #[test] + fn test_single_match() { + let mut scanner = + ReferenceScanner::new(&["/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16"]); + scanner.scan_str(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 1); + assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16")); + } + + #[test] + fn test_multiple_matches() { + let candidates = &[ + // these exist in the drv: + "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1", + "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv", + "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux", + // this doesn't: + "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv", + ]; + + let mut scanner = ReferenceScanner::new(candidates); + scanner.scan_str(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 3); + + for c in candidates[..3].iter() { + assert!(result.contains(c)); + } + } +} |