diff options
Diffstat (limited to 'tvix')
-rw-r--r-- | tvix/Cargo.lock | 7 | ||||
-rw-r--r-- | tvix/Cargo.nix | 19 | ||||
-rw-r--r-- | tvix/cli/Cargo.toml | 4 | ||||
-rw-r--r-- | tvix/cli/src/derivation.rs | 19 | ||||
-rw-r--r-- | tvix/cli/src/known_paths.rs | 12 | ||||
-rw-r--r-- | tvix/cli/src/refscan.rs | 61 | ||||
-rw-r--r-- | tvix/crate-hashes.json | 3 |
7 files changed, 64 insertions, 61 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index dd7d71906a69..131b86860805 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -2622,7 +2622,6 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" name = "tvix-cli" version = "0.1.0" dependencies = [ - "aho-corasick", "clap 4.1.4", "data-encoding", "dirs", @@ -2632,6 +2631,7 @@ dependencies = [ "ssri", "thiserror", "tvix-eval", + "wu-manber", ] [[package]] @@ -2958,6 +2958,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" [[package]] +name = "wu-manber" +version = "0.1.0" +source = "git+https://github.com/jneem/wu-manber.git#acff1ffc303935ce55a6c36bd89b0819688b27b7" + +[[package]] name = "xml-rs" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 287e2095576c..1b566b541703 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -7809,6 +7809,10 @@ rec { name = "tvix-eval"; packageId = "tvix-eval"; } + { + name = "wu-manber"; + packageId = "wu-manber"; + } ]; }; @@ -9464,6 +9468,21 @@ rec { ]; }; + "wu-manber" = rec { + crateName = "wu-manber"; + version = "0.1.0"; + edition = "2015"; + workspace_member = null; + src = pkgs.fetchgit { + url = "https://github.com/jneem/wu-manber.git"; + rev = "acff1ffc303935ce55a6c36bd89b0819688b27b7"; + sha256 = "1r6r87746qry48rmsdk7x3l4w5wlhkny8al9mz2ic83j7vm1wivz"; + }; + authors = [ + "Joe Neeman <joeneeman@gmail.com>" + ]; + + }; "xml-rs" = rec { crateName = "xml-rs"; version = "0.8.4"; diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml index 69b54bd299ec..2cda1c6c15a5 100644 --- a/tvix/cli/Cargo.toml +++ b/tvix/cli/Cargo.toml @@ -14,7 +14,9 @@ rustyline = "10.0.0" clap = { version = "4.0", features = ["derive", "env"] } dirs = "4.0.0" smol_str = "0.1" -aho-corasick = "0.7" ssri = "7.0.0" data-encoding = "2.3.3" thiserror = "1.0.38" + +[dependencies.wu-manber] +git = "https://github.com/jneem/wu-manber.git" diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index fce9a628598d..6af3d24a2487 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -298,10 +298,19 @@ mod derivation_builtins { } // Scan references in relevant attributes to detect any build-references. - let mut refscan = state.borrow().reference_scanner(); - drv.arguments.iter().for_each(|s| refscan.scan_str(s)); - drv.environment.values().for_each(|s| refscan.scan_str(s)); - refscan.scan_str(&drv.builder); + let references = { + let state = state.borrow(); + if state.is_empty() { + // skip reference scanning, create an empty result + Default::default() + } else { + let mut refscan = state.reference_scanner(); + drv.arguments.iter().for_each(|s| refscan.scan_str(s)); + drv.environment.values().for_each(|s| refscan.scan_str(s)); + refscan.scan_str(&drv.builder); + refscan.finalise() + } + }; // Each output name needs to exist in the environment, at this // point initialised as an empty string because that is the @@ -317,7 +326,7 @@ mod derivation_builtins { } let mut known_paths = state.borrow_mut(); - populate_inputs(&mut drv, &known_paths, refscan.finalise()); + populate_inputs(&mut drv, &known_paths, references); // At this point, derivation fields are fully populated from // eval data structures. diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs index 165bc3ea4161..69651d418069 100644 --- a/tvix/cli/src/known_paths.rs +++ b/tvix/cli/src/known_paths.rs @@ -11,7 +11,7 @@ //! Please see //tvix/eval/docs/build-references.md for more //! information. -use crate::refscan::ReferenceScanner; +use crate::refscan::{ReferenceScanner, STORE_PATH_LEN}; use std::{ collections::{hash_map, BTreeSet, HashMap}, ops::Index, @@ -45,12 +45,14 @@ impl Index<&str> for KnownPaths { type Output = PathType; fn index(&self, index: &str) -> &Self::Output { - &self.paths[index] + &self.paths[&index[..STORE_PATH_LEN]] } } impl KnownPaths { fn insert_path(&mut self, path: String, path_type: PathType) { + let path = path[..STORE_PATH_LEN].to_owned(); + assert_eq!(path.len(), STORE_PATH_LEN, "should match"); match self.paths.entry(path) { hash_map::Entry::Vacant(entry) => { entry.insert(path_type); @@ -108,6 +110,12 @@ impl KnownPaths { ); } + /// Checks whether there are any known paths. If not, a reference + /// scanner can not be created. + pub fn is_empty(&self) -> bool { + self.paths.is_empty() + } + /// Create a reference scanner from the current set of known paths. pub fn reference_scanner(&self) -> ReferenceScanner { let candidates = self.paths.keys().map(Clone::clone).collect(); diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs index 74110e108809..4314e0164429 100644 --- a/tvix/cli/src/refscan.rs +++ b/tvix/cli/src/refscan.rs @@ -7,15 +7,16 @@ //! The scanner itself is an Aho-Corasick automaton, using the `aho-corasick` //! crate. -use aho_corasick::AhoCorasick; use std::collections::BTreeSet; -use std::io; +use wu_manber::TwoByteWM; + +pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len(); /// Represents a "primed" reference scanner with an automaton that knows the set /// of store paths to scan for. pub struct ReferenceScanner { candidates: Vec<String>, - searcher: AhoCorasick, + searcher: TwoByteWM, matches: Vec<usize>, } @@ -23,7 +24,7 @@ impl ReferenceScanner { /// Construct a new `ReferenceScanner` that knows how to scan for the given /// candidate store paths. pub fn new(candidates: Vec<String>) -> Self { - let searcher = AhoCorasick::new_auto_configured(&candidates); + let searcher = TwoByteWM::new(&candidates); ReferenceScanner { searcher, @@ -34,25 +35,14 @@ impl ReferenceScanner { /// Scan the given string for all non-overlapping matches and collect them /// in the scanner. - pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) { - for m in self.searcher.find_iter(&haystack) { - self.matches.push(m.pattern()); + pub fn scan_str(&mut self, haystack: &str) { + if haystack.len() < STORE_PATH_LEN { + return; } - } - /// Scan the given reader for all non-overlapping matches, and collect them - /// in the scanner. On read failures, this method aborts and returns an - /// error to the caller. - /// - /// Please note that the internal machinery has its own buffering mechanism, - /// and where possible the given reader should be unbuffered. See - /// [`AhoCorasick::stream_find_iter`] for details on this. - pub fn scan_stream<R: io::Read>(&mut self, stream: R) -> io::Result<()> { - for m in self.searcher.stream_find_iter(stream) { - self.matches.push(m?.pattern()); + for m in self.searcher.find(&haystack) { + self.matches.push(m.pat_idx); } - - Ok(()) } /// Finalise the reference scanner and return the resulting matches. @@ -72,13 +62,6 @@ mod tests { const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; #[test] - fn test_empty() { - let mut scanner = ReferenceScanner::new(vec![]); - scanner.scan_str("hello world"); - assert!(scanner.finalise().is_empty()); - } - - #[test] fn test_single_match() { let mut scanner = ReferenceScanner::new(vec![ "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(), @@ -112,28 +95,4 @@ mod tests { assert!(result.contains(c)); } } - - #[test] - fn test_multiple_stream() { - let candidates = vec![ - // these exist in the drv: - "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(), - "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(), - "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(), - // this doesn't: - "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(), - ]; - - let mut scanner = ReferenceScanner::new(candidates.clone()); - scanner - .scan_stream(HELLO_DRV.as_bytes()) - .expect("scanning should succeed"); - - let result = scanner.finalise(); - assert_eq!(result.len(), 3); - - for c in candidates[..3].iter() { - assert!(result.contains(c)); - } - } } diff --git a/tvix/crate-hashes.json b/tvix/crate-hashes.json index 5b076333427b..2528588b186d 100644 --- a/tvix/crate-hashes.json +++ b/tvix/crate-hashes.json @@ -1,4 +1,5 @@ { "test-generator 0.3.0 (git+https://github.com/JamesGuthrie/test-generator.git?rev=82e799979980962aec1aa324ec6e0e4cad781f41#82e799979980962aec1aa324ec6e0e4cad781f41)": "08brp3qqa55hijc7xby3lam2cc84hvx1zzfqv6lj7smlczh8k32y", - "tonic-mock 0.1.0 (git+https://github.com/brainrake/tonic-mock?branch=bump-dependencies#ec1a15510875de99d709d684190db5d9beab175e)": "0lwa03hpp0mxa6aa1zv5w68k61y4hccfm0q2ykyq392fwal8vb50" + "tonic-mock 0.1.0 (git+https://github.com/brainrake/tonic-mock?branch=bump-dependencies#ec1a15510875de99d709d684190db5d9beab175e)": "0lwa03hpp0mxa6aa1zv5w68k61y4hccfm0q2ykyq392fwal8vb50", + "wu-manber 0.1.0 (git+https://github.com/jneem/wu-manber.git#acff1ffc303935ce55a6c36bd89b0819688b27b7)": "1r6r87746qry48rmsdk7x3l4w5wlhkny8al9mz2ic83j7vm1wivz" } \ No newline at end of file |