diff options
author | Florian Klink <flokli@flokli.de> | 2023-11-03T11·34+0200 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2023-11-04T15·18+0000 |
commit | 3196fe0143b6ff729c177fa5d17fa03c9e9627c9 (patch) | |
tree | 33a6cbb1a965a739e8cdd96dc2faec79b83635de /tvix/glue/src/refscan.rs | |
parent | a51d277764d73582bc9bf816f6f4163d2df7f9c4 (diff) |
refactor(tvix): move tvix glue code into glue crate r/6936
There's various bits and pieces in tvix-cli that use both the store and evaluator, as well as nix-compat. For example, builtins.derivation, as well as the reference scanning implementation. This "glue code" currently isn't accessible from anywhere else, but it'd be very useful if it were. Move it out into a `glue` crate, and make `tvix-cli` a consumer of it. All the KnownPaths setup and passing around, as well as NIX_PATH handling is also something that should probably be moved into the glue crate as well, but that's something left for a future CL. Change-Id: I080ed3d1825ab23790666486840f301f00856277 Reviewed-on: https://cl.tvl.fyi/c/depot/+/9908 Autosubmit: flokli <flokli@flokli.de> Tested-by: BuildkiteCI Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Diffstat (limited to 'tvix/glue/src/refscan.rs')
-rw-r--r-- | tvix/glue/src/refscan.rs | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/tvix/glue/src/refscan.rs b/tvix/glue/src/refscan.rs new file mode 100644 index 000000000000..0e0bb6c77828 --- /dev/null +++ b/tvix/glue/src/refscan.rs @@ -0,0 +1,115 @@ +//! Simple scanner for non-overlapping, known references of Nix store paths in a +//! given string. +//! +//! This is used for determining build references (see +//! //tvix/eval/docs/build-references.md for more details). +//! +//! The scanner itself is using the Wu-Manber string-matching algorithm, using +//! our fork of the `wu-mamber` crate. + +use std::collections::BTreeSet; +use wu_manber::TwoByteWM; + +pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len(); + +/// Represents a "primed" reference scanner with an automaton that knows the set +/// of store paths to scan for. +pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> { + candidates: Vec<P>, + searcher: Option<TwoByteWM>, + matches: Vec<usize>, +} + +impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> { + /// Construct a new `ReferenceScanner` that knows how to scan for the given + /// candidate store paths. + pub fn new(candidates: Vec<P>) -> Self { + let searcher = if candidates.is_empty() { + None + } else { + Some(TwoByteWM::new(&candidates)) + }; + + ReferenceScanner { + searcher, + candidates, + matches: Default::default(), + } + } + + /// Scan the given str for all non-overlapping matches and collect them + /// in the scanner. + pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) { + if haystack.as_ref().len() < STORE_PATH_LEN { + return; + } + + if let Some(searcher) = &self.searcher { + for m in searcher.find(haystack) { + self.matches.push(m.pat_idx); + } + } + } + + /// Finalise the reference scanner and return the resulting matches. + pub fn finalise(self) -> BTreeSet<P> { + self.matches + .into_iter() + .map(|idx| self.candidates[idx].clone()) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // The actual derivation of `nixpkgs.hello`. + const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#; + + #[test] + fn test_no_patterns() { + let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]); + + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 0); + } + + #[test] + fn test_single_match() { + let mut scanner = ReferenceScanner::new(vec![ + "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(), + ]); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + + assert_eq!(result.len(), 1); + assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16")); + } + + #[test] + fn test_multiple_matches() { + let candidates = vec![ + // these exist in the drv: + "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(), + "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(), + "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(), + // this doesn't: + "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(), + ]; + + let mut scanner = ReferenceScanner::new(candidates.clone()); + scanner.scan(HELLO_DRV); + + let result = scanner.finalise(); + assert_eq!(result.len(), 3); + + for c in candidates[..3].iter() { + assert!(result.contains(c)); + } + } +} |