about summary refs log tree commit diff
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2023-01-13T22·45+0300
committer tazjin <tazjin@tvl.su> 2023-01-17T10·31+0000
commit 9cb3daee20ad68f6d8351f116d50da789d3f1daf (patch)
tree 05ba37073fed16b91962a0efc06cb3c06550dd59
parent 940251b87f9d73087e2f51411fff9eba84a7108e (diff)
refactor(tvix/cli): reference scanner owns all the strings r/5671
This gets very complex very quickly otherwise, as all the construction
paths for a reference scanner and all the access patterns for the
KnownPaths structure are not yet fully understood.

Change-Id: Ibadf1f18b476695f3c286fc6896ae557760edf63
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7827
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
-rw-r--r-- tvix/cli/src/known_paths.rs 4
-rw-r--r-- tvix/cli/src/refscan.rs 75
2 files changed, 29 insertions, 50 deletions
diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
index 10191c967e..63372c9caa 100644
--- a/tvix/cli/src/known_paths.rs
+++ b/tvix/cli/src/known_paths.rs
@@ -107,8 +107,8 @@ impl KnownPaths {
     }
 
     /// Create a reference scanner from the current set of known paths.
-    pub fn reference_scanner<'a>(&'a self) -> ReferenceScanner<'a> {
-        let candidates: Vec<&'a str> = self.paths.keys().map(|s| s.as_str()).collect();
+    pub fn reference_scanner(&self) -> ReferenceScanner {
+        let candidates = self.paths.keys().map(Clone::clone).collect();
         ReferenceScanner::new(candidates)
     }
 }
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
index 6ef486385d..74110e1088 100644
--- a/tvix/cli/src/refscan.rs
+++ b/tvix/cli/src/refscan.rs
@@ -13,33 +13,16 @@ use std::io;
 
 /// Represents a "primed" reference scanner with an automaton that knows the set
 /// of store paths to scan for.
-pub struct ReferenceScanner<'s> {
-    candidates: Vec<&'s str>,
+pub struct ReferenceScanner {
+    candidates: Vec<String>,
     searcher: AhoCorasick,
-    matches: BTreeSet<&'s str>,
+    matches: Vec<usize>,
 }
 
-pub trait ToOwnedVec<T> {
-    fn to_owned_vec(self) -> Vec<T>;
-}
-
-impl<T: Clone> ToOwnedVec<T> for &[T] {
-    fn to_owned_vec(self) -> Vec<T> {
-        self.to_vec()
-    }
-}
-
-impl<T> ToOwnedVec<T> for Vec<T> {
-    fn to_owned_vec(self) -> Vec<T> {
-        self
-    }
-}
-
-impl<'s> ReferenceScanner<'s> {
+impl ReferenceScanner {
     /// Construct a new `ReferenceScanner` that knows how to scan for the given
     /// candidate store paths.
-    pub fn new<V: ToOwnedVec<&'s str>>(candidates: V) -> Self {
-        let candidates = candidates.to_owned_vec();
+    pub fn new(candidates: Vec<String>) -> Self {
         let searcher = AhoCorasick::new_auto_configured(&candidates);
 
         ReferenceScanner {
@@ -53,8 +36,7 @@ impl<'s> ReferenceScanner<'s> {
     /// in the scanner.
     pub fn scan_str<H: AsRef<[u8]>>(&mut self, haystack: H) {
         for m in self.searcher.find_iter(&haystack) {
-            let needle = self.candidates[m.pattern()];
-            self.matches.insert(needle);
+            self.matches.push(m.pattern());
         }
     }
 
@@ -67,16 +49,18 @@ impl<'s> ReferenceScanner<'s> {
     /// [`AhoCorasick::stream_find_iter`] for details on this.
     pub fn scan_stream<R: io::Read>(&mut self, stream: R) -> io::Result<()> {
         for m in self.searcher.stream_find_iter(stream) {
-            let needle = self.candidates[m?.pattern()];
-            self.matches.insert(needle);
+            self.matches.push(m?.pattern());
         }
 
         Ok(())
     }
 
     /// Finalise the reference scanner and return the resulting matches.
-    pub fn finalise(self) -> BTreeSet<&'s str> {
+    pub fn finalise(self) -> BTreeSet<String> {
         self.matches
+            .into_iter()
+            .map(|idx| self.candidates[idx].clone())
+            .collect()
     }
 }
 
@@ -87,23 +71,18 @@ mod tests {
     // The actual derivation of `nixpkgs.hello`.
     const HELLO_DRV: &'static str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
 
-    impl<T: Clone, const N: usize> ToOwnedVec<T> for &[T; N] {
-        fn to_owned_vec(self) -> Vec<T> {
-            self.to_vec()
-        }
-    }
-
     #[test]
     fn test_empty() {
-        let mut scanner = ReferenceScanner::new(&[]);
+        let mut scanner = ReferenceScanner::new(vec![]);
         scanner.scan_str("hello world");
         assert!(scanner.finalise().is_empty());
     }
 
     #[test]
     fn test_single_match() {
-        let mut scanner =
-            ReferenceScanner::new(&["/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16"]);
+        let mut scanner = ReferenceScanner::new(vec![
+            "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(),
+        ]);
         scanner.scan_str(HELLO_DRV);
 
         let result = scanner.finalise();
@@ -114,16 +93,16 @@ mod tests {
 
     #[test]
     fn test_multiple_matches() {
-        let candidates = &[
+        let candidates = vec![
             // these exist in the drv:
-            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1",
-            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",
-            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux",
+            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
+            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
+            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
             // this doesn't:
-            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv",
+            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
         ];
 
-        let mut scanner = ReferenceScanner::new(candidates);
+        let mut scanner = ReferenceScanner::new(candidates.clone());
         scanner.scan_str(HELLO_DRV);
 
         let result = scanner.finalise();
@@ -136,16 +115,16 @@ mod tests {
 
     #[test]
     fn test_multiple_stream() {
-        let candidates = &[
+        let candidates = vec![
             // these exist in the drv:
-            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1",
-            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",
-            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux",
+            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
+            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
+            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
             // this doesn't:
-            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv",
+            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
         ];
 
-        let mut scanner = ReferenceScanner::new(candidates);
+        let mut scanner = ReferenceScanner::new(candidates.clone());
         scanner
             .scan_stream(HELLO_DRV.as_bytes())
             .expect("scanning should succeed");