about summary refs log tree commit diff
path: root/tvix/store
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-05-14T11·49+0300
committerflokli <flokli@flokli.de>2023-05-18T06·26+0000
commita1324513ad8c6d8b9cce2881fe90b890541a9b77 (patch)
tree329fdcd152ce2b750d46e79a935fc08c65685672 /tvix/store
parent3a4e29c26141a24caa71a0dbaf40a6f8d1c2adef (diff)
feat(tvix/store/directorysvc): add traverse_to r/6150
This walks from a node further down until it reaches the requested path.

Change-Id: I2f9a15a8601db4d06c95d7b47cd6153264e203e3
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8568
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Diffstat (limited to 'tvix/store')
-rw-r--r--tvix/store/src/directoryservice/mod.rs2
-rw-r--r--tvix/store/src/directoryservice/traverse.rs236
2 files changed, 238 insertions, 0 deletions
diff --git a/tvix/store/src/directoryservice/mod.rs b/tvix/store/src/directoryservice/mod.rs
index 05feb85d4e7b..e8f11269ec1d 100644
--- a/tvix/store/src/directoryservice/mod.rs
+++ b/tvix/store/src/directoryservice/mod.rs
@@ -2,11 +2,13 @@ use crate::{proto, Error};
 mod grpc;
 mod memory;
 mod sled;
+mod traverse;
 mod utils;
 
 pub use self::grpc::GRPCDirectoryService;
 pub use self::memory::MemoryDirectoryService;
 pub use self::sled::SledDirectoryService;
+pub use self::traverse::traverse_to;
 pub use self::utils::DirectoryTraverser;
 
 /// The base trait all Directory services need to implement.
diff --git a/tvix/store/src/directoryservice/traverse.rs b/tvix/store/src/directoryservice/traverse.rs
new file mode 100644
index 000000000000..c407c62c305e
--- /dev/null
+++ b/tvix/store/src/directoryservice/traverse.rs
@@ -0,0 +1,236 @@
+use super::DirectoryService;
+use crate::Error;
+use std::os::unix::prelude::OsStrExt;
+use tracing::{instrument, warn};
+
+/// This traverses from a (root) node to the given (sub)path, returning the Node
+/// at that path, or none, if there's nothing at that path.
+/// TODO: Do we want to rewrite this in a non-recursing fashion, and use
+/// [DirectoryService.get_recursive] to do less lookups?
+/// Or do we consider this to be a non-issue due to store composition and local caching?
+/// TODO: the name of this function (and mod) is a bit bad, because it doesn't
+/// clearly distinguish it from the BFS traversers.
+#[instrument(skip(directory_service))]
+pub fn traverse_to<DS: DirectoryService>(
+    directory_service: &mut DS,
+    node: crate::proto::node::Node,
+    path: &std::path::Path,
+) -> Result<Option<crate::proto::node::Node>, Error> {
+    // strip a possible `/` prefix from the path.
+    let path = {
+        if path.starts_with("/") {
+            path.strip_prefix("/").unwrap()
+        } else {
+            path
+        }
+    };
+
+    let mut it = path.components();
+
+    match it.next() {
+        None => {
+            // the (remaining) path is empty, return the node we've been called with.
+            Ok(Some(node))
+        }
+        Some(first_component) => {
+            // convert first_component to bytes, which are later used for comparison.
+            let first_component_bytes: &[u8] = first_component.as_os_str().as_bytes();
+
+            match node {
+                crate::proto::node::Node::File(_) | crate::proto::node::Node::Symlink(_) => {
+                    // There's still some path left, but the current node is no directory.
+                    // This means the path doesn't exist, as we can't reach it.
+                    Ok(None)
+                }
+                crate::proto::node::Node::Directory(directory_node) => {
+                    // fetch the linked node from the directory_service
+                    let digest: [u8; 32] = directory_node
+                        .digest
+                        .try_into()
+                        .map_err(|_e| Error::StorageError("invalid digest length".to_string()))?;
+
+                    match directory_service.get(&digest)? {
+                        // If we didn't get the directory node that's linked, that's a store inconsistency, bail out!
+                        None => {
+                            let digest_b64 = data_encoding::BASE64.encode(&digest);
+                            warn!("directory {} does not exist", digest_b64);
+                            return Err(Error::StorageError(format!(
+                                "directory {} does not exist",
+                                digest_b64
+                            )));
+                        }
+                        Some(directory) => {
+                            // look for first_component in the [Directory].
+                            // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we
+                            // could stop as soon as e.name is larger than the search string.
+                            let child_node = directory.nodes().find(|n| match n {
+                                crate::proto::node::Node::Directory(e) => {
+                                    &e.name.to_string().into_bytes() == first_component_bytes
+                                }
+                                crate::proto::node::Node::File(e) => {
+                                    &e.name.to_string().into_bytes() == first_component_bytes
+                                }
+                                crate::proto::node::Node::Symlink(e) => {
+                                    &e.name.to_string().into_bytes() == first_component_bytes
+                                }
+                            });
+
+                            match child_node {
+                                // child node not found means there's no such element inside the directory.
+                                None => Ok(None),
+                                // child node found, recurse with it and the rest of the path.
+                                Some(child_node) => {
+                                    let rest_path: std::path::PathBuf = it.collect();
+                                    traverse_to(directory_service, child_node, &rest_path)
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::PathBuf;
+
+    use crate::{
+        directoryservice::DirectoryPutter,
+        directoryservice::DirectoryService,
+        tests::{
+            fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP},
+            utils::gen_directory_service,
+        },
+    };
+
+    use super::traverse_to;
+
+    #[test]
+    fn test_traverse_to() {
+        let mut directory_service = gen_directory_service();
+
+        let mut handle = directory_service.put_multiple_start();
+        handle
+            .put(DIRECTORY_WITH_KEEP.clone())
+            .expect("must succeed");
+        handle
+            .put(DIRECTORY_COMPLICATED.clone())
+            .expect("must succeed");
+
+        // construct the node for DIRECTORY_COMPLICATED
+        let node_directory_complicated =
+            crate::proto::node::Node::Directory(crate::proto::DirectoryNode {
+                name: "doesntmatter".to_string(),
+                digest: DIRECTORY_COMPLICATED.digest().to_vec(),
+                size: DIRECTORY_COMPLICATED.size(),
+            });
+
+        // construct the node for DIRECTORY_COMPLICATED
+        let node_directory_with_keep = crate::proto::node::Node::Directory(
+            DIRECTORY_COMPLICATED.directories.first().unwrap().clone(),
+        );
+
+        // construct the node for the .keep file
+        let node_file_keep =
+            crate::proto::node::Node::File(DIRECTORY_WITH_KEEP.files.first().unwrap().clone());
+
+        // traversal to an empty subpath should return the root node.
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from(""),
+            )
+            .expect("must succeed");
+
+            assert_eq!(Some(node_directory_complicated.clone()), resp);
+        }
+
+        // traversal to `keep` should return the node for DIRECTORY_WITH_KEEP
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("keep"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(Some(node_directory_with_keep.clone()), resp);
+        }
+
+        // traversal to `keep/.keep` should return the node for the .keep file
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("keep/.keep"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(Some(node_file_keep.clone()), resp);
+        }
+
+        // traversal to `keep/.keep` should return the node for the .keep file
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("/keep/.keep"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(Some(node_file_keep.clone()), resp);
+        }
+
+        // traversal to `void` should return None (doesn't exist)
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("void"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(None, resp);
+        }
+
+        // traversal to `void` should return None (doesn't exist)
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("//v/oid"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(None, resp);
+        }
+
+        // traversal to `keep/.keep/404` should return None (the path can't be
+        // reached, as keep/.keep already is a file)
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("keep/.keep/foo"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(None, resp);
+        }
+
+        // traversal to a subpath of '/' should return the root node.
+        {
+            let resp = traverse_to(
+                &mut directory_service,
+                node_directory_complicated.clone(),
+                &PathBuf::from("/"),
+            )
+            .expect("must succeed");
+
+            assert_eq!(Some(node_directory_complicated.clone()), resp);
+        }
+    }
+}