From a1324513ad8c6d8b9cce2881fe90b890541a9b77 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sun, 14 May 2023 14:49:53 +0300 Subject: feat(tvix/store/directorysvc): add traverse_to This walks from a node further down until it reaches the requested path. Change-Id: I2f9a15a8601db4d06c95d7b47cd6153264e203e3 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8568 Reviewed-by: tazjin Tested-by: BuildkiteCI Autosubmit: flokli --- tvix/store/src/directoryservice/mod.rs | 2 + tvix/store/src/directoryservice/traverse.rs | 236 ++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 tvix/store/src/directoryservice/traverse.rs diff --git a/tvix/store/src/directoryservice/mod.rs b/tvix/store/src/directoryservice/mod.rs index 05feb85d4e7b..e8f11269ec1d 100644 --- a/tvix/store/src/directoryservice/mod.rs +++ b/tvix/store/src/directoryservice/mod.rs @@ -2,11 +2,13 @@ use crate::{proto, Error}; mod grpc; mod memory; mod sled; +mod traverse; mod utils; pub use self::grpc::GRPCDirectoryService; pub use self::memory::MemoryDirectoryService; pub use self::sled::SledDirectoryService; +pub use self::traverse::traverse_to; pub use self::utils::DirectoryTraverser; /// The base trait all Directory services need to implement. diff --git a/tvix/store/src/directoryservice/traverse.rs b/tvix/store/src/directoryservice/traverse.rs new file mode 100644 index 000000000000..c407c62c305e --- /dev/null +++ b/tvix/store/src/directoryservice/traverse.rs @@ -0,0 +1,236 @@ +use super::DirectoryService; +use crate::Error; +use std::os::unix::prelude::OsStrExt; +use tracing::{instrument, warn}; + +/// This traverses from a (root) node to the given (sub)path, returning the Node +/// at that path, or none, if there's nothing at that path. +/// TODO: Do we want to rewrite this in a non-recursing fashion, and use +/// [DirectoryService.get_recursive] to do less lookups? +/// Or do we consider this to be a non-issue due to store composition and local caching? +/// TODO: the name of this function (and mod) is a bit bad, because it doesn't +/// clearly distinguish it from the BFS traversers. +#[instrument(skip(directory_service))] +pub fn traverse_to( + directory_service: &mut DS, + node: crate::proto::node::Node, + path: &std::path::Path, +) -> Result, Error> { + // strip a possible `/` prefix from the path. + let path = { + if path.starts_with("/") { + path.strip_prefix("/").unwrap() + } else { + path + } + }; + + let mut it = path.components(); + + match it.next() { + None => { + // the (remaining) path is empty, return the node we've been called with. + Ok(Some(node)) + } + Some(first_component) => { + // convert first_component to bytes, which are later used for comparison. + let first_component_bytes: &[u8] = first_component.as_os_str().as_bytes(); + + match node { + crate::proto::node::Node::File(_) | crate::proto::node::Node::Symlink(_) => { + // There's still some path left, but the current node is no directory. + // This means the path doesn't exist, as we can't reach it. + Ok(None) + } + crate::proto::node::Node::Directory(directory_node) => { + // fetch the linked node from the directory_service + let digest: [u8; 32] = directory_node + .digest + .try_into() + .map_err(|_e| Error::StorageError("invalid digest length".to_string()))?; + + match directory_service.get(&digest)? { + // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! + None => { + let digest_b64 = data_encoding::BASE64.encode(&digest); + warn!("directory {} does not exist", digest_b64); + return Err(Error::StorageError(format!( + "directory {} does not exist", + digest_b64 + ))); + } + Some(directory) => { + // look for first_component in the [Directory]. + // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we + // could stop as soon as e.name is larger than the search string. + let child_node = directory.nodes().find(|n| match n { + crate::proto::node::Node::Directory(e) => { + &e.name.to_string().into_bytes() == first_component_bytes + } + crate::proto::node::Node::File(e) => { + &e.name.to_string().into_bytes() == first_component_bytes + } + crate::proto::node::Node::Symlink(e) => { + &e.name.to_string().into_bytes() == first_component_bytes + } + }); + + match child_node { + // child node not found means there's no such element inside the directory. + None => Ok(None), + // child node found, recurse with it and the rest of the path. + Some(child_node) => { + let rest_path: std::path::PathBuf = it.collect(); + traverse_to(directory_service, child_node, &rest_path) + } + } + } + } + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::{ + directoryservice::DirectoryPutter, + directoryservice::DirectoryService, + tests::{ + fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP}, + utils::gen_directory_service, + }, + }; + + use super::traverse_to; + + #[test] + fn test_traverse_to() { + let mut directory_service = gen_directory_service(); + + let mut handle = directory_service.put_multiple_start(); + handle + .put(DIRECTORY_WITH_KEEP.clone()) + .expect("must succeed"); + handle + .put(DIRECTORY_COMPLICATED.clone()) + .expect("must succeed"); + + // construct the node for DIRECTORY_COMPLICATED + let node_directory_complicated = + crate::proto::node::Node::Directory(crate::proto::DirectoryNode { + name: "doesntmatter".to_string(), + digest: DIRECTORY_COMPLICATED.digest().to_vec(), + size: DIRECTORY_COMPLICATED.size(), + }); + + // construct the node for DIRECTORY_COMPLICATED + let node_directory_with_keep = crate::proto::node::Node::Directory( + DIRECTORY_COMPLICATED.directories.first().unwrap().clone(), + ); + + // construct the node for the .keep file + let node_file_keep = + crate::proto::node::Node::File(DIRECTORY_WITH_KEEP.files.first().unwrap().clone()); + + // traversal to an empty subpath should return the root node. + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from(""), + ) + .expect("must succeed"); + + assert_eq!(Some(node_directory_complicated.clone()), resp); + } + + // traversal to `keep` should return the node for DIRECTORY_WITH_KEEP + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("keep"), + ) + .expect("must succeed"); + + assert_eq!(Some(node_directory_with_keep.clone()), resp); + } + + // traversal to `keep/.keep` should return the node for the .keep file + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("keep/.keep"), + ) + .expect("must succeed"); + + assert_eq!(Some(node_file_keep.clone()), resp); + } + + // traversal to `keep/.keep` should return the node for the .keep file + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("/keep/.keep"), + ) + .expect("must succeed"); + + assert_eq!(Some(node_file_keep.clone()), resp); + } + + // traversal to `void` should return None (doesn't exist) + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("void"), + ) + .expect("must succeed"); + + assert_eq!(None, resp); + } + + // traversal to `void` should return None (doesn't exist) + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("//v/oid"), + ) + .expect("must succeed"); + + assert_eq!(None, resp); + } + + // traversal to `keep/.keep/404` should return None (the path can't be + // reached, as keep/.keep already is a file) + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("keep/.keep/foo"), + ) + .expect("must succeed"); + + assert_eq!(None, resp); + } + + // traversal to a subpath of '/' should return the root node. + { + let resp = traverse_to( + &mut directory_service, + node_directory_complicated.clone(), + &PathBuf::from("/"), + ) + .expect("must succeed"); + + assert_eq!(Some(node_directory_complicated.clone()), resp); + } + } +} -- cgit 1.4.1