diff options
author | Florian Klink <flokli@flokli.de> | 2024-08-13T17·04+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-13T18·39+0000 |
commit | c7845f3c882d0f1b215813bf0ef8231c2c03d7b6 (patch) | |
tree | 681774cbfa98e5021aa68b361bc937b88881b5c8 /tvix/castore/src/directoryservice | |
parent | 2f4185ff1a54e1bdbaa062a9e4e1c8137d141762 (diff) |
refactor(tvix/castore): move *Node and Directory to crate root r/8486
*Node and Directory are types of the tvix-castore model, not the tvix DirectoryService model. A DirectoryService only happens to send Directories. Move types into individual files in a nodes/ subdirectory, as it's gotten too cluttered in a single file, and (re-)export all types from the crate root. This has the effect that we now cannot poke at private fields directly from other files inside `crate::directoryservice` (as it's not all in the same file anymore), but that's a good thing, it now forces us to go through the proper accessors. For the same reasons, we currently also need to introduce the `rename` functions on each *Node directly. A followup is gonna move the names out of the individual enum kinds, so we can better represent "unnamed nodes". Change-Id: Icdb34dcfe454c41c94f2396e8e99973d27db8418 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12199 Reviewed-by: yuka <yuka@yuka.dev> Autosubmit: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore/src/directoryservice')
-rw-r--r-- | tvix/castore/src/directoryservice/directory_graph.rs | 13 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/mod.rs | 463 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/order_validator.rs | 6 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/tests/mod.rs | 2 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/traverse.rs | 12 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/utils.rs | 2 |
6 files changed, 17 insertions, 481 deletions
diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs index aa60f68a3197..0cf2f71adb06 100644 --- a/tvix/castore/src/directoryservice/directory_graph.rs +++ b/tvix/castore/src/directoryservice/directory_graph.rs @@ -10,8 +10,7 @@ use petgraph::{ use tracing::instrument; use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator}; -use super::{Directory, DirectoryNode}; -use crate::B3Digest; +use crate::{B3Digest, Directory, DirectoryNode, NamedNode}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -72,11 +71,11 @@ pub struct ValidatedDirectoryGraph { fn check_edge(dir: &DirectoryNode, child: &Directory) -> Result<(), Error> { // Ensure the size specified in the child node matches our records. - if dir.size != child.size() { + if dir.size() != child.size() { return Err(Error::ValidationError(format!( "'{}' has wrong size, specified {}, recorded {}", - dir.name.as_bstr(), - dir.size, + dir.get_name().as_bstr(), + dir.size(), child.size(), ))); } @@ -145,7 +144,7 @@ impl<O: OrderValidator> DirectoryGraph<O> { for subdir in directory.directories() { let child_ix = *self .digest_to_node_ix - .entry(subdir.digest.clone()) + .entry(subdir.digest().clone()) .or_insert_with(|| self.graph.add_node(None)); let pending_edge_check = match &self.graph[child_ix] { @@ -268,8 +267,8 @@ impl ValidatedDirectoryGraph { */ #[cfg(test)] mod tests { - use crate::directoryservice::{Directory, DirectoryNode, Node}; use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C}; + use crate::{Directory, DirectoryNode, Node}; use lazy_static::lazy_static; use rstest::rstest; diff --git a/tvix/castore/src/directoryservice/mod.rs b/tvix/castore/src/directoryservice/mod.rs index 0141a48f75bc..9f23176e31d0 100644 --- a/tvix/castore/src/directoryservice/mod.rs +++ b/tvix/castore/src/directoryservice/mod.rs @@ -1,9 +1,6 @@ use crate::composition::{Registry, ServiceBuilder}; -use crate::proto; -use crate::{B3Digest, Error}; -use crate::{ValidateDirectoryError, ValidateNodeError}; +use crate::{B3Digest, Directory, Error}; -use bytes::Bytes; use futures::stream::BoxStream; use tonic::async_trait; mod combinators; @@ -148,461 +145,3 @@ pub(crate) fn register_directory_services(reg: &mut Registry) { reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::BigtableParameters>("bigtable"); } } - -/// A Directory can contain Directory, File or Symlink nodes. -/// Each of these nodes have a name attribute, which is the basename in that -/// directory and node type specific attributes. -/// While a Node by itself may have any name, the names of Directory entries: -/// - MUST not contain slashes or null bytes -/// - MUST not be '.' or '..' -/// - MUST be unique across all three lists -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct Directory { - nodes: Vec<Node>, -} - -/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. -/// It also gives it a `name` and `size`. -/// Such a node is either an element in the [Directory] it itself is contained in, -/// or a standalone root node./ -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DirectoryNode { - /// The (base)name of the directory - name: Bytes, - /// The blake3 hash of a Directory message, serialized in protobuf canonical form. - digest: B3Digest, - /// Number of child elements in the Directory referred to by `digest`. - /// Calculated by summing up the numbers of nodes, and for each directory. - /// its size field. Can be used for inode allocation. - /// This field is precisely as verifiable as any other Merkle tree edge. - /// Resolve `digest`, and you can compute it incrementally. Resolve the entire - /// tree, and you can fully compute it from scratch. - /// A credulous implementation won't reject an excessive size, but this is - /// harmless: you'll have some ordinals without nodes. Undersizing is obvious - /// and easy to reject: you won't have an ordinal for some nodes. - size: u64, -} - -impl DirectoryNode { - pub fn new(name: Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> { - Ok(Self { name, digest, size }) - } - - pub fn digest(&self) -> &B3Digest { - &self.digest - } - pub fn size(&self) -> u64 { - self.size - } -} - -/// A FileNode represents a regular or executable file in a Directory or at the root. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct FileNode { - /// The (base)name of the file - name: Bytes, - - /// The blake3 digest of the file contents - digest: B3Digest, - - /// The file content size - size: u64, - - /// Whether the file is executable - executable: bool, -} - -impl FileNode { - pub fn new( - name: Bytes, - digest: B3Digest, - size: u64, - executable: bool, - ) -> Result<Self, ValidateNodeError> { - Ok(Self { - name, - digest, - size, - executable, - }) - } - - pub fn digest(&self) -> &B3Digest { - &self.digest - } - - pub fn size(&self) -> u64 { - self.size - } - - pub fn executable(&self) -> bool { - self.executable - } -} - -/// A SymlinkNode represents a symbolic link in a Directory or at the root. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct SymlinkNode { - /// The (base)name of the symlink - name: Bytes, - /// The target of the symlink. - target: Bytes, -} - -impl SymlinkNode { - pub fn new(name: Bytes, target: Bytes) -> Result<Self, ValidateNodeError> { - if target.is_empty() || target.contains(&b'\0') { - return Err(ValidateNodeError::InvalidSymlinkTarget(target)); - } - Ok(Self { name, target }) - } - - pub fn target(&self) -> &bytes::Bytes { - &self.target - } -} - -/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. -/// While a Node by itself may have any name, only those matching specific requirements -/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details). -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Node { - Directory(DirectoryNode), - File(FileNode), - Symlink(SymlinkNode), -} - -/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] -/// and [Node], so we can ask all of them for the name easily. -pub trait NamedNode { - fn get_name(&self) -> &bytes::Bytes; -} - -impl NamedNode for &FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} - -impl NamedNode for &DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} - -impl NamedNode for &SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} - -impl NamedNode for &Node { - fn get_name(&self) -> &bytes::Bytes { - match self { - Node::File(node_file) => &node_file.name, - Node::Directory(node_directory) => &node_directory.name, - Node::Symlink(node_symlink) => &node_symlink.name, - } - } -} -impl NamedNode for Node { - fn get_name(&self) -> &bytes::Bytes { - match self { - Node::File(node_file) => &node_file.name, - Node::Directory(node_directory) => &node_directory.name, - Node::Symlink(node_symlink) => &node_symlink.name, - } - } -} - -impl Node { - /// Returns the node with a new name. - pub fn rename(self, name: bytes::Bytes) -> Self { - match self { - Node::Directory(n) => Node::Directory(DirectoryNode { name, ..n }), - Node::File(n) => Node::File(FileNode { name, ..n }), - Node::Symlink(n) => Node::Symlink(SymlinkNode { name, ..n }), - } - } -} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for Node { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl PartialOrd for FileNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for FileNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl PartialOrd for DirectoryNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for DirectoryNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl PartialOrd for SymlinkNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for SymlinkNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { - iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i)) -} - -impl Directory { - pub fn new() -> Self { - Directory { nodes: vec![] } - } - - /// The size of a directory is the number of all regular and symlink elements, - /// the number of directory elements, and their size fields. - pub fn size(&self) -> u64 { - // It's impossible to create a Directory where the size overflows, because we - // check before every add() that the size won't overflow. - (self.nodes.len() as u64) + self.directories().map(|e| e.size).sum::<u64>() - } - - /// Calculates the digest of a Directory, which is the blake3 hash of a - /// Directory protobuf message, serialized in protobuf canonical form. - pub fn digest(&self) -> B3Digest { - proto::Directory::from(self).digest() - } - - /// Allows iterating over all nodes (directories, files and symlinks) - /// ordered by their name. - pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ { - self.nodes.iter() - } - - /// Allows iterating over the FileNode entries of this directory - /// ordered by their name - pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::File(n) => Some(n), - _ => None, - }) - } - - /// Allows iterating over the subdirectories of this directory - /// ordered by their name - pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Directory(n) => Some(n), - _ => None, - }) - } - - /// Allows iterating over the SymlinkNode entries of this directory - /// ordered by their name - pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Symlink(n) => Some(n), - _ => None, - }) - } - - /// Checks a Node name for validity as a directory entry - /// We disallow slashes, null bytes, '.', '..' and the empty string. - pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { - if name.is_empty() - || name == b".." - || name == b"." - || name.contains(&0x00) - || name.contains(&b'/') - { - Err(ValidateNodeError::InvalidName(name.to_owned().into())) - } else { - Ok(()) - } - } - - /// Adds the specified [Node] to the [Directory], preserving sorted entries. - /// - /// Inserting an element that already exists with the same name in the directory will yield an - /// error. - /// Inserting an element will validate that its name fulfills the stricter requirements for - /// directory entries and yield an error if it is not. - pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> { - Self::validate_node_name(node.get_name()) - .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?; - - // Check that the even after adding this new directory entry, the size calculation will not - // overflow - // FUTUREWORK: add some sort of batch add interface which only does this check once with - // all the to-be-added entries - checked_sum([ - self.size(), - 1, - match node { - Node::Directory(ref dir) => dir.size, - _ => 0, - }, - ]) - .ok_or(ValidateDirectoryError::SizeOverflow)?; - - // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list - // directly and all previous inserts should have been in-order - let pos = match self - .nodes - .binary_search_by_key(&node.get_name(), |n| n.get_name()) - { - Err(pos) => pos, // There is no node with this name; good! - Ok(_) => { - return Err(ValidateDirectoryError::DuplicateName( - node.get_name().to_vec(), - )) - } - }; - - self.nodes.insert(pos, node); - Ok(()) - } -} - -#[cfg(test)] -mod test { - use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode}; - use crate::fixtures::DUMMY_DIGEST; - use crate::ValidateDirectoryError; - - #[test] - fn add_nodes_to_directory() { - let mut d = Directory::new(); - - d.add(Node::Directory( - DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) - .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) - .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) - .unwrap(); - - d.add(Node::File( - FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) - .unwrap(); - d.add(Node::File( - FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) - .unwrap(); - d.add(Node::File( - FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) - .unwrap(); - - d.add(Node::Symlink( - SymlinkNode::new("t".into(), "a".into()).unwrap(), - )) - .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("o".into(), "a".into()).unwrap(), - )) - .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("e".into(), "a".into()).unwrap(), - )) - .unwrap(); - - // Convert to proto struct and back to ensure we are not generating any invalid structures - crate::directoryservice::Directory::try_from(crate::proto::Directory::from(d)) - .expect("directory should be valid"); - } - - #[test] - fn validate_overflow() { - let mut d = Directory::new(); - - assert_eq!( - d.add(Node::Directory( - DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(), - )), - Err(ValidateDirectoryError::SizeOverflow) - ); - } - - #[test] - fn add_duplicate_node_to_directory() { - let mut d = Directory::new(); - - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) - .unwrap(); - assert_eq!( - format!( - "{}", - d.add(Node::File( - FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) - .expect_err("adding duplicate dir entry must fail") - ), - "\"a\" is a duplicate name" - ); - } - - /// Attempt to add a directory entry with a name which should be rejected. - #[tokio::test] - async fn directory_reject_invalid_name() { - let mut dir = Directory::new(); - assert!( - dir.add(Node::Symlink( - SymlinkNode::new( - "".into(), // wrong! can not be added to directory - "doesntmatter".into(), - ) - .unwrap() - )) - .is_err(), - "invalid symlink entry be rejected" - ); - } -} diff --git a/tvix/castore/src/directoryservice/order_validator.rs b/tvix/castore/src/directoryservice/order_validator.rs index 17431fc8d3b0..f03bb7ff9a86 100644 --- a/tvix/castore/src/directoryservice/order_validator.rs +++ b/tvix/castore/src/directoryservice/order_validator.rs @@ -50,7 +50,7 @@ impl RootToLeavesValidator { for subdir in directory.directories() { // Allow the children to appear next - self.expected_digests.insert(subdir.digest.clone()); + self.expected_digests.insert(subdir.digest().clone()); } } } @@ -80,10 +80,10 @@ impl OrderValidator for LeavesToRootValidator { let digest = directory.digest(); for subdir in directory.directories() { - if !self.allowed_references.contains(&subdir.digest) { + if !self.allowed_references.contains(subdir.digest()) { warn!( directory.digest = %digest, - subdirectory.digest = %subdir.digest, + subdirectory.digest = %subdir.digest(), "unexpected directory reference" ); return false; diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs index 3923e66dc2c2..7a15f44a686e 100644 --- a/tvix/castore/src/directoryservice/tests/mod.rs +++ b/tvix/castore/src/directoryservice/tests/mod.rs @@ -8,8 +8,8 @@ use rstest_reuse::{self, *}; use super::DirectoryService; use crate::directoryservice; -use crate::directoryservice::{Directory, DirectoryNode, Node}; use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C, DIRECTORY_D}; +use crate::{Directory, DirectoryNode, Node}; mod utils; use self::utils::make_grpc_directory_service_client; diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs index 3e6dc73a4d6b..ff667e0f65c0 100644 --- a/tvix/castore/src/directoryservice/traverse.rs +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -1,5 +1,4 @@ -use super::{DirectoryService, NamedNode, Node}; -use crate::{Error, Path}; +use crate::{directoryservice::DirectoryService, Error, NamedNode, Node, Path}; use tracing::{instrument, warn}; /// This descends from a (root) node to the given (sub)path, returning the Node @@ -25,15 +24,15 @@ where // fetch the linked node from the directory_service. let directory = directory_service .as_ref() - .get(&directory_node.digest) + .get(directory_node.digest()) .await? .ok_or_else(|| { // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! - warn!("directory {} does not exist", directory_node.digest); + warn!("directory {} does not exist", directory_node.digest()); Error::StorageError(format!( "directory {} does not exist", - directory_node.digest + directory_node.digest() )) })?; @@ -59,9 +58,8 @@ where mod tests { use crate::{ directoryservice, - directoryservice::{DirectoryNode, Node}, fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP}, - PathBuf, + DirectoryNode, Node, PathBuf, }; use super::descend_to; diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs index 352362811a97..f1133663ede7 100644 --- a/tvix/castore/src/directoryservice/utils.rs +++ b/tvix/castore/src/directoryservice/utils.rs @@ -59,7 +59,7 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>( // This panics if the digest looks invalid, it's supposed to be checked first. for child_directory_node in current_directory.directories() { // TODO: propagate error - let child_digest: B3Digest = child_directory_node.digest.clone(); + let child_digest: B3Digest = child_directory_node.digest().clone(); if worklist_directory_digests.contains(&child_digest) || sent_directory_digests.contains(&child_digest) |