diff options
Diffstat (limited to 'tvix/castore/src/nodes')
-rw-r--r-- | tvix/castore/src/nodes/directory.rs | 216 | ||||
-rw-r--r-- | tvix/castore/src/nodes/directory_node.rs | 37 | ||||
-rw-r--r-- | tvix/castore/src/nodes/file_node.rs | 44 | ||||
-rw-r--r-- | tvix/castore/src/nodes/mod.rs | 53 | ||||
-rw-r--r-- | tvix/castore/src/nodes/symlink_node.rs | 35 |
5 files changed, 125 insertions, 260 deletions
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs index 1752681c89c3..d58b7822a434 100644 --- a/tvix/castore/src/nodes/directory.rs +++ b/tvix/castore/src/nodes/directory.rs @@ -1,23 +1,25 @@ -use crate::{ - proto, B3Digest, DirectoryNode, FileNode, NamedNode, Node, SymlinkNode, ValidateDirectoryError, - ValidateNodeError, -}; - -/// A Directory can contain Directory, File or Symlink nodes. -/// Each of these nodes have a name attribute, which is the basename in that -/// directory and node type specific attributes. -/// While a Node by itself may have any name, the names of Directory entries: +use std::collections::BTreeMap; + +use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode}; + +/// A Directory contains nodes, which can be Directory, File or Symlink nodes. +/// It attached names to these nodes, which is the basename in that directory. +/// These names: /// - MUST not contain slashes or null bytes /// - MUST not be '.' or '..' /// - MUST be unique across all three lists #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct Directory { - nodes: Vec<Node>, + nodes: BTreeMap<bytes::Bytes, Node>, } impl Directory { + /// Constructs a new, empty Directory. + /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes. pub fn new() -> Self { - Directory { nodes: vec![] } + Directory { + nodes: BTreeMap::new(), + } } /// The size of a directory is the number of all regular and symlink elements, @@ -25,7 +27,7 @@ impl Directory { pub fn size(&self) -> u64 { // It's impossible to create a Directory where the size overflows, because we // check before every add() that the size won't overflow. - (self.nodes.len() as u64) + self.directories().map(|e| e.size()).sum::<u64>() + (self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>() } /// Calculates the digest of a Directory, which is the blake3 hash of a @@ -35,62 +37,66 @@ impl Directory { } /// Allows iterating over all nodes (directories, files and symlinks) - /// ordered by their name. - pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ { + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ { self.nodes.iter() } - /// Allows iterating over the FileNode entries of this directory - /// ordered by their name - pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::File(n) => Some(n), + /// Allows iterating over the FileNode entries of this directory. + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::File(n) => Some((name, n)), _ => None, }) } - /// Allows iterating over the subdirectories of this directory - /// ordered by their name - pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Directory(n) => Some(n), + /// Allows iterating over the DirectoryNode entries (subdirectories) of this directory. + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn directories( + &self, + ) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::Directory(n) => Some((name, n)), _ => None, }) } /// Allows iterating over the SymlinkNode entries of this directory - /// ordered by their name - pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Symlink(n) => Some(n), + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn symlinks( + &self, + ) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::Symlink(n) => Some((name, n)), _ => None, }) } /// Checks a Node name for validity as a directory entry /// We disallow slashes, null bytes, '.', '..' and the empty string. - pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { - if name.is_empty() + pub(crate) fn is_valid_name(name: &[u8]) -> bool { + !(name.is_empty() || name == b".." || name == b"." || name.contains(&0x00) - || name.contains(&b'/') - { - Err(ValidateNodeError::InvalidName(name.to_owned().into())) - } else { - Ok(()) - } + || name.contains(&b'/')) } - /// Adds the specified [Node] to the [Directory], preserving sorted entries. + /// Adds the specified [Node] to the [Directory] with a given name. /// /// Inserting an element that already exists with the same name in the directory will yield an /// error. - /// Inserting an element will validate that its name fulfills the stricter requirements for - /// directory entries and yield an error if it is not. - pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> { - Self::validate_node_name(node.get_name()) - .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?; + /// Inserting an element will validate that its name fulfills the + /// requirements for directory entries and yield an error if it is not. + pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> { + if !Self::is_valid_name(&name) { + return Err(DirectoryError::InvalidName(name)); + } // Check that the even after adding this new directory entry, the size calculation will not // overflow @@ -104,24 +110,17 @@ impl Directory { _ => 0, }, ]) - .ok_or(ValidateDirectoryError::SizeOverflow)?; - - // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list - // directly and all previous inserts should have been in-order - let pos = match self - .nodes - .binary_search_by_key(&node.get_name(), |n| n.get_name()) - { - Err(pos) => pos, // There is no node with this name; good! - Ok(_) => { - return Err(ValidateDirectoryError::DuplicateName( - node.get_name().to_vec(), - )) - } - }; + .ok_or(DirectoryError::SizeOverflow)?; - self.nodes.insert(pos, node); - Ok(()) + match self.nodes.entry(name) { + std::collections::btree_map::Entry::Vacant(e) => { + e.insert(node); + Ok(()) + } + std::collections::btree_map::Entry::Occupied(occupied) => { + Err(DirectoryError::DuplicateName(occupied.key().to_vec())) + } + } } } @@ -133,49 +132,58 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { mod test { use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode}; use crate::fixtures::DUMMY_DIGEST; - use crate::ValidateDirectoryError; + use crate::DirectoryError; #[test] fn add_nodes_to_directory() { let mut d = Directory::new(); - d.add(Node::Directory( - DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "b".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "a".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "z".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "f".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "c".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "g".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("t".into(), "a".into()).unwrap(), - )) + d.add( + "t".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("o".into(), "a".into()).unwrap(), - )) + d.add( + "o".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("e".into(), "a".into()).unwrap(), - )) + d.add( + "e".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); // Convert to proto struct and back to ensure we are not generating any invalid structures @@ -188,10 +196,11 @@ mod test { let mut d = Directory::new(); assert_eq!( - d.add(Node::Directory( - DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(), - )), - Err(ValidateDirectoryError::SizeOverflow) + d.add( + "foo".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX)) + ), + Err(DirectoryError::SizeOverflow) ); } @@ -199,16 +208,18 @@ mod test { fn add_duplicate_node_to_directory() { let mut d = Directory::new(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "a".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); assert_eq!( format!( "{}", - d.add(Node::File( - FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "a".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)) + ) .expect_err("adding duplicate dir entry must fail") ), "\"a\" is a duplicate name" @@ -220,13 +231,10 @@ mod test { async fn directory_reject_invalid_name() { let mut dir = Directory::new(); assert!( - dir.add(Node::Symlink( - SymlinkNode::new( - "".into(), // wrong! can not be added to directory - "doesntmatter".into(), - ) - .unwrap() - )) + dir.add( + "".into(), // wrong! can not be added to directory + Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap()) + ) .is_err(), "invalid symlink entry be rejected" ); diff --git a/tvix/castore/src/nodes/directory_node.rs b/tvix/castore/src/nodes/directory_node.rs index afbae1695c63..e6e6312a7a0c 100644 --- a/tvix/castore/src/nodes/directory_node.rs +++ b/tvix/castore/src/nodes/directory_node.rs @@ -1,13 +1,11 @@ -use crate::{B3Digest, NamedNode, ValidateNodeError}; +use crate::B3Digest; /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. -/// It also gives it a `name` and `size`. +/// It also records a`size`. /// Such a node is either an element in the [Directory] it itself is contained in, /// or a standalone root node./ #[derive(Debug, Clone, PartialEq, Eq)] pub struct DirectoryNode { - /// The (base)name of the directory - name: bytes::Bytes, /// The blake3 hash of a Directory message, serialized in protobuf canonical form. digest: B3Digest, /// Number of child elements in the Directory referred to by `digest`. @@ -23,8 +21,8 @@ pub struct DirectoryNode { } impl DirectoryNode { - pub fn new(name: bytes::Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> { - Ok(Self { name, digest, size }) + pub fn new(digest: B3Digest, size: u64) -> Self { + Self { digest, size } } pub fn digest(&self) -> &B3Digest { @@ -34,31 +32,4 @@ impl DirectoryNode { pub fn size(&self) -> u64 { self.size } - - pub fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for DirectoryNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for DirectoryNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/nodes/file_node.rs b/tvix/castore/src/nodes/file_node.rs index 5dd677c17b49..73abe21e59b4 100644 --- a/tvix/castore/src/nodes/file_node.rs +++ b/tvix/castore/src/nodes/file_node.rs @@ -1,11 +1,8 @@ -use crate::{B3Digest, NamedNode, ValidateNodeError}; +use crate::B3Digest; /// A FileNode represents a regular or executable file in a Directory or at the root. #[derive(Debug, Clone, PartialEq, Eq)] pub struct FileNode { - /// The (base)name of the file - name: bytes::Bytes, - /// The blake3 digest of the file contents digest: B3Digest, @@ -17,18 +14,12 @@ pub struct FileNode { } impl FileNode { - pub fn new( - name: bytes::Bytes, - digest: B3Digest, - size: u64, - executable: bool, - ) -> Result<Self, ValidateNodeError> { - Ok(Self { - name, + pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self { + Self { digest, size, executable, - }) + } } pub fn digest(&self) -> &B3Digest { @@ -42,31 +33,4 @@ impl FileNode { pub fn executable(&self) -> bool { self.executable } - - pub fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for FileNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for FileNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs index bb8e14cf856e..47f39ae8f94a 100644 --- a/tvix/castore/src/nodes/mod.rs +++ b/tvix/castore/src/nodes/mod.rs @@ -4,66 +4,17 @@ mod directory_node; mod file_node; mod symlink_node; -use bytes::Bytes; pub use directory::Directory; pub use directory_node::DirectoryNode; pub use file_node::FileNode; pub use symlink_node::SymlinkNode; /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. -/// While a Node by itself may have any name, only those matching specific requirements -/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details). +/// Nodes themselves don't have names, what gives them names is either them +/// being inside a [Directory], or a root node with its own name attached to it. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Node { Directory(DirectoryNode), File(FileNode), Symlink(SymlinkNode), } - -impl Node { - /// Returns the node with a new name. - pub fn rename(self, name: Bytes) -> Self { - match self { - Node::Directory(n) => Node::Directory(n.rename(name)), - Node::File(n) => Node::File(n.rename(name)), - Node::Symlink(n) => Node::Symlink(n.rename(name)), - } - } -} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for Node { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] -/// and [Node], so we can ask all of them for the name easily. -pub trait NamedNode { - fn get_name(&self) -> &Bytes; -} - -impl NamedNode for &Node { - fn get_name(&self) -> &Bytes { - match self { - Node::File(node_file) => node_file.get_name(), - Node::Directory(node_directory) => node_directory.get_name(), - Node::Symlink(node_symlink) => node_symlink.get_name(), - } - } -} -impl NamedNode for Node { - fn get_name(&self) -> &Bytes { - match self { - Node::File(node_file) => node_file.get_name(), - Node::Directory(node_directory) => node_directory.get_name(), - Node::Symlink(node_symlink) => node_symlink.get_name(), - } - } -} diff --git a/tvix/castore/src/nodes/symlink_node.rs b/tvix/castore/src/nodes/symlink_node.rs index 2fac27231a85..6b9df96a5dd3 100644 --- a/tvix/castore/src/nodes/symlink_node.rs +++ b/tvix/castore/src/nodes/symlink_node.rs @@ -1,50 +1,21 @@ -use crate::{NamedNode, ValidateNodeError}; +use crate::ValidateNodeError; /// A SymlinkNode represents a symbolic link in a Directory or at the root. #[derive(Debug, Clone, PartialEq, Eq)] pub struct SymlinkNode { - /// The (base)name of the symlink - name: bytes::Bytes, /// The target of the symlink. target: bytes::Bytes, } impl SymlinkNode { - pub fn new(name: bytes::Bytes, target: bytes::Bytes) -> Result<Self, ValidateNodeError> { + pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> { if target.is_empty() || target.contains(&b'\0') { return Err(ValidateNodeError::InvalidSymlinkTarget(target)); } - Ok(Self { name, target }) + Ok(Self { target }) } pub fn target(&self) -> &bytes::Bytes { &self.target } - - pub(crate) fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for SymlinkNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for SymlinkNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } |