about summary refs log tree commit diff
path: root/tvix/castore/src/nodes/directory.rs
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-08-14T19·00+0300
committerclbot <clbot@tvl.fyi>2024-08-17T09·45+0000
commit49b173786cba575dbeb9d28fa7a62275d45ce41a (patch)
tree722f6e45824c483572cf8d2cafedddbbe86c1bba /tvix/castore/src/nodes/directory.rs
parent04e9531e65159309d5bb18bbfac0ff29c830f50a (diff)
refactor(tvix/castore): remove `name` from Nodes r/8504
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.

This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.

It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).

The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.

The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.

The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.

Some further cleanups:

The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)

There's some leftover cleanups to do:
 - There should be a from_(sorted_)iter and into_iter in Directory, so
   we can construct and deconstruct in one go.
   That should also enable us to implement conversions from and to the
   proto representation that moves, rather than clones.

 - The BuildRequest and PathInfo structs are still proto-based, so we
   still do a bunch of conversions back and forth there (and have some
   ugly expect there). There's not much point for error handling here,
   this will be moved to stricter types in a followup CL.

Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Diffstat (limited to 'tvix/castore/src/nodes/directory.rs')
-rw-r--r--tvix/castore/src/nodes/directory.rs216
1 files changed, 112 insertions, 104 deletions
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs
index 1752681c89c3..d58b7822a434 100644
--- a/tvix/castore/src/nodes/directory.rs
+++ b/tvix/castore/src/nodes/directory.rs
@@ -1,23 +1,25 @@
-use crate::{
-    proto, B3Digest, DirectoryNode, FileNode, NamedNode, Node, SymlinkNode, ValidateDirectoryError,
-    ValidateNodeError,
-};
-
-/// A Directory can contain Directory, File or Symlink nodes.
-/// Each of these nodes have a name attribute, which is the basename in that
-/// directory and node type specific attributes.
-/// While a Node by itself may have any name, the names of Directory entries:
+use std::collections::BTreeMap;
+
+use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode};
+
+/// A Directory contains nodes, which can be Directory, File or Symlink nodes.
+/// It attached names to these nodes, which is the basename in that directory.
+/// These names:
 ///  - MUST not contain slashes or null bytes
 ///  - MUST not be '.' or '..'
 ///  - MUST be unique across all three lists
 #[derive(Default, Debug, Clone, PartialEq, Eq)]
 pub struct Directory {
-    nodes: Vec<Node>,
+    nodes: BTreeMap<bytes::Bytes, Node>,
 }
 
 impl Directory {
+    /// Constructs a new, empty Directory.
+    /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes.
     pub fn new() -> Self {
-        Directory { nodes: vec![] }
+        Directory {
+            nodes: BTreeMap::new(),
+        }
     }
 
     /// The size of a directory is the number of all regular and symlink elements,
@@ -25,7 +27,7 @@ impl Directory {
     pub fn size(&self) -> u64 {
         // It's impossible to create a Directory where the size overflows, because we
         // check before every add() that the size won't overflow.
-        (self.nodes.len() as u64) + self.directories().map(|e| e.size()).sum::<u64>()
+        (self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>()
     }
 
     /// Calculates the digest of a Directory, which is the blake3 hash of a
@@ -35,62 +37,66 @@ impl Directory {
     }
 
     /// Allows iterating over all nodes (directories, files and symlinks)
-    /// ordered by their name.
-    pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ {
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ {
         self.nodes.iter()
     }
 
-    /// Allows iterating over the FileNode entries of this directory
-    /// ordered by their name
-    pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::File(n) => Some(n),
+    /// Allows iterating over the FileNode entries of this directory.
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ {
+        self.nodes.iter().filter_map(|(name, node)| match node {
+            Node::File(n) => Some((name, n)),
             _ => None,
         })
     }
 
-    /// Allows iterating over the subdirectories of this directory
-    /// ordered by their name
-    pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::Directory(n) => Some(n),
+    /// Allows iterating over the DirectoryNode entries (subdirectories) of this directory.
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn directories(
+        &self,
+    ) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ {
+        self.nodes.iter().filter_map(|(name, node)| match node {
+            Node::Directory(n) => Some((name, n)),
             _ => None,
         })
     }
 
     /// Allows iterating over the SymlinkNode entries of this directory
-    /// ordered by their name
-    pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::Symlink(n) => Some(n),
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn symlinks(
+        &self,
+    ) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ {
+        self.nodes.iter().filter_map(|(name, node)| match node {
+            Node::Symlink(n) => Some((name, n)),
             _ => None,
         })
     }
 
     /// Checks a Node name for validity as a directory entry
     /// We disallow slashes, null bytes, '.', '..' and the empty string.
-    pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
-        if name.is_empty()
+    pub(crate) fn is_valid_name(name: &[u8]) -> bool {
+        !(name.is_empty()
             || name == b".."
             || name == b"."
             || name.contains(&0x00)
-            || name.contains(&b'/')
-        {
-            Err(ValidateNodeError::InvalidName(name.to_owned().into()))
-        } else {
-            Ok(())
-        }
+            || name.contains(&b'/'))
     }
 
-    /// Adds the specified [Node] to the [Directory], preserving sorted entries.
+    /// Adds the specified [Node] to the [Directory] with a given name.
     ///
     /// Inserting an element that already exists with the same name in the directory will yield an
     /// error.
-    /// Inserting an element will validate that its name fulfills the stricter requirements for
-    /// directory entries and yield an error if it is not.
-    pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> {
-        Self::validate_node_name(node.get_name())
-            .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?;
+    /// Inserting an element will validate that its name fulfills the
+    /// requirements for directory entries and yield an error if it is not.
+    pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> {
+        if !Self::is_valid_name(&name) {
+            return Err(DirectoryError::InvalidName(name));
+        }
 
         // Check that the even after adding this new directory entry, the size calculation will not
         // overflow
@@ -104,24 +110,17 @@ impl Directory {
                 _ => 0,
             },
         ])
-        .ok_or(ValidateDirectoryError::SizeOverflow)?;
-
-        // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list
-        // directly and all previous inserts should have been in-order
-        let pos = match self
-            .nodes
-            .binary_search_by_key(&node.get_name(), |n| n.get_name())
-        {
-            Err(pos) => pos, // There is no node with this name; good!
-            Ok(_) => {
-                return Err(ValidateDirectoryError::DuplicateName(
-                    node.get_name().to_vec(),
-                ))
-            }
-        };
+        .ok_or(DirectoryError::SizeOverflow)?;
 
-        self.nodes.insert(pos, node);
-        Ok(())
+        match self.nodes.entry(name) {
+            std::collections::btree_map::Entry::Vacant(e) => {
+                e.insert(node);
+                Ok(())
+            }
+            std::collections::btree_map::Entry::Occupied(occupied) => {
+                Err(DirectoryError::DuplicateName(occupied.key().to_vec()))
+            }
+        }
     }
 }
 
@@ -133,49 +132,58 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
 mod test {
     use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
     use crate::fixtures::DUMMY_DIGEST;
-    use crate::ValidateDirectoryError;
+    use crate::DirectoryError;
 
     #[test]
     fn add_nodes_to_directory() {
         let mut d = Directory::new();
 
-        d.add(Node::Directory(
-            DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "b".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
-        d.add(Node::Directory(
-            DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "a".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
-        d.add(Node::Directory(
-            DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "z".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
 
-        d.add(Node::File(
-            FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
+        d.add(
+            "f".into(),
+            Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
+        )
         .unwrap();
-        d.add(Node::File(
-            FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
+        d.add(
+            "c".into(),
+            Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
+        )
         .unwrap();
-        d.add(Node::File(
-            FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
+        d.add(
+            "g".into(),
+            Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
+        )
         .unwrap();
 
-        d.add(Node::Symlink(
-            SymlinkNode::new("t".into(), "a".into()).unwrap(),
-        ))
+        d.add(
+            "t".into(),
+            Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
+        )
         .unwrap();
-        d.add(Node::Symlink(
-            SymlinkNode::new("o".into(), "a".into()).unwrap(),
-        ))
+        d.add(
+            "o".into(),
+            Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
+        )
         .unwrap();
-        d.add(Node::Symlink(
-            SymlinkNode::new("e".into(), "a".into()).unwrap(),
-        ))
+        d.add(
+            "e".into(),
+            Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
+        )
         .unwrap();
 
         // Convert to proto struct and back to ensure we are not generating any invalid structures
@@ -188,10 +196,11 @@ mod test {
         let mut d = Directory::new();
 
         assert_eq!(
-            d.add(Node::Directory(
-                DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(),
-            )),
-            Err(ValidateDirectoryError::SizeOverflow)
+            d.add(
+                "foo".into(),
+                Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX))
+            ),
+            Err(DirectoryError::SizeOverflow)
         );
     }
 
@@ -199,16 +208,18 @@ mod test {
     fn add_duplicate_node_to_directory() {
         let mut d = Directory::new();
 
-        d.add(Node::Directory(
-            DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "a".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
         assert_eq!(
             format!(
                 "{}",
-                d.add(Node::File(
-                    FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-                ))
+                d.add(
+                    "a".into(),
+                    Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true))
+                )
                 .expect_err("adding duplicate dir entry must fail")
             ),
             "\"a\" is a duplicate name"
@@ -220,13 +231,10 @@ mod test {
     async fn directory_reject_invalid_name() {
         let mut dir = Directory::new();
         assert!(
-            dir.add(Node::Symlink(
-                SymlinkNode::new(
-                    "".into(), // wrong! can not be added to directory
-                    "doesntmatter".into(),
-                )
-                .unwrap()
-            ))
+            dir.add(
+                "".into(), // wrong! can not be added to directory
+                Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap())
+            )
             .is_err(),
             "invalid symlink entry be rejected"
         );