about summary refs log tree commit diff
path: root/tvix/castore/src/nodes
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/castore/src/nodes')
-rw-r--r--tvix/castore/src/nodes/directory.rs216
-rw-r--r--tvix/castore/src/nodes/directory_node.rs37
-rw-r--r--tvix/castore/src/nodes/file_node.rs44
-rw-r--r--tvix/castore/src/nodes/mod.rs53
-rw-r--r--tvix/castore/src/nodes/symlink_node.rs35
5 files changed, 125 insertions, 260 deletions
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs
index 1752681c89c3..d58b7822a434 100644
--- a/tvix/castore/src/nodes/directory.rs
+++ b/tvix/castore/src/nodes/directory.rs
@@ -1,23 +1,25 @@
-use crate::{
-    proto, B3Digest, DirectoryNode, FileNode, NamedNode, Node, SymlinkNode, ValidateDirectoryError,
-    ValidateNodeError,
-};
-
-/// A Directory can contain Directory, File or Symlink nodes.
-/// Each of these nodes have a name attribute, which is the basename in that
-/// directory and node type specific attributes.
-/// While a Node by itself may have any name, the names of Directory entries:
+use std::collections::BTreeMap;
+
+use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode};
+
+/// A Directory contains nodes, which can be Directory, File or Symlink nodes.
+/// It attached names to these nodes, which is the basename in that directory.
+/// These names:
 ///  - MUST not contain slashes or null bytes
 ///  - MUST not be '.' or '..'
 ///  - MUST be unique across all three lists
 #[derive(Default, Debug, Clone, PartialEq, Eq)]
 pub struct Directory {
-    nodes: Vec<Node>,
+    nodes: BTreeMap<bytes::Bytes, Node>,
 }
 
 impl Directory {
+    /// Constructs a new, empty Directory.
+    /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes.
     pub fn new() -> Self {
-        Directory { nodes: vec![] }
+        Directory {
+            nodes: BTreeMap::new(),
+        }
     }
 
     /// The size of a directory is the number of all regular and symlink elements,
@@ -25,7 +27,7 @@ impl Directory {
     pub fn size(&self) -> u64 {
         // It's impossible to create a Directory where the size overflows, because we
         // check before every add() that the size won't overflow.
-        (self.nodes.len() as u64) + self.directories().map(|e| e.size()).sum::<u64>()
+        (self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>()
     }
 
     /// Calculates the digest of a Directory, which is the blake3 hash of a
@@ -35,62 +37,66 @@ impl Directory {
     }
 
     /// Allows iterating over all nodes (directories, files and symlinks)
-    /// ordered by their name.
-    pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ {
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ {
         self.nodes.iter()
     }
 
-    /// Allows iterating over the FileNode entries of this directory
-    /// ordered by their name
-    pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::File(n) => Some(n),
+    /// Allows iterating over the FileNode entries of this directory.
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ {
+        self.nodes.iter().filter_map(|(name, node)| match node {
+            Node::File(n) => Some((name, n)),
             _ => None,
         })
     }
 
-    /// Allows iterating over the subdirectories of this directory
-    /// ordered by their name
-    pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::Directory(n) => Some(n),
+    /// Allows iterating over the DirectoryNode entries (subdirectories) of this directory.
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn directories(
+        &self,
+    ) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ {
+        self.nodes.iter().filter_map(|(name, node)| match node {
+            Node::Directory(n) => Some((name, n)),
             _ => None,
         })
     }
 
     /// Allows iterating over the SymlinkNode entries of this directory
-    /// ordered by their name
-    pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::Symlink(n) => Some(n),
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn symlinks(
+        &self,
+    ) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ {
+        self.nodes.iter().filter_map(|(name, node)| match node {
+            Node::Symlink(n) => Some((name, n)),
             _ => None,
         })
     }
 
     /// Checks a Node name for validity as a directory entry
     /// We disallow slashes, null bytes, '.', '..' and the empty string.
-    pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
-        if name.is_empty()
+    pub(crate) fn is_valid_name(name: &[u8]) -> bool {
+        !(name.is_empty()
             || name == b".."
             || name == b"."
             || name.contains(&0x00)
-            || name.contains(&b'/')
-        {
-            Err(ValidateNodeError::InvalidName(name.to_owned().into()))
-        } else {
-            Ok(())
-        }
+            || name.contains(&b'/'))
     }
 
-    /// Adds the specified [Node] to the [Directory], preserving sorted entries.
+    /// Adds the specified [Node] to the [Directory] with a given name.
     ///
     /// Inserting an element that already exists with the same name in the directory will yield an
     /// error.
-    /// Inserting an element will validate that its name fulfills the stricter requirements for
-    /// directory entries and yield an error if it is not.
-    pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> {
-        Self::validate_node_name(node.get_name())
-            .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?;
+    /// Inserting an element will validate that its name fulfills the
+    /// requirements for directory entries and yield an error if it is not.
+    pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> {
+        if !Self::is_valid_name(&name) {
+            return Err(DirectoryError::InvalidName(name));
+        }
 
         // Check that the even after adding this new directory entry, the size calculation will not
         // overflow
@@ -104,24 +110,17 @@ impl Directory {
                 _ => 0,
             },
         ])
-        .ok_or(ValidateDirectoryError::SizeOverflow)?;
-
-        // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list
-        // directly and all previous inserts should have been in-order
-        let pos = match self
-            .nodes
-            .binary_search_by_key(&node.get_name(), |n| n.get_name())
-        {
-            Err(pos) => pos, // There is no node with this name; good!
-            Ok(_) => {
-                return Err(ValidateDirectoryError::DuplicateName(
-                    node.get_name().to_vec(),
-                ))
-            }
-        };
+        .ok_or(DirectoryError::SizeOverflow)?;
 
-        self.nodes.insert(pos, node);
-        Ok(())
+        match self.nodes.entry(name) {
+            std::collections::btree_map::Entry::Vacant(e) => {
+                e.insert(node);
+                Ok(())
+            }
+            std::collections::btree_map::Entry::Occupied(occupied) => {
+                Err(DirectoryError::DuplicateName(occupied.key().to_vec()))
+            }
+        }
     }
 }
 
@@ -133,49 +132,58 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
 mod test {
     use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
     use crate::fixtures::DUMMY_DIGEST;
-    use crate::ValidateDirectoryError;
+    use crate::DirectoryError;
 
     #[test]
     fn add_nodes_to_directory() {
         let mut d = Directory::new();
 
-        d.add(Node::Directory(
-            DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "b".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
-        d.add(Node::Directory(
-            DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "a".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
-        d.add(Node::Directory(
-            DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "z".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
 
-        d.add(Node::File(
-            FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
+        d.add(
+            "f".into(),
+            Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
+        )
         .unwrap();
-        d.add(Node::File(
-            FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
+        d.add(
+            "c".into(),
+            Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
+        )
         .unwrap();
-        d.add(Node::File(
-            FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
+        d.add(
+            "g".into(),
+            Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
+        )
         .unwrap();
 
-        d.add(Node::Symlink(
-            SymlinkNode::new("t".into(), "a".into()).unwrap(),
-        ))
+        d.add(
+            "t".into(),
+            Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
+        )
         .unwrap();
-        d.add(Node::Symlink(
-            SymlinkNode::new("o".into(), "a".into()).unwrap(),
-        ))
+        d.add(
+            "o".into(),
+            Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
+        )
         .unwrap();
-        d.add(Node::Symlink(
-            SymlinkNode::new("e".into(), "a".into()).unwrap(),
-        ))
+        d.add(
+            "e".into(),
+            Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
+        )
         .unwrap();
 
         // Convert to proto struct and back to ensure we are not generating any invalid structures
@@ -188,10 +196,11 @@ mod test {
         let mut d = Directory::new();
 
         assert_eq!(
-            d.add(Node::Directory(
-                DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(),
-            )),
-            Err(ValidateDirectoryError::SizeOverflow)
+            d.add(
+                "foo".into(),
+                Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX))
+            ),
+            Err(DirectoryError::SizeOverflow)
         );
     }
 
@@ -199,16 +208,18 @@ mod test {
     fn add_duplicate_node_to_directory() {
         let mut d = Directory::new();
 
-        d.add(Node::Directory(
-            DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
+        d.add(
+            "a".into(),
+            Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
+        )
         .unwrap();
         assert_eq!(
             format!(
                 "{}",
-                d.add(Node::File(
-                    FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-                ))
+                d.add(
+                    "a".into(),
+                    Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true))
+                )
                 .expect_err("adding duplicate dir entry must fail")
             ),
             "\"a\" is a duplicate name"
@@ -220,13 +231,10 @@ mod test {
     async fn directory_reject_invalid_name() {
         let mut dir = Directory::new();
         assert!(
-            dir.add(Node::Symlink(
-                SymlinkNode::new(
-                    "".into(), // wrong! can not be added to directory
-                    "doesntmatter".into(),
-                )
-                .unwrap()
-            ))
+            dir.add(
+                "".into(), // wrong! can not be added to directory
+                Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap())
+            )
             .is_err(),
             "invalid symlink entry be rejected"
         );
diff --git a/tvix/castore/src/nodes/directory_node.rs b/tvix/castore/src/nodes/directory_node.rs
index afbae1695c63..e6e6312a7a0c 100644
--- a/tvix/castore/src/nodes/directory_node.rs
+++ b/tvix/castore/src/nodes/directory_node.rs
@@ -1,13 +1,11 @@
-use crate::{B3Digest, NamedNode, ValidateNodeError};
+use crate::B3Digest;
 
 /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
-/// It also gives it a `name` and `size`.
+/// It also records a`size`.
 /// Such a node is either an element in the [Directory] it itself is contained in,
 /// or a standalone root node./
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct DirectoryNode {
-    /// The (base)name of the directory
-    name: bytes::Bytes,
     /// The blake3 hash of a Directory message, serialized in protobuf canonical form.
     digest: B3Digest,
     /// Number of child elements in the Directory referred to by `digest`.
@@ -23,8 +21,8 @@ pub struct DirectoryNode {
 }
 
 impl DirectoryNode {
-    pub fn new(name: bytes::Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> {
-        Ok(Self { name, digest, size })
+    pub fn new(digest: B3Digest, size: u64) -> Self {
+        Self { digest, size }
     }
 
     pub fn digest(&self) -> &B3Digest {
@@ -34,31 +32,4 @@ impl DirectoryNode {
     pub fn size(&self) -> u64 {
         self.size
     }
-
-    pub fn rename(self, name: bytes::Bytes) -> Self {
-        Self { name, ..self }
-    }
-}
-
-impl PartialOrd for DirectoryNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for DirectoryNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl NamedNode for &DirectoryNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-impl NamedNode for DirectoryNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
 }
diff --git a/tvix/castore/src/nodes/file_node.rs b/tvix/castore/src/nodes/file_node.rs
index 5dd677c17b49..73abe21e59b4 100644
--- a/tvix/castore/src/nodes/file_node.rs
+++ b/tvix/castore/src/nodes/file_node.rs
@@ -1,11 +1,8 @@
-use crate::{B3Digest, NamedNode, ValidateNodeError};
+use crate::B3Digest;
 
 /// A FileNode represents a regular or executable file in a Directory or at the root.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct FileNode {
-    /// The (base)name of the file
-    name: bytes::Bytes,
-
     /// The blake3 digest of the file contents
     digest: B3Digest,
 
@@ -17,18 +14,12 @@ pub struct FileNode {
 }
 
 impl FileNode {
-    pub fn new(
-        name: bytes::Bytes,
-        digest: B3Digest,
-        size: u64,
-        executable: bool,
-    ) -> Result<Self, ValidateNodeError> {
-        Ok(Self {
-            name,
+    pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self {
+        Self {
             digest,
             size,
             executable,
-        })
+        }
     }
 
     pub fn digest(&self) -> &B3Digest {
@@ -42,31 +33,4 @@ impl FileNode {
     pub fn executable(&self) -> bool {
         self.executable
     }
-
-    pub fn rename(self, name: bytes::Bytes) -> Self {
-        Self { name, ..self }
-    }
-}
-
-impl PartialOrd for FileNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for FileNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl NamedNode for &FileNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-impl NamedNode for FileNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
 }
diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs
index bb8e14cf856e..47f39ae8f94a 100644
--- a/tvix/castore/src/nodes/mod.rs
+++ b/tvix/castore/src/nodes/mod.rs
@@ -4,66 +4,17 @@ mod directory_node;
 mod file_node;
 mod symlink_node;
 
-use bytes::Bytes;
 pub use directory::Directory;
 pub use directory_node::DirectoryNode;
 pub use file_node::FileNode;
 pub use symlink_node::SymlinkNode;
 
 /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
-/// While a Node by itself may have any name, only those matching specific requirements
-/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details).
+/// Nodes themselves don't have names, what gives them names is either them
+/// being inside a [Directory], or a root node with its own name attached to it.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Node {
     Directory(DirectoryNode),
     File(FileNode),
     Symlink(SymlinkNode),
 }
-
-impl Node {
-    /// Returns the node with a new name.
-    pub fn rename(self, name: Bytes) -> Self {
-        match self {
-            Node::Directory(n) => Node::Directory(n.rename(name)),
-            Node::File(n) => Node::File(n.rename(name)),
-            Node::Symlink(n) => Node::Symlink(n.rename(name)),
-        }
-    }
-}
-
-impl PartialOrd for Node {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for Node {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
-/// and [Node], so we can ask all of them for the name easily.
-pub trait NamedNode {
-    fn get_name(&self) -> &Bytes;
-}
-
-impl NamedNode for &Node {
-    fn get_name(&self) -> &Bytes {
-        match self {
-            Node::File(node_file) => node_file.get_name(),
-            Node::Directory(node_directory) => node_directory.get_name(),
-            Node::Symlink(node_symlink) => node_symlink.get_name(),
-        }
-    }
-}
-impl NamedNode for Node {
-    fn get_name(&self) -> &Bytes {
-        match self {
-            Node::File(node_file) => node_file.get_name(),
-            Node::Directory(node_directory) => node_directory.get_name(),
-            Node::Symlink(node_symlink) => node_symlink.get_name(),
-        }
-    }
-}
diff --git a/tvix/castore/src/nodes/symlink_node.rs b/tvix/castore/src/nodes/symlink_node.rs
index 2fac27231a85..6b9df96a5dd3 100644
--- a/tvix/castore/src/nodes/symlink_node.rs
+++ b/tvix/castore/src/nodes/symlink_node.rs
@@ -1,50 +1,21 @@
-use crate::{NamedNode, ValidateNodeError};
+use crate::ValidateNodeError;
 
 /// A SymlinkNode represents a symbolic link in a Directory or at the root.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct SymlinkNode {
-    /// The (base)name of the symlink
-    name: bytes::Bytes,
     /// The target of the symlink.
     target: bytes::Bytes,
 }
 
 impl SymlinkNode {
-    pub fn new(name: bytes::Bytes, target: bytes::Bytes) -> Result<Self, ValidateNodeError> {
+    pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> {
         if target.is_empty() || target.contains(&b'\0') {
             return Err(ValidateNodeError::InvalidSymlinkTarget(target));
         }
-        Ok(Self { name, target })
+        Ok(Self { target })
     }
 
     pub fn target(&self) -> &bytes::Bytes {
         &self.target
     }
-
-    pub(crate) fn rename(self, name: bytes::Bytes) -> Self {
-        Self { name, ..self }
-    }
-}
-
-impl PartialOrd for SymlinkNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for SymlinkNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl NamedNode for &SymlinkNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-impl NamedNode for SymlinkNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
 }