about summary refs log tree commit diff
path: root/tvix/castore/src/nodes
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-08-17T14·40+0300
committerclbot <clbot@tvl.fyi>2024-08-18T16·49+0000
commit96832c04116fb5a3be2d314659e701a3669ec65d (patch)
treefca0c6fae125dabba212be6b8ebb2de10224c223 /tvix/castore/src/nodes
parent76839683a7ab0453afcdc3374a5cc80cca2f9510 (diff)
feat(tvix/castore): add Directory::try_from_iter() r/8511
This provides a batched variant to construct a Directory, which reuses
the previously calculated size.

Checking and inserting code is factored out into a check_insert_node
function, taking the current size as a parameter and returning the new
size.

Change-Id: Ia6c2970a0c12181b7c40e63cf7ce8c93298ea37c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12225
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore/src/nodes')
-rw-r--r--tvix/castore/src/nodes/directory.rs235
1 files changed, 146 insertions, 89 deletions
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs
index a11fbf314bd0..f80e055dde80 100644
--- a/tvix/castore/src/nodes/directory.rs
+++ b/tvix/castore/src/nodes/directory.rs
@@ -1,9 +1,9 @@
-use std::collections::BTreeMap;
+use std::collections::btree_map::{self, BTreeMap};
 
 use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node};
 
 /// A Directory contains nodes, which can be Directory, File or Symlink nodes.
-/// It attached names to these nodes, which is the basename in that directory.
+/// It attaches names to these nodes, which is the basename in that directory.
 /// These names:
 ///  - MUST not contain slashes or null bytes
 ///  - MUST not be '.' or '..'
@@ -15,13 +15,28 @@ pub struct Directory {
 
 impl Directory {
     /// Constructs a new, empty Directory.
-    /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes.
     pub fn new() -> Self {
         Directory {
             nodes: BTreeMap::new(),
         }
     }
 
+    /// Construct a [Directory] from tuples of name and [Node].
+    ///
+    /// Inserting multiple elements with the same name will yield an error, as
+    /// well as exceeding the maximum size.
+    pub fn try_from_iter<T: IntoIterator<Item = (PathComponent, Node)>>(
+        iter: T,
+    ) -> Result<Directory, DirectoryError> {
+        let mut nodes = BTreeMap::new();
+
+        iter.into_iter().try_fold(0u64, |size, (name, node)| {
+            check_insert_node(size, &mut nodes, name, node)
+        })?;
+
+        Ok(Self { nodes })
+    }
+
     /// The size of a directory is the number of all regular and symlink elements,
     /// the number of directory elements, and their size fields.
     pub fn size(&self) -> u64 {
@@ -58,34 +73,14 @@ impl Directory {
 
     /// Adds the specified [Node] to the [Directory] with a given name.
     ///
-    /// Inserting an element that already exists with the same name in the directory will yield an
-    /// error.
-    /// Inserting an element will validate that its name fulfills the
-    /// requirements for directory entries and yield an error if it is not.
+    /// Inserting a node that already exists with the same name in the directory
+    /// will yield an error, as well as exceeding the maximum size.
+    ///
+    /// In case you want to construct a [Directory] from multiple elements, use
+    /// [from_iter] instead.
     pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> {
-        // Check that the even after adding this new directory entry, the size calculation will not
-        // overflow
-        // FUTUREWORK: add some sort of batch add interface which only does this check once with
-        // all the to-be-added entries
-        checked_sum([
-            self.size(),
-            1,
-            match node {
-                Node::Directory { size, .. } => size,
-                _ => 0,
-            },
-        ])
-        .ok_or(DirectoryError::SizeOverflow)?;
-
-        match self.nodes.entry(name) {
-            std::collections::btree_map::Entry::Vacant(e) => {
-                e.insert(node);
-                Ok(())
-            }
-            std::collections::btree_map::Entry::Occupied(occupied) => {
-                Err(DirectoryError::DuplicateName(occupied.key().to_owned()))
-            }
-        }
+        check_insert_node(self.size(), &mut self.nodes, name, node)?;
+        Ok(())
     }
 }
 
@@ -93,87 +88,149 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
     iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
 }
 
+/// Helper function dealing with inserting nodes into the nodes [BTreeMap],
+/// after ensuring the new size doesn't overlow and the key doesn't exist already.
+///
+/// Returns the new total size, or an error.
+fn check_insert_node(
+    current_size: u64,
+    nodes: &mut BTreeMap<PathComponent, Node>,
+    name: PathComponent,
+    node: Node,
+) -> Result<u64, DirectoryError> {
+    // Check that the even after adding this new directory entry, the size calculation will not
+    // overflow
+    let new_size = checked_sum([
+        current_size,
+        1,
+        match node {
+            Node::Directory { size, .. } => size,
+            _ => 0,
+        },
+    ])
+    .ok_or(DirectoryError::SizeOverflow)?;
+
+    match nodes.entry(name) {
+        btree_map::Entry::Vacant(e) => {
+            e.insert(node);
+        }
+        btree_map::Entry::Occupied(occupied) => {
+            return Err(DirectoryError::DuplicateName(occupied.key().to_owned()))
+        }
+    }
+
+    Ok(new_size)
+}
+
 #[cfg(test)]
 mod test {
     use super::{Directory, Node};
     use crate::fixtures::DUMMY_DIGEST;
-    use crate::DirectoryError;
+    use crate::{DirectoryError, PathComponent};
 
     #[test]
-    fn add_nodes_to_directory() {
-        let mut d = Directory::new();
-
-        d.add(
-            "b".try_into().unwrap(),
+    fn from_iter_single() {
+        Directory::try_from_iter([(
+            PathComponent::try_from("b").unwrap(),
             Node::Directory {
                 digest: DUMMY_DIGEST.clone(),
                 size: 1,
             },
-        )
-        .unwrap();
-        d.add(
-            "a".try_into().unwrap(),
-            Node::Directory {
-                digest: DUMMY_DIGEST.clone(),
-                size: 1,
-            },
-        )
-        .unwrap();
-        d.add(
-            "z".try_into().unwrap(),
-            Node::Directory {
-                digest: DUMMY_DIGEST.clone(),
-                size: 1,
-            },
-        )
+        )])
         .unwrap();
+    }
 
-        d.add(
-            "f".try_into().unwrap(),
-            Node::File {
-                digest: DUMMY_DIGEST.clone(),
-                size: 1,
-                executable: true,
-            },
-        )
+    #[test]
+    fn from_iter_multiple() {
+        let d = Directory::try_from_iter([
+            (
+                "b".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "a".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "z".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "f".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "c".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "g".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "t".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+            (
+                "o".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+            (
+                "e".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+        ])
         .unwrap();
+
+        // Convert to proto struct and back to ensure we are not generating any invalid structures
+        crate::Directory::try_from(crate::proto::Directory::from(d))
+            .expect("directory should be valid");
+    }
+
+    #[test]
+    fn add_nodes_to_directory() {
+        let mut d = Directory::new();
+
         d.add(
-            "c".try_into().unwrap(),
-            Node::File {
+            "b".try_into().unwrap(),
+            Node::Directory {
                 digest: DUMMY_DIGEST.clone(),
                 size: 1,
-                executable: true,
             },
         )
         .unwrap();
         d.add(
-            "g".try_into().unwrap(),
-            Node::File {
+            "a".try_into().unwrap(),
+            Node::Directory {
                 digest: DUMMY_DIGEST.clone(),
                 size: 1,
-                executable: true,
-            },
-        )
-        .unwrap();
-
-        d.add(
-            "t".try_into().unwrap(),
-            Node::Symlink {
-                target: "a".try_into().unwrap(),
-            },
-        )
-        .unwrap();
-        d.add(
-            "o".try_into().unwrap(),
-            Node::Symlink {
-                target: "a".try_into().unwrap(),
-            },
-        )
-        .unwrap();
-        d.add(
-            "e".try_into().unwrap(),
-            Node::Symlink {
-                target: "a".try_into().unwrap(),
             },
         )
         .unwrap();