about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-08-17T14·40+0300
committerclbot <clbot@tvl.fyi>2024-08-18T16·49+0000
commit96832c04116fb5a3be2d314659e701a3669ec65d (patch)
treefca0c6fae125dabba212be6b8ebb2de10224c223
parent76839683a7ab0453afcdc3374a5cc80cca2f9510 (diff)
feat(tvix/castore): add Directory::try_from_iter() r/8511
This provides a batched variant to construct a Directory, which reuses
the previously calculated size.

Checking and inserting code is factored out into a check_insert_node
function, taking the current size as a parameter and returning the new
size.

Change-Id: Ia6c2970a0c12181b7c40e63cf7ce8c93298ea37c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12225
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
-rw-r--r--tvix/castore/src/directoryservice/directory_graph.rs20
-rw-r--r--tvix/castore/src/directoryservice/tests/mod.rs21
-rw-r--r--tvix/castore/src/fixtures.rs85
-rw-r--r--tvix/castore/src/nodes/directory.rs235
4 files changed, 201 insertions, 160 deletions
diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs
index d8d8e7370510..017cef024059 100644
--- a/tvix/castore/src/directoryservice/directory_graph.rs
+++ b/tvix/castore/src/directoryservice/directory_graph.rs
@@ -290,16 +290,16 @@ mod tests {
     use super::{DirectoryGraph, LeavesToRootValidator, RootToLeavesValidator};
 
     lazy_static! {
-        pub static ref BROKEN_PARENT_DIRECTORY: Directory = {
-            let mut dir = Directory::new();
-            dir.add(
-                "foo".try_into().unwrap(),
-                Node::Directory{
-                    digest: DIRECTORY_A.digest(),
-                    size: DIRECTORY_A.size() + 42, // wrong!
-                }).unwrap();
-            dir
-        };
+        pub static ref BROKEN_PARENT_DIRECTORY: Directory =
+            Directory::try_from_iter([
+                (
+                    "foo".try_into().unwrap(),
+                    Node::Directory{
+                        digest: DIRECTORY_A.digest(),
+                        size: DIRECTORY_A.size() + 42, // wrong!
+                    }
+                )
+            ]).unwrap();
     }
 
     #[rstest]
diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs
index 01e42130456f..ad189564bfe7 100644
--- a/tvix/castore/src/directoryservice/tests/mod.rs
+++ b/tvix/castore/src/directoryservice/tests/mod.rs
@@ -216,19 +216,14 @@ async fn upload_reject_dangling_pointer(directory_service: impl DirectoryService
 #[apply(directory_services)]
 #[tokio::test]
 async fn upload_reject_wrong_size(directory_service: impl DirectoryService) {
-    let wrong_parent_directory = {
-        let mut dir = Directory::new();
-        dir.add(
-            "foo".try_into().unwrap(),
-            Node::Directory {
-                digest: DIRECTORY_A.digest(),
-                size: DIRECTORY_A.size() + 42, // wrong!
-            },
-        )
-        .unwrap();
-
-        dir
-    };
+    let wrong_parent_directory = Directory::try_from_iter([(
+        "foo".try_into().unwrap(),
+        Node::Directory {
+            digest: DIRECTORY_A.digest(),
+            size: DIRECTORY_A.size() + 42, // wrong!
+        },
+    )])
+    .unwrap();
 
     // Now upload both. Ensure it either fails during the second put, or during
     // the close.
diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs
index 00cf3682d194..05bad916d55f 100644
--- a/tvix/castore/src/fixtures.rs
+++ b/tvix/castore/src/fixtures.rs
@@ -31,85 +31,74 @@ lazy_static! {
     pub static ref BLOB_B_DIGEST: B3Digest = blake3::hash(&BLOB_B).as_bytes().into();
 
     // Directories
-    pub static ref DIRECTORY_WITH_KEEP: Directory = {
-        let mut dir = Directory::new();
-        dir.add(
-            ".keep".try_into().unwrap(),
-            Node::File{
-                digest: EMPTY_BLOB_DIGEST.clone(),
-                size: 0,
-                executable: false
-            }).unwrap();
-
-        dir
-    };
-    pub static ref DIRECTORY_COMPLICATED: Directory = {
-        let mut dir = Directory::new();
-        dir.add(
+    pub static ref DIRECTORY_WITH_KEEP: Directory = Directory::try_from_iter([(
+        ".keep".try_into().unwrap(),
+        Node::File{
+            digest: EMPTY_BLOB_DIGEST.clone(),
+            size: 0,
+            executable: false
+    })]).unwrap();
+    pub static ref DIRECTORY_COMPLICATED: Directory = Directory::try_from_iter([
+        (
             "keep".try_into().unwrap(),
             Node::Directory{
                 digest: DIRECTORY_WITH_KEEP.digest(),
                 size: DIRECTORY_WITH_KEEP.size()
-            }).unwrap();
-        dir.add(
+            }
+        ),
+        (
             ".keep".try_into().unwrap(),
             Node::File{
                 digest: EMPTY_BLOB_DIGEST.clone(),
                 size: 0,
                 executable: false
-            }).unwrap();
-        dir.add(
+            }
+        ),
+        (
             "aa".try_into().unwrap(),
             Node::Symlink{
                 target: "/nix/store/somewhereelse".try_into().unwrap()
-            }).unwrap();
-
-        dir
-    };
+            }
+        )
+    ]).unwrap();
     pub static ref DIRECTORY_A: Directory = Directory::new();
-    pub static ref DIRECTORY_B: Directory = {
-        let mut dir = Directory::new();
-        dir.add(
+    pub static ref DIRECTORY_B: Directory = Directory::try_from_iter([(
             "a".try_into().unwrap(),
             Node::Directory{
                 digest: DIRECTORY_A.digest(),
                 size: DIRECTORY_A.size(),
-            }).unwrap();
-
-        dir
-    };
-    pub static ref DIRECTORY_C: Directory = {
-        let mut dir = Directory::new();
-        dir.add(
+            }
+    )]).unwrap();
+    pub static ref DIRECTORY_C: Directory = Directory::try_from_iter([
+        (
             "a".try_into().unwrap(),
             Node::Directory{
                 digest: DIRECTORY_A.digest(),
                 size: DIRECTORY_A.size(),
-            }).unwrap();
-        dir.add(
+            }
+        ),
+        (
             "a'".try_into().unwrap(),
             Node::Directory{
                 digest: DIRECTORY_A.digest(),
                 size: DIRECTORY_A.size(),
-        }).unwrap();
-
-        dir
-    };
-    pub static ref DIRECTORY_D: Directory = {
-        let mut dir = Directory::new();
-        dir.add(
+            }
+        )
+    ]).unwrap();
+    pub static ref DIRECTORY_D: Directory = Directory::try_from_iter([
+        (
             "a".try_into().unwrap(),
             Node::Directory{
                 digest: DIRECTORY_A.digest(),
                 size: DIRECTORY_A.size(),
-            }).unwrap();
-        dir.add(
+            }
+        ),
+        (
             "b".try_into().unwrap(),
             Node::Directory{
                 digest: DIRECTORY_B.digest(),
                 size: DIRECTORY_B.size(),
-            }).unwrap();
-
-        dir
-    };
+            }
+        )
+    ]).unwrap();
 }
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs
index a11fbf314bd0..f80e055dde80 100644
--- a/tvix/castore/src/nodes/directory.rs
+++ b/tvix/castore/src/nodes/directory.rs
@@ -1,9 +1,9 @@
-use std::collections::BTreeMap;
+use std::collections::btree_map::{self, BTreeMap};
 
 use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node};
 
 /// A Directory contains nodes, which can be Directory, File or Symlink nodes.
-/// It attached names to these nodes, which is the basename in that directory.
+/// It attaches names to these nodes, which is the basename in that directory.
 /// These names:
 ///  - MUST not contain slashes or null bytes
 ///  - MUST not be '.' or '..'
@@ -15,13 +15,28 @@ pub struct Directory {
 
 impl Directory {
     /// Constructs a new, empty Directory.
-    /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes.
     pub fn new() -> Self {
         Directory {
             nodes: BTreeMap::new(),
         }
     }
 
+    /// Construct a [Directory] from tuples of name and [Node].
+    ///
+    /// Inserting multiple elements with the same name will yield an error, as
+    /// well as exceeding the maximum size.
+    pub fn try_from_iter<T: IntoIterator<Item = (PathComponent, Node)>>(
+        iter: T,
+    ) -> Result<Directory, DirectoryError> {
+        let mut nodes = BTreeMap::new();
+
+        iter.into_iter().try_fold(0u64, |size, (name, node)| {
+            check_insert_node(size, &mut nodes, name, node)
+        })?;
+
+        Ok(Self { nodes })
+    }
+
     /// The size of a directory is the number of all regular and symlink elements,
     /// the number of directory elements, and their size fields.
     pub fn size(&self) -> u64 {
@@ -58,34 +73,14 @@ impl Directory {
 
     /// Adds the specified [Node] to the [Directory] with a given name.
     ///
-    /// Inserting an element that already exists with the same name in the directory will yield an
-    /// error.
-    /// Inserting an element will validate that its name fulfills the
-    /// requirements for directory entries and yield an error if it is not.
+    /// Inserting a node that already exists with the same name in the directory
+    /// will yield an error, as well as exceeding the maximum size.
+    ///
+    /// In case you want to construct a [Directory] from multiple elements, use
+    /// [from_iter] instead.
     pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> {
-        // Check that the even after adding this new directory entry, the size calculation will not
-        // overflow
-        // FUTUREWORK: add some sort of batch add interface which only does this check once with
-        // all the to-be-added entries
-        checked_sum([
-            self.size(),
-            1,
-            match node {
-                Node::Directory { size, .. } => size,
-                _ => 0,
-            },
-        ])
-        .ok_or(DirectoryError::SizeOverflow)?;
-
-        match self.nodes.entry(name) {
-            std::collections::btree_map::Entry::Vacant(e) => {
-                e.insert(node);
-                Ok(())
-            }
-            std::collections::btree_map::Entry::Occupied(occupied) => {
-                Err(DirectoryError::DuplicateName(occupied.key().to_owned()))
-            }
-        }
+        check_insert_node(self.size(), &mut self.nodes, name, node)?;
+        Ok(())
     }
 }
 
@@ -93,87 +88,149 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
     iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
 }
 
+/// Helper function dealing with inserting nodes into the nodes [BTreeMap],
+/// after ensuring the new size doesn't overlow and the key doesn't exist already.
+///
+/// Returns the new total size, or an error.
+fn check_insert_node(
+    current_size: u64,
+    nodes: &mut BTreeMap<PathComponent, Node>,
+    name: PathComponent,
+    node: Node,
+) -> Result<u64, DirectoryError> {
+    // Check that the even after adding this new directory entry, the size calculation will not
+    // overflow
+    let new_size = checked_sum([
+        current_size,
+        1,
+        match node {
+            Node::Directory { size, .. } => size,
+            _ => 0,
+        },
+    ])
+    .ok_or(DirectoryError::SizeOverflow)?;
+
+    match nodes.entry(name) {
+        btree_map::Entry::Vacant(e) => {
+            e.insert(node);
+        }
+        btree_map::Entry::Occupied(occupied) => {
+            return Err(DirectoryError::DuplicateName(occupied.key().to_owned()))
+        }
+    }
+
+    Ok(new_size)
+}
+
 #[cfg(test)]
 mod test {
     use super::{Directory, Node};
     use crate::fixtures::DUMMY_DIGEST;
-    use crate::DirectoryError;
+    use crate::{DirectoryError, PathComponent};
 
     #[test]
-    fn add_nodes_to_directory() {
-        let mut d = Directory::new();
-
-        d.add(
-            "b".try_into().unwrap(),
+    fn from_iter_single() {
+        Directory::try_from_iter([(
+            PathComponent::try_from("b").unwrap(),
             Node::Directory {
                 digest: DUMMY_DIGEST.clone(),
                 size: 1,
             },
-        )
-        .unwrap();
-        d.add(
-            "a".try_into().unwrap(),
-            Node::Directory {
-                digest: DUMMY_DIGEST.clone(),
-                size: 1,
-            },
-        )
-        .unwrap();
-        d.add(
-            "z".try_into().unwrap(),
-            Node::Directory {
-                digest: DUMMY_DIGEST.clone(),
-                size: 1,
-            },
-        )
+        )])
         .unwrap();
+    }
 
-        d.add(
-            "f".try_into().unwrap(),
-            Node::File {
-                digest: DUMMY_DIGEST.clone(),
-                size: 1,
-                executable: true,
-            },
-        )
+    #[test]
+    fn from_iter_multiple() {
+        let d = Directory::try_from_iter([
+            (
+                "b".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "a".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "z".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "f".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "c".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "g".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "t".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+            (
+                "o".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+            (
+                "e".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+        ])
         .unwrap();
+
+        // Convert to proto struct and back to ensure we are not generating any invalid structures
+        crate::Directory::try_from(crate::proto::Directory::from(d))
+            .expect("directory should be valid");
+    }
+
+    #[test]
+    fn add_nodes_to_directory() {
+        let mut d = Directory::new();
+
         d.add(
-            "c".try_into().unwrap(),
-            Node::File {
+            "b".try_into().unwrap(),
+            Node::Directory {
                 digest: DUMMY_DIGEST.clone(),
                 size: 1,
-                executable: true,
             },
         )
         .unwrap();
         d.add(
-            "g".try_into().unwrap(),
-            Node::File {
+            "a".try_into().unwrap(),
+            Node::Directory {
                 digest: DUMMY_DIGEST.clone(),
                 size: 1,
-                executable: true,
-            },
-        )
-        .unwrap();
-
-        d.add(
-            "t".try_into().unwrap(),
-            Node::Symlink {
-                target: "a".try_into().unwrap(),
-            },
-        )
-        .unwrap();
-        d.add(
-            "o".try_into().unwrap(),
-            Node::Symlink {
-                target: "a".try_into().unwrap(),
-            },
-        )
-        .unwrap();
-        d.add(
-            "e".try_into().unwrap(),
-            Node::Symlink {
-                target: "a".try_into().unwrap(),
             },
         )
         .unwrap();