about summary refs log tree commit diff
path: root/tvix/castore/src/directoryservice
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-08-13T17·04+0300
committerclbot <clbot@tvl.fyi>2024-08-13T18·39+0000
commitc7845f3c882d0f1b215813bf0ef8231c2c03d7b6 (patch)
tree681774cbfa98e5021aa68b361bc937b88881b5c8 /tvix/castore/src/directoryservice
parent2f4185ff1a54e1bdbaa062a9e4e1c8137d141762 (diff)
refactor(tvix/castore): move *Node and Directory to crate root r/8486
*Node and Directory are types of the tvix-castore model, not the tvix
DirectoryService model. A DirectoryService only happens to send
Directories.

Move types into individual files in a nodes/ subdirectory, as it's
gotten too cluttered in a single file, and (re-)export all types from
the crate root.

This has the effect that we now cannot poke at private fields directly
from other files inside `crate::directoryservice` (as it's not all in
the same file anymore), but that's a good thing, it now forces us to go
through the proper accessors.

For the same reasons, we currently also need to introduce the `rename`
functions on each *Node directly.

A followup is gonna move the names out of the individual enum kinds, so
we can better represent "unnamed nodes".

Change-Id: Icdb34dcfe454c41c94f2396e8e99973d27db8418
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12199
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore/src/directoryservice')
-rw-r--r--tvix/castore/src/directoryservice/directory_graph.rs13
-rw-r--r--tvix/castore/src/directoryservice/mod.rs463
-rw-r--r--tvix/castore/src/directoryservice/order_validator.rs6
-rw-r--r--tvix/castore/src/directoryservice/tests/mod.rs2
-rw-r--r--tvix/castore/src/directoryservice/traverse.rs12
-rw-r--r--tvix/castore/src/directoryservice/utils.rs2
6 files changed, 17 insertions, 481 deletions
diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs
index aa60f68a3197..0cf2f71adb06 100644
--- a/tvix/castore/src/directoryservice/directory_graph.rs
+++ b/tvix/castore/src/directoryservice/directory_graph.rs
@@ -10,8 +10,7 @@ use petgraph::{
 use tracing::instrument;
 
 use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator};
-use super::{Directory, DirectoryNode};
-use crate::B3Digest;
+use crate::{B3Digest, Directory, DirectoryNode, NamedNode};
 
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
@@ -72,11 +71,11 @@ pub struct ValidatedDirectoryGraph {
 
 fn check_edge(dir: &DirectoryNode, child: &Directory) -> Result<(), Error> {
     // Ensure the size specified in the child node matches our records.
-    if dir.size != child.size() {
+    if dir.size() != child.size() {
         return Err(Error::ValidationError(format!(
             "'{}' has wrong size, specified {}, recorded {}",
-            dir.name.as_bstr(),
-            dir.size,
+            dir.get_name().as_bstr(),
+            dir.size(),
             child.size(),
         )));
     }
@@ -145,7 +144,7 @@ impl<O: OrderValidator> DirectoryGraph<O> {
         for subdir in directory.directories() {
             let child_ix = *self
                 .digest_to_node_ix
-                .entry(subdir.digest.clone())
+                .entry(subdir.digest().clone())
                 .or_insert_with(|| self.graph.add_node(None));
 
             let pending_edge_check = match &self.graph[child_ix] {
@@ -268,8 +267,8 @@ impl ValidatedDirectoryGraph {
 */
 #[cfg(test)]
 mod tests {
-    use crate::directoryservice::{Directory, DirectoryNode, Node};
     use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
+    use crate::{Directory, DirectoryNode, Node};
     use lazy_static::lazy_static;
     use rstest::rstest;
 
diff --git a/tvix/castore/src/directoryservice/mod.rs b/tvix/castore/src/directoryservice/mod.rs
index 0141a48f75bc..9f23176e31d0 100644
--- a/tvix/castore/src/directoryservice/mod.rs
+++ b/tvix/castore/src/directoryservice/mod.rs
@@ -1,9 +1,6 @@
 use crate::composition::{Registry, ServiceBuilder};
-use crate::proto;
-use crate::{B3Digest, Error};
-use crate::{ValidateDirectoryError, ValidateNodeError};
+use crate::{B3Digest, Directory, Error};
 
-use bytes::Bytes;
 use futures::stream::BoxStream;
 use tonic::async_trait;
 mod combinators;
@@ -148,461 +145,3 @@ pub(crate) fn register_directory_services(reg: &mut Registry) {
         reg.register::<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>, super::directoryservice::BigtableParameters>("bigtable");
     }
 }
-
-/// A Directory can contain Directory, File or Symlink nodes.
-/// Each of these nodes have a name attribute, which is the basename in that
-/// directory and node type specific attributes.
-/// While a Node by itself may have any name, the names of Directory entries:
-///  - MUST not contain slashes or null bytes
-///  - MUST not be '.' or '..'
-///  - MUST be unique across all three lists
-#[derive(Default, Debug, Clone, PartialEq, Eq)]
-pub struct Directory {
-    nodes: Vec<Node>,
-}
-
-/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
-/// It also gives it a `name` and `size`.
-/// Such a node is either an element in the [Directory] it itself is contained in,
-/// or a standalone root node./
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct DirectoryNode {
-    /// The (base)name of the directory
-    name: Bytes,
-    /// The blake3 hash of a Directory message, serialized in protobuf canonical form.
-    digest: B3Digest,
-    /// Number of child elements in the Directory referred to by `digest`.
-    /// Calculated by summing up the numbers of nodes, and for each directory.
-    /// its size field. Can be used for inode allocation.
-    /// This field is precisely as verifiable as any other Merkle tree edge.
-    /// Resolve `digest`, and you can compute it incrementally. Resolve the entire
-    /// tree, and you can fully compute it from scratch.
-    /// A credulous implementation won't reject an excessive size, but this is
-    /// harmless: you'll have some ordinals without nodes. Undersizing is obvious
-    /// and easy to reject: you won't have an ordinal for some nodes.
-    size: u64,
-}
-
-impl DirectoryNode {
-    pub fn new(name: Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> {
-        Ok(Self { name, digest, size })
-    }
-
-    pub fn digest(&self) -> &B3Digest {
-        &self.digest
-    }
-    pub fn size(&self) -> u64 {
-        self.size
-    }
-}
-
-/// A FileNode represents a regular or executable file in a Directory or at the root.
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct FileNode {
-    /// The (base)name of the file
-    name: Bytes,
-
-    /// The blake3 digest of the file contents
-    digest: B3Digest,
-
-    /// The file content size
-    size: u64,
-
-    /// Whether the file is executable
-    executable: bool,
-}
-
-impl FileNode {
-    pub fn new(
-        name: Bytes,
-        digest: B3Digest,
-        size: u64,
-        executable: bool,
-    ) -> Result<Self, ValidateNodeError> {
-        Ok(Self {
-            name,
-            digest,
-            size,
-            executable,
-        })
-    }
-
-    pub fn digest(&self) -> &B3Digest {
-        &self.digest
-    }
-
-    pub fn size(&self) -> u64 {
-        self.size
-    }
-
-    pub fn executable(&self) -> bool {
-        self.executable
-    }
-}
-
-/// A SymlinkNode represents a symbolic link in a Directory or at the root.
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct SymlinkNode {
-    /// The (base)name of the symlink
-    name: Bytes,
-    /// The target of the symlink.
-    target: Bytes,
-}
-
-impl SymlinkNode {
-    pub fn new(name: Bytes, target: Bytes) -> Result<Self, ValidateNodeError> {
-        if target.is_empty() || target.contains(&b'\0') {
-            return Err(ValidateNodeError::InvalidSymlinkTarget(target));
-        }
-        Ok(Self { name, target })
-    }
-
-    pub fn target(&self) -> &bytes::Bytes {
-        &self.target
-    }
-}
-
-/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
-/// While a Node by itself may have any name, only those matching specific requirements
-/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details).
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum Node {
-    Directory(DirectoryNode),
-    File(FileNode),
-    Symlink(SymlinkNode),
-}
-
-/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
-/// and [Node], so we can ask all of them for the name easily.
-pub trait NamedNode {
-    fn get_name(&self) -> &bytes::Bytes;
-}
-
-impl NamedNode for &FileNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-impl NamedNode for FileNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-
-impl NamedNode for &DirectoryNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-impl NamedNode for DirectoryNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-
-impl NamedNode for &SymlinkNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-impl NamedNode for SymlinkNode {
-    fn get_name(&self) -> &bytes::Bytes {
-        &self.name
-    }
-}
-
-impl NamedNode for &Node {
-    fn get_name(&self) -> &bytes::Bytes {
-        match self {
-            Node::File(node_file) => &node_file.name,
-            Node::Directory(node_directory) => &node_directory.name,
-            Node::Symlink(node_symlink) => &node_symlink.name,
-        }
-    }
-}
-impl NamedNode for Node {
-    fn get_name(&self) -> &bytes::Bytes {
-        match self {
-            Node::File(node_file) => &node_file.name,
-            Node::Directory(node_directory) => &node_directory.name,
-            Node::Symlink(node_symlink) => &node_symlink.name,
-        }
-    }
-}
-
-impl Node {
-    /// Returns the node with a new name.
-    pub fn rename(self, name: bytes::Bytes) -> Self {
-        match self {
-            Node::Directory(n) => Node::Directory(DirectoryNode { name, ..n }),
-            Node::File(n) => Node::File(FileNode { name, ..n }),
-            Node::Symlink(n) => Node::Symlink(SymlinkNode { name, ..n }),
-        }
-    }
-}
-
-impl PartialOrd for Node {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for Node {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl PartialOrd for FileNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for FileNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl PartialOrd for DirectoryNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for DirectoryNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl PartialOrd for SymlinkNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for SymlinkNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
-    iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
-}
-
-impl Directory {
-    pub fn new() -> Self {
-        Directory { nodes: vec![] }
-    }
-
-    /// The size of a directory is the number of all regular and symlink elements,
-    /// the number of directory elements, and their size fields.
-    pub fn size(&self) -> u64 {
-        // It's impossible to create a Directory where the size overflows, because we
-        // check before every add() that the size won't overflow.
-        (self.nodes.len() as u64) + self.directories().map(|e| e.size).sum::<u64>()
-    }
-
-    /// Calculates the digest of a Directory, which is the blake3 hash of a
-    /// Directory protobuf message, serialized in protobuf canonical form.
-    pub fn digest(&self) -> B3Digest {
-        proto::Directory::from(self).digest()
-    }
-
-    /// Allows iterating over all nodes (directories, files and symlinks)
-    /// ordered by their name.
-    pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ {
-        self.nodes.iter()
-    }
-
-    /// Allows iterating over the FileNode entries of this directory
-    /// ordered by their name
-    pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::File(n) => Some(n),
-            _ => None,
-        })
-    }
-
-    /// Allows iterating over the subdirectories of this directory
-    /// ordered by their name
-    pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::Directory(n) => Some(n),
-            _ => None,
-        })
-    }
-
-    /// Allows iterating over the SymlinkNode entries of this directory
-    /// ordered by their name
-    pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ {
-        self.nodes.iter().filter_map(|node| match node {
-            Node::Symlink(n) => Some(n),
-            _ => None,
-        })
-    }
-
-    /// Checks a Node name for validity as a directory entry
-    /// We disallow slashes, null bytes, '.', '..' and the empty string.
-    pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
-        if name.is_empty()
-            || name == b".."
-            || name == b"."
-            || name.contains(&0x00)
-            || name.contains(&b'/')
-        {
-            Err(ValidateNodeError::InvalidName(name.to_owned().into()))
-        } else {
-            Ok(())
-        }
-    }
-
-    /// Adds the specified [Node] to the [Directory], preserving sorted entries.
-    ///
-    /// Inserting an element that already exists with the same name in the directory will yield an
-    /// error.
-    /// Inserting an element will validate that its name fulfills the stricter requirements for
-    /// directory entries and yield an error if it is not.
-    pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> {
-        Self::validate_node_name(node.get_name())
-            .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?;
-
-        // Check that the even after adding this new directory entry, the size calculation will not
-        // overflow
-        // FUTUREWORK: add some sort of batch add interface which only does this check once with
-        // all the to-be-added entries
-        checked_sum([
-            self.size(),
-            1,
-            match node {
-                Node::Directory(ref dir) => dir.size,
-                _ => 0,
-            },
-        ])
-        .ok_or(ValidateDirectoryError::SizeOverflow)?;
-
-        // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list
-        // directly and all previous inserts should have been in-order
-        let pos = match self
-            .nodes
-            .binary_search_by_key(&node.get_name(), |n| n.get_name())
-        {
-            Err(pos) => pos, // There is no node with this name; good!
-            Ok(_) => {
-                return Err(ValidateDirectoryError::DuplicateName(
-                    node.get_name().to_vec(),
-                ))
-            }
-        };
-
-        self.nodes.insert(pos, node);
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
-    use crate::fixtures::DUMMY_DIGEST;
-    use crate::ValidateDirectoryError;
-
-    #[test]
-    fn add_nodes_to_directory() {
-        let mut d = Directory::new();
-
-        d.add(Node::Directory(
-            DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
-        .unwrap();
-        d.add(Node::Directory(
-            DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
-        .unwrap();
-        d.add(Node::Directory(
-            DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
-        .unwrap();
-
-        d.add(Node::File(
-            FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
-        .unwrap();
-        d.add(Node::File(
-            FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
-        .unwrap();
-        d.add(Node::File(
-            FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-        ))
-        .unwrap();
-
-        d.add(Node::Symlink(
-            SymlinkNode::new("t".into(), "a".into()).unwrap(),
-        ))
-        .unwrap();
-        d.add(Node::Symlink(
-            SymlinkNode::new("o".into(), "a".into()).unwrap(),
-        ))
-        .unwrap();
-        d.add(Node::Symlink(
-            SymlinkNode::new("e".into(), "a".into()).unwrap(),
-        ))
-        .unwrap();
-
-        // Convert to proto struct and back to ensure we are not generating any invalid structures
-        crate::directoryservice::Directory::try_from(crate::proto::Directory::from(d))
-            .expect("directory should be valid");
-    }
-
-    #[test]
-    fn validate_overflow() {
-        let mut d = Directory::new();
-
-        assert_eq!(
-            d.add(Node::Directory(
-                DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(),
-            )),
-            Err(ValidateDirectoryError::SizeOverflow)
-        );
-    }
-
-    #[test]
-    fn add_duplicate_node_to_directory() {
-        let mut d = Directory::new();
-
-        d.add(Node::Directory(
-            DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(),
-        ))
-        .unwrap();
-        assert_eq!(
-            format!(
-                "{}",
-                d.add(Node::File(
-                    FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(),
-                ))
-                .expect_err("adding duplicate dir entry must fail")
-            ),
-            "\"a\" is a duplicate name"
-        );
-    }
-
-    /// Attempt to add a directory entry with a name which should be rejected.
-    #[tokio::test]
-    async fn directory_reject_invalid_name() {
-        let mut dir = Directory::new();
-        assert!(
-            dir.add(Node::Symlink(
-                SymlinkNode::new(
-                    "".into(), // wrong! can not be added to directory
-                    "doesntmatter".into(),
-                )
-                .unwrap()
-            ))
-            .is_err(),
-            "invalid symlink entry be rejected"
-        );
-    }
-}
diff --git a/tvix/castore/src/directoryservice/order_validator.rs b/tvix/castore/src/directoryservice/order_validator.rs
index 17431fc8d3b0..f03bb7ff9a86 100644
--- a/tvix/castore/src/directoryservice/order_validator.rs
+++ b/tvix/castore/src/directoryservice/order_validator.rs
@@ -50,7 +50,7 @@ impl RootToLeavesValidator {
 
         for subdir in directory.directories() {
             // Allow the children to appear next
-            self.expected_digests.insert(subdir.digest.clone());
+            self.expected_digests.insert(subdir.digest().clone());
         }
     }
 }
@@ -80,10 +80,10 @@ impl OrderValidator for LeavesToRootValidator {
         let digest = directory.digest();
 
         for subdir in directory.directories() {
-            if !self.allowed_references.contains(&subdir.digest) {
+            if !self.allowed_references.contains(subdir.digest()) {
                 warn!(
                     directory.digest = %digest,
-                    subdirectory.digest = %subdir.digest,
+                    subdirectory.digest = %subdir.digest(),
                     "unexpected directory reference"
                 );
                 return false;
diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs
index 3923e66dc2c2..7a15f44a686e 100644
--- a/tvix/castore/src/directoryservice/tests/mod.rs
+++ b/tvix/castore/src/directoryservice/tests/mod.rs
@@ -8,8 +8,8 @@ use rstest_reuse::{self, *};
 
 use super::DirectoryService;
 use crate::directoryservice;
-use crate::directoryservice::{Directory, DirectoryNode, Node};
 use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C, DIRECTORY_D};
+use crate::{Directory, DirectoryNode, Node};
 
 mod utils;
 use self::utils::make_grpc_directory_service_client;
diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs
index 3e6dc73a4d6b..ff667e0f65c0 100644
--- a/tvix/castore/src/directoryservice/traverse.rs
+++ b/tvix/castore/src/directoryservice/traverse.rs
@@ -1,5 +1,4 @@
-use super::{DirectoryService, NamedNode, Node};
-use crate::{Error, Path};
+use crate::{directoryservice::DirectoryService, Error, NamedNode, Node, Path};
 use tracing::{instrument, warn};
 
 /// This descends from a (root) node to the given (sub)path, returning the Node
@@ -25,15 +24,15 @@ where
                 // fetch the linked node from the directory_service.
                 let directory = directory_service
                     .as_ref()
-                    .get(&directory_node.digest)
+                    .get(directory_node.digest())
                     .await?
                     .ok_or_else(|| {
                         // If we didn't get the directory node that's linked, that's a store inconsistency, bail out!
-                        warn!("directory {} does not exist", directory_node.digest);
+                        warn!("directory {} does not exist", directory_node.digest());
 
                         Error::StorageError(format!(
                             "directory {} does not exist",
-                            directory_node.digest
+                            directory_node.digest()
                         ))
                     })?;
 
@@ -59,9 +58,8 @@ where
 mod tests {
     use crate::{
         directoryservice,
-        directoryservice::{DirectoryNode, Node},
         fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP},
-        PathBuf,
+        DirectoryNode, Node, PathBuf,
     };
 
     use super::descend_to;
diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs
index 352362811a97..f1133663ede7 100644
--- a/tvix/castore/src/directoryservice/utils.rs
+++ b/tvix/castore/src/directoryservice/utils.rs
@@ -59,7 +59,7 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>(
             // This panics if the digest looks invalid, it's supposed to be checked first.
             for child_directory_node in current_directory.directories() {
                 // TODO: propagate error
-                let child_digest: B3Digest = child_directory_node.digest.clone();
+                let child_digest: B3Digest = child_directory_node.digest().clone();
 
                 if worklist_directory_digests.contains(&child_digest)
                     || sent_directory_digests.contains(&child_digest)