about summary refs log tree commit diff
path: root/tvix/castore/src/proto/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/castore/src/proto/mod.rs')
-rw-r--r--tvix/castore/src/proto/mod.rs584
1 files changed, 212 insertions, 372 deletions
diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs
index 5374e3ae5a80..89c68a4ad97b 100644
--- a/tvix/castore/src/proto/mod.rs
+++ b/tvix/castore/src/proto/mod.rs
@@ -1,18 +1,14 @@
-#![allow(non_snake_case)]
-// https://github.com/hyperium/tonic/issues/1056
-use bstr::ByteSlice;
-use std::{collections::HashSet, iter::Peekable, str};
-
 use prost::Message;
 
+use std::cmp::Ordering;
+
 mod grpc_blobservice_wrapper;
 mod grpc_directoryservice_wrapper;
 
+use crate::{path::PathComponent, B3Digest, DirectoryError};
 pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper;
 pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper;
 
-use crate::{B3Digest, B3_LEN};
-
 tonic::include_proto!("tvix.castore.v1");
 
 #[cfg(feature = "tonic-reflection")]
@@ -24,38 +20,6 @@ pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix
 #[cfg(test)]
 mod tests;
 
-/// Errors that can occur during the validation of [Directory] messages.
-#[derive(Debug, PartialEq, Eq, thiserror::Error)]
-pub enum ValidateDirectoryError {
-    /// Elements are not in sorted order
-    #[error("{:?} is not sorted", .0.as_bstr())]
-    WrongSorting(Vec<u8>),
-    /// Multiple elements with the same name encountered
-    #[error("{:?} is a duplicate name", .0.as_bstr())]
-    DuplicateName(Vec<u8>),
-    /// Invalid node
-    #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())]
-    InvalidNode(Vec<u8>, ValidateNodeError),
-    #[error("Total size exceeds u32::MAX")]
-    SizeOverflow,
-}
-
-/// Errors that occur during Node validation
-#[derive(Debug, PartialEq, Eq, thiserror::Error)]
-pub enum ValidateNodeError {
-    #[error("No node set")]
-    NoNodeSet,
-    /// Invalid digest length encountered
-    #[error("Invalid Digest length: {0}")]
-    InvalidDigestLen(usize),
-    /// Invalid name encountered
-    #[error("Invalid name: {}", .0.as_bstr())]
-    InvalidName(Vec<u8>),
-    /// Invalid symlink target
-    #[error("Invalid symlink target: {}", .0.as_bstr())]
-    InvalidSymlinkTarget(Vec<u8>),
-}
-
 /// Errors that occur during StatBlobResponse validation
 #[derive(Debug, PartialEq, Eq, thiserror::Error)]
 pub enum ValidateStatBlobResponseError {
@@ -64,184 +28,6 @@ pub enum ValidateStatBlobResponseError {
     InvalidDigestLen(usize, usize),
 }
 
-/// Checks a Node name for validity as an intermediate node.
-/// We disallow slashes, null bytes, '.', '..' and the empty string.
-pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
-    if name.is_empty()
-        || name == b".."
-        || name == b"."
-        || name.contains(&0x00)
-        || name.contains(&b'/')
-    {
-        Err(ValidateNodeError::InvalidName(name.to_owned()))
-    } else {
-        Ok(())
-    }
-}
-
-/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
-/// and [node::Node], so we can ask all of them for the name easily.
-pub trait NamedNode {
-    fn get_name(&self) -> &[u8];
-}
-
-impl NamedNode for &FileNode {
-    fn get_name(&self) -> &[u8] {
-        &self.name
-    }
-}
-
-impl NamedNode for &DirectoryNode {
-    fn get_name(&self) -> &[u8] {
-        &self.name
-    }
-}
-
-impl NamedNode for &SymlinkNode {
-    fn get_name(&self) -> &[u8] {
-        &self.name
-    }
-}
-
-impl NamedNode for node::Node {
-    fn get_name(&self) -> &[u8] {
-        match self {
-            node::Node::File(node_file) => &node_file.name,
-            node::Node::Directory(node_directory) => &node_directory.name,
-            node::Node::Symlink(node_symlink) => &node_symlink.name,
-        }
-    }
-}
-
-impl Node {
-    /// Ensures the node has a valid enum kind (is Some), and passes its
-    // per-enum validation.
-    pub fn validate(&self) -> Result<(), ValidateNodeError> {
-        if let Some(node) = self.node.as_ref() {
-            node.validate()
-        } else {
-            Err(ValidateNodeError::NoNodeSet)
-        }
-    }
-}
-
-impl node::Node {
-    /// Returns the node with a new name.
-    pub fn rename(self, name: bytes::Bytes) -> Self {
-        match self {
-            node::Node::Directory(n) => node::Node::Directory(DirectoryNode { name, ..n }),
-            node::Node::File(n) => node::Node::File(FileNode { name, ..n }),
-            node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }),
-        }
-    }
-
-    /// Ensures the node has a valid name, and checks the type-specific fields too.
-    pub fn validate(&self) -> Result<(), ValidateNodeError> {
-        match self {
-            // for a directory root node, ensure the digest has the appropriate size.
-            node::Node::Directory(directory_node) => {
-                if directory_node.digest.len() != B3_LEN {
-                    Err(ValidateNodeError::InvalidDigestLen(
-                        directory_node.digest.len(),
-                    ))?;
-                }
-                validate_node_name(&directory_node.name)
-            }
-            // for a file root node, ensure the digest has the appropriate size.
-            node::Node::File(file_node) => {
-                if file_node.digest.len() != B3_LEN {
-                    Err(ValidateNodeError::InvalidDigestLen(file_node.digest.len()))?;
-                }
-                validate_node_name(&file_node.name)
-            }
-            // ensure the symlink target is not empty and doesn't contain null bytes.
-            node::Node::Symlink(symlink_node) => {
-                if symlink_node.target.is_empty() || symlink_node.target.contains(&b'\0') {
-                    Err(ValidateNodeError::InvalidSymlinkTarget(
-                        symlink_node.target.to_vec(),
-                    ))?;
-                }
-                validate_node_name(&symlink_node.name)
-            }
-        }
-    }
-}
-
-impl PartialOrd for node::Node {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for node::Node {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl PartialOrd for FileNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for FileNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl PartialOrd for SymlinkNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for SymlinkNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-impl PartialOrd for DirectoryNode {
-    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for DirectoryNode {
-    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.get_name().cmp(other.get_name())
-    }
-}
-
-/// Accepts a name, and a mutable reference to the previous name.
-/// If the passed name is larger than the previous one, the reference is updated.
-/// If it's not, an error is returned.
-fn update_if_lt_prev<'n>(
-    prev_name: &mut &'n [u8],
-    name: &'n [u8],
-) -> Result<(), ValidateDirectoryError> {
-    if *name < **prev_name {
-        return Err(ValidateDirectoryError::WrongSorting(name.to_vec()));
-    }
-    *prev_name = name;
-    Ok(())
-}
-
-/// Inserts the given name into a HashSet if it's not already in there.
-/// If it is, an error is returned.
-fn insert_once<'n>(
-    seen_names: &mut HashSet<&'n [u8]>,
-    name: &'n [u8],
-) -> Result<(), ValidateDirectoryError> {
-    if seen_names.get(name).is_some() {
-        return Err(ValidateDirectoryError::DuplicateName(name.to_vec()));
-    }
-    seen_names.insert(name);
-    Ok(())
-}
-
 fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
     iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
 }
@@ -278,117 +64,233 @@ impl Directory {
             .as_bytes()
             .into()
     }
+}
 
-    /// validate checks the directory for invalid data, such as:
-    /// - violations of name restrictions
-    /// - invalid digest lengths
-    /// - not properly sorted lists
-    /// - duplicate names in the three lists
-    pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
-        let mut seen_names: HashSet<&[u8]> = HashSet::new();
-
-        let mut last_directory_name: &[u8] = b"";
-        let mut last_file_name: &[u8] = b"";
-        let mut last_symlink_name: &[u8] = b"";
-
-        // check directories
-        for directory_node in &self.directories {
-            node::Node::Directory(directory_node.clone())
-                .validate()
-                .map_err(|e| {
-                    ValidateDirectoryError::InvalidNode(directory_node.name.to_vec(), e)
-                })?;
-
-            update_if_lt_prev(&mut last_directory_name, &directory_node.name)?;
-            insert_once(&mut seen_names, &directory_node.name)?;
-        }
+impl TryFrom<Directory> for crate::Directory {
+    type Error = DirectoryError;
+
+    fn try_from(value: Directory) -> Result<Self, Self::Error> {
+        // Check directories, files and symlinks are sorted
+        // We'll notice duplicates across all three fields when constructing the Directory.
+        // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for
+        // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline.
+        value
+            .directories
+            .iter()
+            .try_fold(&b""[..], |prev_name, e| {
+                match e.name.as_ref().cmp(prev_name) {
+                    Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
+                    Ordering::Equal => Err(DirectoryError::DuplicateName(
+                        e.name
+                            .to_owned()
+                            .try_into()
+                            .map_err(DirectoryError::InvalidName)?,
+                    )),
+                    Ordering::Greater => Ok(e.name.as_ref()),
+                }
+            })?;
+        value.files.iter().try_fold(&b""[..], |prev_name, e| {
+            match e.name.as_ref().cmp(prev_name) {
+                Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
+                Ordering::Equal => Err(DirectoryError::DuplicateName(
+                    e.name
+                        .to_owned()
+                        .try_into()
+                        .map_err(DirectoryError::InvalidName)?,
+                )),
+                Ordering::Greater => Ok(e.name.as_ref()),
+            }
+        })?;
+        value.symlinks.iter().try_fold(&b""[..], |prev_name, e| {
+            match e.name.as_ref().cmp(prev_name) {
+                Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
+                Ordering::Equal => Err(DirectoryError::DuplicateName(
+                    e.name
+                        .to_owned()
+                        .try_into()
+                        .map_err(DirectoryError::InvalidName)?,
+                )),
+                Ordering::Greater => Ok(e.name.as_ref()),
+            }
+        })?;
 
-        // check files
-        for file_node in &self.files {
-            node::Node::File(file_node.clone())
-                .validate()
-                .map_err(|e| ValidateDirectoryError::InvalidNode(file_node.name.to_vec(), e))?;
+        // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for
+        // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline.
+        let mut elems: Vec<(PathComponent, crate::Node)> =
+            Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len());
 
-            update_if_lt_prev(&mut last_file_name, &file_node.name)?;
-            insert_once(&mut seen_names, &file_node.name)?;
+        for e in value.directories {
+            elems.push(
+                Node {
+                    node: Some(node::Node::Directory(e)),
+                }
+                .try_into_name_and_node()?,
+            );
         }
 
-        // check symlinks
-        for symlink_node in &self.symlinks {
-            node::Node::Symlink(symlink_node.clone())
-                .validate()
-                .map_err(|e| ValidateDirectoryError::InvalidNode(symlink_node.name.to_vec(), e))?;
+        for e in value.files {
+            elems.push(
+                Node {
+                    node: Some(node::Node::File(e)),
+                }
+                .try_into_name_and_node()?,
+            )
+        }
 
-            update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?;
-            insert_once(&mut seen_names, &symlink_node.name)?;
+        for e in value.symlinks {
+            elems.push(
+                Node {
+                    node: Some(node::Node::Symlink(e)),
+                }
+                .try_into_name_and_node()?,
+            )
         }
 
-        self.size_checked()
-            .ok_or(ValidateDirectoryError::SizeOverflow)?;
+        crate::Directory::try_from_iter(elems)
+    }
+}
 
-        Ok(())
+impl From<crate::Directory> for Directory {
+    fn from(value: crate::Directory) -> Self {
+        let mut directories = vec![];
+        let mut files = vec![];
+        let mut symlinks = vec![];
+
+        for (name, node) in value.into_nodes() {
+            match node {
+                crate::Node::File {
+                    digest,
+                    size,
+                    executable,
+                } => files.push(FileNode {
+                    name: name.into(),
+                    digest: digest.into(),
+                    size,
+                    executable,
+                }),
+                crate::Node::Directory { digest, size } => directories.push(DirectoryNode {
+                    name: name.into(),
+                    digest: digest.into(),
+                    size,
+                }),
+                crate::Node::Symlink { target } => {
+                    symlinks.push(SymlinkNode {
+                        name: name.into(),
+                        target: target.into(),
+                    });
+                }
+            }
+        }
+
+        Directory {
+            directories,
+            files,
+            symlinks,
+        }
     }
+}
 
-    /// Allows iterating over all three nodes ([DirectoryNode], [FileNode],
-    /// [SymlinkNode]) in an ordered fashion, as long as the individual lists
-    /// are sorted (which can be checked by the [Directory::validate]).
-    pub fn nodes(&self) -> DirectoryNodesIterator {
-        return DirectoryNodesIterator {
-            i_directories: self.directories.iter().peekable(),
-            i_files: self.files.iter().peekable(),
-            i_symlinks: self.symlinks.iter().peekable(),
-        };
+impl Node {
+    /// Converts a proto [Node] to a [crate::Node], and splits off the name as a [PathComponent].
+    pub fn try_into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> {
+        let (name_bytes, node) = self.try_into_unchecked_name_and_checked_node()?;
+        Ok((
+            name_bytes.try_into().map_err(DirectoryError::InvalidName)?,
+            node,
+        ))
     }
 
-    /// Adds the specified [node::Node] to the [Directory], preserving sorted entries.
-    /// This assumes the [Directory] to be sorted prior to adding the node.
-    ///
-    /// Inserting an element that already exists with the same name in the directory is not
-    /// supported.
-    pub fn add(&mut self, node: node::Node) {
-        debug_assert!(
-            !self.files.iter().any(|x| x.get_name() == node.get_name()),
-            "name already exists in files"
-        );
-        debug_assert!(
-            !self
-                .directories
-                .iter()
-                .any(|x| x.get_name() == node.get_name()),
-            "name already exists in directories"
-        );
-        debug_assert!(
-            !self
-                .symlinks
-                .iter()
-                .any(|x| x.get_name() == node.get_name()),
-            "name already exists in symlinks"
-        );
-
-        match node {
-            node::Node::File(node) => {
-                let pos = self
-                    .files
-                    .binary_search(&node)
-                    .expect_err("Tvix bug: dir entry with name already exists");
-                self.files.insert(pos, node);
+    /// Converts a proto [Node] to a [crate::Node], and splits off the name as a
+    /// [bytes::Bytes] without doing any checking of it.
+    fn try_into_unchecked_name_and_checked_node(
+        self,
+    ) -> Result<(bytes::Bytes, crate::Node), DirectoryError> {
+        match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? {
+            node::Node::Directory(n) => {
+                let digest = B3Digest::try_from(n.digest)
+                    .map_err(|e| DirectoryError::InvalidNode(n.name.clone(), e.into()))?;
+
+                let node = crate::Node::Directory {
+                    digest,
+                    size: n.size,
+                };
+
+                Ok((n.name, node))
             }
-            node::Node::Directory(node) => {
-                let pos = self
-                    .directories
-                    .binary_search(&node)
-                    .expect_err("Tvix bug: dir entry with name already exists");
-                self.directories.insert(pos, node);
+            node::Node::File(n) => {
+                let digest = B3Digest::try_from(n.digest)
+                    .map_err(|e| DirectoryError::InvalidNode(n.name.clone(), e.into()))?;
+
+                let node = crate::Node::File {
+                    digest,
+                    size: n.size,
+                    executable: n.executable,
+                };
+
+                Ok((n.name, node))
             }
-            node::Node::Symlink(node) => {
-                let pos = self
-                    .symlinks
-                    .binary_search(&node)
-                    .expect_err("Tvix bug: dir entry with name already exists");
-                self.symlinks.insert(pos, node);
+
+            node::Node::Symlink(n) => {
+                let node = crate::Node::Symlink {
+                    target: n.target.try_into().map_err(|e| {
+                        DirectoryError::InvalidNode(
+                            n.name.clone(),
+                            crate::ValidateNodeError::InvalidSymlinkTarget(e),
+                        )
+                    })?,
+                };
+
+                Ok((n.name, node))
             }
         }
     }
+
+    /// Converts a proto [Node] to a [crate::Node], and splits off the name and returns it as a
+    /// [bytes::Bytes].
+    ///
+    /// The name must be empty.
+    pub fn try_into_anonymous_node(self) -> Result<crate::Node, DirectoryError> {
+        let (name, node) = Self::try_into_unchecked_name_and_checked_node(self)?;
+
+        if !name.is_empty() {
+            return Err(DirectoryError::NameInAnonymousNode);
+        }
+
+        Ok(node)
+    }
+
+    /// Constructs a [Node] from a name and [crate::Node].
+    /// The name is a [bytes::Bytes], not a [PathComponent], as we have use an
+    /// empty name in some places.
+    pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self {
+        match n {
+            crate::Node::Directory { digest, size } => Self {
+                node: Some(node::Node::Directory(DirectoryNode {
+                    name,
+                    digest: digest.into(),
+                    size,
+                })),
+            },
+            crate::Node::File {
+                digest,
+                size,
+                executable,
+            } => Self {
+                node: Some(node::Node::File(FileNode {
+                    name,
+                    digest: digest.into(),
+                    size,
+                    executable,
+                })),
+            },
+            crate::Node::Symlink { target } => Self {
+                node: Some(node::Node::Symlink(SymlinkNode {
+                    name,
+                    target: target.into(),
+                })),
+            },
+        }
+    }
 }
 
 impl StatBlobResponse {
@@ -407,65 +309,3 @@ impl StatBlobResponse {
         Ok(())
     }
 }
-
-/// Struct to hold the state of an iterator over all nodes of a Directory.
-///
-/// Internally, this keeps peekable Iterators over all three lists of a
-/// Directory message.
-pub struct DirectoryNodesIterator<'a> {
-    // directory: &Directory,
-    i_directories: Peekable<std::slice::Iter<'a, DirectoryNode>>,
-    i_files: Peekable<std::slice::Iter<'a, FileNode>>,
-    i_symlinks: Peekable<std::slice::Iter<'a, SymlinkNode>>,
-}
-
-/// looks at two elements implementing NamedNode, and returns true if "left
-/// is smaller / comes first".
-///
-/// Some(_) is preferred over None.
-fn left_name_lt_right<A: NamedNode, B: NamedNode>(left: Option<&A>, right: Option<&B>) -> bool {
-    match left {
-        // if left is None, right always wins
-        None => false,
-        Some(left_inner) => {
-            // left is Some.
-            match right {
-                // left is Some, right is None - left wins.
-                None => true,
-                Some(right_inner) => {
-                    // both are Some - compare the name.
-                    return left_inner.get_name() < right_inner.get_name();
-                }
-            }
-        }
-    }
-}
-
-impl Iterator for DirectoryNodesIterator<'_> {
-    type Item = node::Node;
-
-    // next returns the next node in the Directory.
-    // we peek at all three internal iterators, and pick the one with the
-    // smallest name, to ensure lexicographical ordering.
-    // The individual lists are already known to be sorted.
-    fn next(&mut self) -> Option<Self::Item> {
-        if left_name_lt_right(self.i_directories.peek(), self.i_files.peek()) {
-            // i_directories is still in the game, compare with symlinks
-            if left_name_lt_right(self.i_directories.peek(), self.i_symlinks.peek()) {
-                self.i_directories
-                    .next()
-                    .cloned()
-                    .map(node::Node::Directory)
-            } else {
-                self.i_symlinks.next().cloned().map(node::Node::Symlink)
-            }
-        } else {
-            // i_files is still in the game, compare with symlinks
-            if left_name_lt_right(self.i_files.peek(), self.i_symlinks.peek()) {
-                self.i_files.next().cloned().map(node::Node::File)
-            } else {
-                self.i_symlinks.next().cloned().map(node::Node::Symlink)
-            }
-        }
-    }
-}