about summary refs log tree commit diff
path: root/tvix/castore/src/proto/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/castore/src/proto/mod.rs')
-rw-r--r--tvix/castore/src/proto/mod.rs279
1 files changed, 279 insertions, 0 deletions
diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs
new file mode 100644
index 000000000000..2a44383fdd85
--- /dev/null
+++ b/tvix/castore/src/proto/mod.rs
@@ -0,0 +1,279 @@
+#![allow(clippy::derive_partial_eq_without_eq, non_snake_case)]
+// https://github.com/hyperium/tonic/issues/1056
+use data_encoding::BASE64;
+use std::{collections::HashSet, iter::Peekable};
+use thiserror::Error;
+
+use prost::Message;
+
+mod grpc_blobservice_wrapper;
+mod grpc_directoryservice_wrapper;
+
+pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper;
+pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper;
+
+use crate::B3Digest;
+
+tonic::include_proto!("tvix.castore.v1");
+
+#[cfg(feature = "reflection")]
+/// Compiled file descriptors for implementing [gRPC
+/// reflection](https://github.com/grpc/grpc/blob/master/doc/server-reflection.md) with e.g.
+/// [`tonic_reflection`](https://docs.rs/tonic-reflection).
+pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix.castore.v1");
+
+#[cfg(test)]
+mod tests;
+
+/// Errors that can occur during the validation of Directory messages.
+#[derive(Debug, PartialEq, Eq, Error)]
+pub enum ValidateDirectoryError {
+    /// Elements are not in sorted order
+    #[error("{} is not sorted", std::str::from_utf8(.0).unwrap_or(&BASE64.encode(.0)))]
+    WrongSorting(Vec<u8>),
+    /// Multiple elements with the same name encountered
+    #[error("{0:?} is a duplicate name")]
+    DuplicateName(Vec<u8>),
+    /// Invalid name encountered
+    #[error("Invalid name in {0:?}")]
+    InvalidName(Vec<u8>),
+    /// Invalid digest length encountered
+    #[error("Invalid Digest length: {0}")]
+    InvalidDigestLen(usize),
+}
+
+/// Checks a Node name for validity as an intermediate node, and returns an
+/// error that's generated from the supplied constructor.
+///
+/// We disallow slashes, null bytes, '.', '..' and the empty string.
+fn validate_node_name<E>(name: &[u8], err: fn(Vec<u8>) -> E) -> Result<(), E> {
+    if name.is_empty()
+        || name == b".."
+        || name == b"."
+        || name.contains(&0x00)
+        || name.contains(&b'/')
+    {
+        return Err(err(name.to_vec()));
+    }
+    Ok(())
+}
+
+/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
+/// and [node::Node], so we can ask all of them for the name easily.
+pub trait NamedNode {
+    fn get_name(&self) -> &[u8];
+}
+
+impl NamedNode for &FileNode {
+    fn get_name(&self) -> &[u8] {
+        &self.name
+    }
+}
+
+impl NamedNode for &DirectoryNode {
+    fn get_name(&self) -> &[u8] {
+        &self.name
+    }
+}
+
+impl NamedNode for &SymlinkNode {
+    fn get_name(&self) -> &[u8] {
+        &self.name
+    }
+}
+
+impl NamedNode for node::Node {
+    fn get_name(&self) -> &[u8] {
+        match self {
+            node::Node::File(node_file) => &node_file.name,
+            node::Node::Directory(node_directory) => &node_directory.name,
+            node::Node::Symlink(node_symlink) => &node_symlink.name,
+        }
+    }
+}
+
+impl node::Node {
+    /// Returns the node with a new name.
+    pub fn rename(self, name: bytes::Bytes) -> Self {
+        match self {
+            node::Node::Directory(n) => node::Node::Directory(DirectoryNode { name, ..n }),
+            node::Node::File(n) => node::Node::File(FileNode { name, ..n }),
+            node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }),
+        }
+    }
+}
+
+/// Accepts a name, and a mutable reference to the previous name.
+/// If the passed name is larger than the previous one, the reference is updated.
+/// If it's not, an error is returned.
+fn update_if_lt_prev<'n>(
+    prev_name: &mut &'n [u8],
+    name: &'n [u8],
+) -> Result<(), ValidateDirectoryError> {
+    if *name < **prev_name {
+        return Err(ValidateDirectoryError::WrongSorting(name.to_vec()));
+    }
+    *prev_name = name;
+    Ok(())
+}
+
+/// Inserts the given name into a HashSet if it's not already in there.
+/// If it is, an error is returned.
+fn insert_once<'n>(
+    seen_names: &mut HashSet<&'n [u8]>,
+    name: &'n [u8],
+) -> Result<(), ValidateDirectoryError> {
+    if seen_names.get(name).is_some() {
+        return Err(ValidateDirectoryError::DuplicateName(name.to_vec()));
+    }
+    seen_names.insert(name);
+    Ok(())
+}
+
+impl Directory {
+    /// The size of a directory is the number of all regular and symlink elements,
+    /// the number of directory elements, and their size fields.
+    pub fn size(&self) -> u32 {
+        self.files.len() as u32
+            + self.symlinks.len() as u32
+            + self
+                .directories
+                .iter()
+                .fold(0, |acc: u32, e| (acc + 1 + e.size))
+    }
+
+    /// Calculates the digest of a Directory, which is the blake3 hash of a
+    /// Directory protobuf message, serialized in protobuf canonical form.
+    pub fn digest(&self) -> B3Digest {
+        let mut hasher = blake3::Hasher::new();
+
+        hasher
+            .update(&self.encode_to_vec())
+            .finalize()
+            .as_bytes()
+            .into()
+    }
+
+    /// validate checks the directory for invalid data, such as:
+    /// - violations of name restrictions
+    /// - invalid digest lengths
+    /// - not properly sorted lists
+    /// - duplicate names in the three lists
+    pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
+        let mut seen_names: HashSet<&[u8]> = HashSet::new();
+
+        let mut last_directory_name: &[u8] = b"";
+        let mut last_file_name: &[u8] = b"";
+        let mut last_symlink_name: &[u8] = b"";
+
+        // check directories
+        for directory_node in &self.directories {
+            validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?;
+            // ensure the digest has the appropriate size.
+            if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
+                return Err(ValidateDirectoryError::InvalidDigestLen(
+                    directory_node.digest.len(),
+                ));
+            }
+
+            update_if_lt_prev(&mut last_directory_name, &directory_node.name)?;
+            insert_once(&mut seen_names, &directory_node.name)?;
+        }
+
+        // check files
+        for file_node in &self.files {
+            validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?;
+            if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
+                return Err(ValidateDirectoryError::InvalidDigestLen(
+                    file_node.digest.len(),
+                ));
+            }
+
+            update_if_lt_prev(&mut last_file_name, &file_node.name)?;
+            insert_once(&mut seen_names, &file_node.name)?;
+        }
+
+        // check symlinks
+        for symlink_node in &self.symlinks {
+            validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?;
+
+            update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?;
+            insert_once(&mut seen_names, &symlink_node.name)?;
+        }
+
+        Ok(())
+    }
+
+    /// Allows iterating over all three nodes ([DirectoryNode], [FileNode],
+    /// [SymlinkNode]) in an ordered fashion, as long as the individual lists
+    /// are sorted (which can be checked by the [Directory::validate]).
+    pub fn nodes(&self) -> DirectoryNodesIterator {
+        return DirectoryNodesIterator {
+            i_directories: self.directories.iter().peekable(),
+            i_files: self.files.iter().peekable(),
+            i_symlinks: self.symlinks.iter().peekable(),
+        };
+    }
+}
+
+/// Struct to hold the state of an iterator over all nodes of a Directory.
+///
+/// Internally, this keeps peekable Iterators over all three lists of a
+/// Directory message.
+pub struct DirectoryNodesIterator<'a> {
+    // directory: &Directory,
+    i_directories: Peekable<std::slice::Iter<'a, DirectoryNode>>,
+    i_files: Peekable<std::slice::Iter<'a, FileNode>>,
+    i_symlinks: Peekable<std::slice::Iter<'a, SymlinkNode>>,
+}
+
+/// looks at two elements implementing NamedNode, and returns true if "left
+/// is smaller / comes first".
+///
+/// Some(_) is preferred over None.
+fn left_name_lt_right<A: NamedNode, B: NamedNode>(left: Option<&A>, right: Option<&B>) -> bool {
+    match left {
+        // if left is None, right always wins
+        None => false,
+        Some(left_inner) => {
+            // left is Some.
+            match right {
+                // left is Some, right is None - left wins.
+                None => true,
+                Some(right_inner) => {
+                    // both are Some - compare the name.
+                    return left_inner.get_name() < right_inner.get_name();
+                }
+            }
+        }
+    }
+}
+
+impl Iterator for DirectoryNodesIterator<'_> {
+    type Item = node::Node;
+
+    // next returns the next node in the Directory.
+    // we peek at all three internal iterators, and pick the one with the
+    // smallest name, to ensure lexicographical ordering.
+    // The individual lists are already known to be sorted.
+    fn next(&mut self) -> Option<Self::Item> {
+        if left_name_lt_right(self.i_directories.peek(), self.i_files.peek()) {
+            // i_directories is still in the game, compare with symlinks
+            if left_name_lt_right(self.i_directories.peek(), self.i_symlinks.peek()) {
+                self.i_directories
+                    .next()
+                    .cloned()
+                    .map(node::Node::Directory)
+            } else {
+                self.i_symlinks.next().cloned().map(node::Node::Symlink)
+            }
+        } else {
+            // i_files is still in the game, compare with symlinks
+            if left_name_lt_right(self.i_files.peek(), self.i_symlinks.peek()) {
+                self.i_files.next().cloned().map(node::Node::File)
+            } else {
+                self.i_symlinks.next().cloned().map(node::Node::Symlink)
+            }
+        }
+    }
+}