diff options
Diffstat (limited to 'tvix/castore/src/proto/mod.rs')
-rw-r--r-- | tvix/castore/src/proto/mod.rs | 584 |
1 files changed, 212 insertions, 372 deletions
diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index 5374e3ae5a80..89c68a4ad97b 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -1,18 +1,14 @@ -#![allow(non_snake_case)] -// https://github.com/hyperium/tonic/issues/1056 -use bstr::ByteSlice; -use std::{collections::HashSet, iter::Peekable, str}; - use prost::Message; +use std::cmp::Ordering; + mod grpc_blobservice_wrapper; mod grpc_directoryservice_wrapper; +use crate::{path::PathComponent, B3Digest, DirectoryError}; pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper; pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper; -use crate::{B3Digest, B3_LEN}; - tonic::include_proto!("tvix.castore.v1"); #[cfg(feature = "tonic-reflection")] @@ -24,38 +20,6 @@ pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix #[cfg(test)] mod tests; -/// Errors that can occur during the validation of [Directory] messages. -#[derive(Debug, PartialEq, Eq, thiserror::Error)] -pub enum ValidateDirectoryError { - /// Elements are not in sorted order - #[error("{:?} is not sorted", .0.as_bstr())] - WrongSorting(Vec<u8>), - /// Multiple elements with the same name encountered - #[error("{:?} is a duplicate name", .0.as_bstr())] - DuplicateName(Vec<u8>), - /// Invalid node - #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] - InvalidNode(Vec<u8>, ValidateNodeError), - #[error("Total size exceeds u32::MAX")] - SizeOverflow, -} - -/// Errors that occur during Node validation -#[derive(Debug, PartialEq, Eq, thiserror::Error)] -pub enum ValidateNodeError { - #[error("No node set")] - NoNodeSet, - /// Invalid digest length encountered - #[error("Invalid Digest length: {0}")] - InvalidDigestLen(usize), - /// Invalid name encountered - #[error("Invalid name: {}", .0.as_bstr())] - InvalidName(Vec<u8>), - /// Invalid symlink target - #[error("Invalid symlink target: {}", .0.as_bstr())] - InvalidSymlinkTarget(Vec<u8>), -} - /// Errors that occur during StatBlobResponse validation #[derive(Debug, PartialEq, Eq, thiserror::Error)] pub enum ValidateStatBlobResponseError { @@ -64,184 +28,6 @@ pub enum ValidateStatBlobResponseError { InvalidDigestLen(usize, usize), } -/// Checks a Node name for validity as an intermediate node. -/// We disallow slashes, null bytes, '.', '..' and the empty string. -pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { - if name.is_empty() - || name == b".." - || name == b"." - || name.contains(&0x00) - || name.contains(&b'/') - { - Err(ValidateNodeError::InvalidName(name.to_owned())) - } else { - Ok(()) - } -} - -/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] -/// and [node::Node], so we can ask all of them for the name easily. -pub trait NamedNode { - fn get_name(&self) -> &[u8]; -} - -impl NamedNode for &FileNode { - fn get_name(&self) -> &[u8] { - &self.name - } -} - -impl NamedNode for &DirectoryNode { - fn get_name(&self) -> &[u8] { - &self.name - } -} - -impl NamedNode for &SymlinkNode { - fn get_name(&self) -> &[u8] { - &self.name - } -} - -impl NamedNode for node::Node { - fn get_name(&self) -> &[u8] { - match self { - node::Node::File(node_file) => &node_file.name, - node::Node::Directory(node_directory) => &node_directory.name, - node::Node::Symlink(node_symlink) => &node_symlink.name, - } - } -} - -impl Node { - /// Ensures the node has a valid enum kind (is Some), and passes its - // per-enum validation. - pub fn validate(&self) -> Result<(), ValidateNodeError> { - if let Some(node) = self.node.as_ref() { - node.validate() - } else { - Err(ValidateNodeError::NoNodeSet) - } - } -} - -impl node::Node { - /// Returns the node with a new name. - pub fn rename(self, name: bytes::Bytes) -> Self { - match self { - node::Node::Directory(n) => node::Node::Directory(DirectoryNode { name, ..n }), - node::Node::File(n) => node::Node::File(FileNode { name, ..n }), - node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }), - } - } - - /// Ensures the node has a valid name, and checks the type-specific fields too. - pub fn validate(&self) -> Result<(), ValidateNodeError> { - match self { - // for a directory root node, ensure the digest has the appropriate size. - node::Node::Directory(directory_node) => { - if directory_node.digest.len() != B3_LEN { - Err(ValidateNodeError::InvalidDigestLen( - directory_node.digest.len(), - ))?; - } - validate_node_name(&directory_node.name) - } - // for a file root node, ensure the digest has the appropriate size. - node::Node::File(file_node) => { - if file_node.digest.len() != B3_LEN { - Err(ValidateNodeError::InvalidDigestLen(file_node.digest.len()))?; - } - validate_node_name(&file_node.name) - } - // ensure the symlink target is not empty and doesn't contain null bytes. - node::Node::Symlink(symlink_node) => { - if symlink_node.target.is_empty() || symlink_node.target.contains(&b'\0') { - Err(ValidateNodeError::InvalidSymlinkTarget( - symlink_node.target.to_vec(), - ))?; - } - validate_node_name(&symlink_node.name) - } - } - } -} - -impl PartialOrd for node::Node { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for node::Node { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl PartialOrd for FileNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for FileNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl PartialOrd for SymlinkNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for SymlinkNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl PartialOrd for DirectoryNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for DirectoryNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -/// Accepts a name, and a mutable reference to the previous name. -/// If the passed name is larger than the previous one, the reference is updated. -/// If it's not, an error is returned. -fn update_if_lt_prev<'n>( - prev_name: &mut &'n [u8], - name: &'n [u8], -) -> Result<(), ValidateDirectoryError> { - if *name < **prev_name { - return Err(ValidateDirectoryError::WrongSorting(name.to_vec())); - } - *prev_name = name; - Ok(()) -} - -/// Inserts the given name into a HashSet if it's not already in there. -/// If it is, an error is returned. -fn insert_once<'n>( - seen_names: &mut HashSet<&'n [u8]>, - name: &'n [u8], -) -> Result<(), ValidateDirectoryError> { - if seen_names.get(name).is_some() { - return Err(ValidateDirectoryError::DuplicateName(name.to_vec())); - } - seen_names.insert(name); - Ok(()) -} - fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i)) } @@ -278,117 +64,233 @@ impl Directory { .as_bytes() .into() } +} - /// validate checks the directory for invalid data, such as: - /// - violations of name restrictions - /// - invalid digest lengths - /// - not properly sorted lists - /// - duplicate names in the three lists - pub fn validate(&self) -> Result<(), ValidateDirectoryError> { - let mut seen_names: HashSet<&[u8]> = HashSet::new(); - - let mut last_directory_name: &[u8] = b""; - let mut last_file_name: &[u8] = b""; - let mut last_symlink_name: &[u8] = b""; - - // check directories - for directory_node in &self.directories { - node::Node::Directory(directory_node.clone()) - .validate() - .map_err(|e| { - ValidateDirectoryError::InvalidNode(directory_node.name.to_vec(), e) - })?; - - update_if_lt_prev(&mut last_directory_name, &directory_node.name)?; - insert_once(&mut seen_names, &directory_node.name)?; - } +impl TryFrom<Directory> for crate::Directory { + type Error = DirectoryError; + + fn try_from(value: Directory) -> Result<Self, Self::Error> { + // Check directories, files and symlinks are sorted + // We'll notice duplicates across all three fields when constructing the Directory. + // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for + // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline. + value + .directories + .iter() + .try_fold(&b""[..], |prev_name, e| { + match e.name.as_ref().cmp(prev_name) { + Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), + Ordering::Equal => Err(DirectoryError::DuplicateName( + e.name + .to_owned() + .try_into() + .map_err(DirectoryError::InvalidName)?, + )), + Ordering::Greater => Ok(e.name.as_ref()), + } + })?; + value.files.iter().try_fold(&b""[..], |prev_name, e| { + match e.name.as_ref().cmp(prev_name) { + Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), + Ordering::Equal => Err(DirectoryError::DuplicateName( + e.name + .to_owned() + .try_into() + .map_err(DirectoryError::InvalidName)?, + )), + Ordering::Greater => Ok(e.name.as_ref()), + } + })?; + value.symlinks.iter().try_fold(&b""[..], |prev_name, e| { + match e.name.as_ref().cmp(prev_name) { + Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), + Ordering::Equal => Err(DirectoryError::DuplicateName( + e.name + .to_owned() + .try_into() + .map_err(DirectoryError::InvalidName)?, + )), + Ordering::Greater => Ok(e.name.as_ref()), + } + })?; - // check files - for file_node in &self.files { - node::Node::File(file_node.clone()) - .validate() - .map_err(|e| ValidateDirectoryError::InvalidNode(file_node.name.to_vec(), e))?; + // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for + // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline. + let mut elems: Vec<(PathComponent, crate::Node)> = + Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len()); - update_if_lt_prev(&mut last_file_name, &file_node.name)?; - insert_once(&mut seen_names, &file_node.name)?; + for e in value.directories { + elems.push( + Node { + node: Some(node::Node::Directory(e)), + } + .try_into_name_and_node()?, + ); } - // check symlinks - for symlink_node in &self.symlinks { - node::Node::Symlink(symlink_node.clone()) - .validate() - .map_err(|e| ValidateDirectoryError::InvalidNode(symlink_node.name.to_vec(), e))?; + for e in value.files { + elems.push( + Node { + node: Some(node::Node::File(e)), + } + .try_into_name_and_node()?, + ) + } - update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?; - insert_once(&mut seen_names, &symlink_node.name)?; + for e in value.symlinks { + elems.push( + Node { + node: Some(node::Node::Symlink(e)), + } + .try_into_name_and_node()?, + ) } - self.size_checked() - .ok_or(ValidateDirectoryError::SizeOverflow)?; + crate::Directory::try_from_iter(elems) + } +} - Ok(()) +impl From<crate::Directory> for Directory { + fn from(value: crate::Directory) -> Self { + let mut directories = vec![]; + let mut files = vec![]; + let mut symlinks = vec![]; + + for (name, node) in value.into_nodes() { + match node { + crate::Node::File { + digest, + size, + executable, + } => files.push(FileNode { + name: name.into(), + digest: digest.into(), + size, + executable, + }), + crate::Node::Directory { digest, size } => directories.push(DirectoryNode { + name: name.into(), + digest: digest.into(), + size, + }), + crate::Node::Symlink { target } => { + symlinks.push(SymlinkNode { + name: name.into(), + target: target.into(), + }); + } + } + } + + Directory { + directories, + files, + symlinks, + } } +} - /// Allows iterating over all three nodes ([DirectoryNode], [FileNode], - /// [SymlinkNode]) in an ordered fashion, as long as the individual lists - /// are sorted (which can be checked by the [Directory::validate]). - pub fn nodes(&self) -> DirectoryNodesIterator { - return DirectoryNodesIterator { - i_directories: self.directories.iter().peekable(), - i_files: self.files.iter().peekable(), - i_symlinks: self.symlinks.iter().peekable(), - }; +impl Node { + /// Converts a proto [Node] to a [crate::Node], and splits off the name as a [PathComponent]. + pub fn try_into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> { + let (name_bytes, node) = self.try_into_unchecked_name_and_checked_node()?; + Ok(( + name_bytes.try_into().map_err(DirectoryError::InvalidName)?, + node, + )) } - /// Adds the specified [node::Node] to the [Directory], preserving sorted entries. - /// This assumes the [Directory] to be sorted prior to adding the node. - /// - /// Inserting an element that already exists with the same name in the directory is not - /// supported. - pub fn add(&mut self, node: node::Node) { - debug_assert!( - !self.files.iter().any(|x| x.get_name() == node.get_name()), - "name already exists in files" - ); - debug_assert!( - !self - .directories - .iter() - .any(|x| x.get_name() == node.get_name()), - "name already exists in directories" - ); - debug_assert!( - !self - .symlinks - .iter() - .any(|x| x.get_name() == node.get_name()), - "name already exists in symlinks" - ); - - match node { - node::Node::File(node) => { - let pos = self - .files - .binary_search(&node) - .expect_err("Tvix bug: dir entry with name already exists"); - self.files.insert(pos, node); + /// Converts a proto [Node] to a [crate::Node], and splits off the name as a + /// [bytes::Bytes] without doing any checking of it. + fn try_into_unchecked_name_and_checked_node( + self, + ) -> Result<(bytes::Bytes, crate::Node), DirectoryError> { + match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? { + node::Node::Directory(n) => { + let digest = B3Digest::try_from(n.digest) + .map_err(|e| DirectoryError::InvalidNode(n.name.clone(), e.into()))?; + + let node = crate::Node::Directory { + digest, + size: n.size, + }; + + Ok((n.name, node)) } - node::Node::Directory(node) => { - let pos = self - .directories - .binary_search(&node) - .expect_err("Tvix bug: dir entry with name already exists"); - self.directories.insert(pos, node); + node::Node::File(n) => { + let digest = B3Digest::try_from(n.digest) + .map_err(|e| DirectoryError::InvalidNode(n.name.clone(), e.into()))?; + + let node = crate::Node::File { + digest, + size: n.size, + executable: n.executable, + }; + + Ok((n.name, node)) } - node::Node::Symlink(node) => { - let pos = self - .symlinks - .binary_search(&node) - .expect_err("Tvix bug: dir entry with name already exists"); - self.symlinks.insert(pos, node); + + node::Node::Symlink(n) => { + let node = crate::Node::Symlink { + target: n.target.try_into().map_err(|e| { + DirectoryError::InvalidNode( + n.name.clone(), + crate::ValidateNodeError::InvalidSymlinkTarget(e), + ) + })?, + }; + + Ok((n.name, node)) } } } + + /// Converts a proto [Node] to a [crate::Node], and splits off the name and returns it as a + /// [bytes::Bytes]. + /// + /// The name must be empty. + pub fn try_into_anonymous_node(self) -> Result<crate::Node, DirectoryError> { + let (name, node) = Self::try_into_unchecked_name_and_checked_node(self)?; + + if !name.is_empty() { + return Err(DirectoryError::NameInAnonymousNode); + } + + Ok(node) + } + + /// Constructs a [Node] from a name and [crate::Node]. + /// The name is a [bytes::Bytes], not a [PathComponent], as we have use an + /// empty name in some places. + pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self { + match n { + crate::Node::Directory { digest, size } => Self { + node: Some(node::Node::Directory(DirectoryNode { + name, + digest: digest.into(), + size, + })), + }, + crate::Node::File { + digest, + size, + executable, + } => Self { + node: Some(node::Node::File(FileNode { + name, + digest: digest.into(), + size, + executable, + })), + }, + crate::Node::Symlink { target } => Self { + node: Some(node::Node::Symlink(SymlinkNode { + name, + target: target.into(), + })), + }, + } + } } impl StatBlobResponse { @@ -407,65 +309,3 @@ impl StatBlobResponse { Ok(()) } } - -/// Struct to hold the state of an iterator over all nodes of a Directory. -/// -/// Internally, this keeps peekable Iterators over all three lists of a -/// Directory message. -pub struct DirectoryNodesIterator<'a> { - // directory: &Directory, - i_directories: Peekable<std::slice::Iter<'a, DirectoryNode>>, - i_files: Peekable<std::slice::Iter<'a, FileNode>>, - i_symlinks: Peekable<std::slice::Iter<'a, SymlinkNode>>, -} - -/// looks at two elements implementing NamedNode, and returns true if "left -/// is smaller / comes first". -/// -/// Some(_) is preferred over None. -fn left_name_lt_right<A: NamedNode, B: NamedNode>(left: Option<&A>, right: Option<&B>) -> bool { - match left { - // if left is None, right always wins - None => false, - Some(left_inner) => { - // left is Some. - match right { - // left is Some, right is None - left wins. - None => true, - Some(right_inner) => { - // both are Some - compare the name. - return left_inner.get_name() < right_inner.get_name(); - } - } - } - } -} - -impl Iterator for DirectoryNodesIterator<'_> { - type Item = node::Node; - - // next returns the next node in the Directory. - // we peek at all three internal iterators, and pick the one with the - // smallest name, to ensure lexicographical ordering. - // The individual lists are already known to be sorted. - fn next(&mut self) -> Option<Self::Item> { - if left_name_lt_right(self.i_directories.peek(), self.i_files.peek()) { - // i_directories is still in the game, compare with symlinks - if left_name_lt_right(self.i_directories.peek(), self.i_symlinks.peek()) { - self.i_directories - .next() - .cloned() - .map(node::Node::Directory) - } else { - self.i_symlinks.next().cloned().map(node::Node::Symlink) - } - } else { - // i_files is still in the game, compare with symlinks - if left_name_lt_right(self.i_files.peek(), self.i_symlinks.peek()) { - self.i_files.next().cloned().map(node::Node::File) - } else { - self.i_symlinks.next().cloned().map(node::Node::Symlink) - } - } - } -} |