diff options
Diffstat (limited to 'tvix/castore/src')
24 files changed, 504 insertions, 699 deletions
diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs index 0cf2f71adb06..ffb25de9a854 100644 --- a/tvix/castore/src/directoryservice/directory_graph.rs +++ b/tvix/castore/src/directoryservice/directory_graph.rs @@ -10,7 +10,7 @@ use petgraph::{ use tracing::instrument; use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator}; -use crate::{B3Digest, Directory, DirectoryNode, NamedNode}; +use crate::{B3Digest, Directory, DirectoryNode}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -69,12 +69,12 @@ pub struct ValidatedDirectoryGraph { root: Option<NodeIndex>, } -fn check_edge(dir: &DirectoryNode, child: &Directory) -> Result<(), Error> { +fn check_edge(dir: &DirectoryNode, dir_name: &[u8], child: &Directory) -> Result<(), Error> { // Ensure the size specified in the child node matches our records. if dir.size() != child.size() { return Err(Error::ValidationError(format!( "'{}' has wrong size, specified {}, recorded {}", - dir.get_name().as_bstr(), + dir_name.as_bstr(), dir.size(), child.size(), ))); @@ -141,19 +141,19 @@ impl<O: OrderValidator> DirectoryGraph<O> { } // set up edges to all child directories - for subdir in directory.directories() { + for (subdir_name, subdir_node) in directory.directories() { let child_ix = *self .digest_to_node_ix - .entry(subdir.digest().clone()) + .entry(subdir_node.digest().clone()) .or_insert_with(|| self.graph.add_node(None)); let pending_edge_check = match &self.graph[child_ix] { Some(child) => { // child is already available, validate the edge now - check_edge(subdir, child)?; + check_edge(subdir_node, subdir_name, child)?; None } - None => Some(subdir.clone()), // pending validation + None => Some(subdir_node.clone()), // pending validation }; self.graph.add_edge(ix, child_ix, pending_edge_check); } @@ -173,7 +173,9 @@ impl<O: OrderValidator> DirectoryGraph<O> { .expect("edge not found") .take() .expect("edge is already validated"); - check_edge(&edge_weight, &directory)?; + + // TODO: where's the name here? + check_edge(&edge_weight, b"??", &directory)?; } // finally, store the directory information in the node weight @@ -277,11 +279,12 @@ mod tests { lazy_static! { pub static ref BROKEN_PARENT_DIRECTORY: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( + dir.add( "foo".into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size() + 42, // wrong! - ).unwrap())).unwrap(); + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size() + 42, // wrong! + ))).unwrap(); dir }; } diff --git a/tvix/castore/src/directoryservice/grpc.rs b/tvix/castore/src/directoryservice/grpc.rs index 2079514d2b79..9696c5631949 100644 --- a/tvix/castore/src/directoryservice/grpc.rs +++ b/tvix/castore/src/directoryservice/grpc.rs @@ -3,8 +3,7 @@ use std::collections::HashSet; use super::{Directory, DirectoryPutter, DirectoryService}; use crate::composition::{CompositionContext, ServiceBuilder}; use crate::proto::{self, get_directory_request::ByWhat}; -use crate::ValidateDirectoryError; -use crate::{B3Digest, Error}; +use crate::{B3Digest, DirectoryError, Error}; use async_stream::try_stream; use futures::stream::BoxStream; use std::sync::Arc; @@ -154,7 +153,7 @@ where } let directory = directory.try_into() - .map_err(|e: ValidateDirectoryError| Error::StorageError(e.to_string()))?; + .map_err(|e: DirectoryError| Error::StorageError(e.to_string()))?; yield directory; }, diff --git a/tvix/castore/src/directoryservice/order_validator.rs b/tvix/castore/src/directoryservice/order_validator.rs index f03bb7ff9a86..6daf4a624d29 100644 --- a/tvix/castore/src/directoryservice/order_validator.rs +++ b/tvix/castore/src/directoryservice/order_validator.rs @@ -48,9 +48,9 @@ impl RootToLeavesValidator { self.expected_digests.insert(directory.digest()); } - for subdir in directory.directories() { + for (_, subdir_node) in directory.directories() { // Allow the children to appear next - self.expected_digests.insert(subdir.digest().clone()); + self.expected_digests.insert(subdir_node.digest().clone()); } } } @@ -79,11 +79,11 @@ impl OrderValidator for LeavesToRootValidator { fn add_directory(&mut self, directory: &Directory) -> bool { let digest = directory.digest(); - for subdir in directory.directories() { - if !self.allowed_references.contains(subdir.digest()) { + for (_, subdir_node) in directory.directories() { + if !self.allowed_references.contains(subdir_node.digest()) { warn!( directory.digest = %digest, - subdirectory.digest = %subdir.digest(), + subdirectory.digest = %subdir_node.digest(), "unexpected directory reference" ); return false; diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs index 7a15f44a686e..c0a0f22c989f 100644 --- a/tvix/castore/src/directoryservice/tests/mod.rs +++ b/tvix/castore/src/directoryservice/tests/mod.rs @@ -218,14 +218,13 @@ async fn upload_reject_dangling_pointer(directory_service: impl DirectoryService async fn upload_reject_wrong_size(directory_service: impl DirectoryService) { let wrong_parent_directory = { let mut dir = Directory::new(); - dir.add(Node::Directory( - DirectoryNode::new( - "foo".into(), + dir.add( + "foo".into(), + Node::Directory(DirectoryNode::new( DIRECTORY_A.digest(), DIRECTORY_A.size() + 42, // wrong! - ) - .unwrap(), - )) + )), + ) .unwrap(); dir }; diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs index ff667e0f65c0..de2ea33c7f9d 100644 --- a/tvix/castore/src/directoryservice/traverse.rs +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -1,4 +1,4 @@ -use crate::{directoryservice::DirectoryService, Error, NamedNode, Node, Path}; +use crate::{directoryservice::DirectoryService, Error, Node, Path}; use tracing::{instrument, warn}; /// This descends from a (root) node to the given (sub)path, returning the Node @@ -37,9 +37,10 @@ where })?; // look for the component in the [Directory]. - // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we - // could stop as soon as e.name is larger than the search string. - if let Some(child_node) = directory.nodes().find(|n| n.get_name() == component) { + if let Some((_child_name, child_node)) = directory + .nodes() + .find(|(name, _node)| name.as_ref() == component) + { // child node found, update prev_node to that and continue. parent_node = child_node.clone(); } else { @@ -81,21 +82,23 @@ mod tests { handle.close().await.expect("must upload"); // construct the node for DIRECTORY_COMPLICATED - let node_directory_complicated = Node::Directory( - DirectoryNode::new( - "doesntmatter".into(), - DIRECTORY_COMPLICATED.digest(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap(), - ); + let node_directory_complicated = Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest(), + DIRECTORY_COMPLICATED.size(), + )); // construct the node for DIRECTORY_COMPLICATED - let node_directory_with_keep = - Node::Directory(DIRECTORY_COMPLICATED.directories().next().unwrap().clone()); + let node_directory_with_keep = Node::Directory( + DIRECTORY_COMPLICATED + .directories() + .next() + .unwrap() + .1 + .clone(), + ); // construct the node for the .keep file - let node_file_keep = Node::File(DIRECTORY_WITH_KEEP.files().next().unwrap().clone()); + let node_file_keep = Node::File(DIRECTORY_WITH_KEEP.files().next().unwrap().1.clone()); // traversal to an empty subpath should return the root node. { diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs index f1133663ede7..4b060707d4cc 100644 --- a/tvix/castore/src/directoryservice/utils.rs +++ b/tvix/castore/src/directoryservice/utils.rs @@ -57,16 +57,15 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>( // enqueue all child directory digests to the work queue, as // long as they're not part of the worklist or already sent. // This panics if the digest looks invalid, it's supposed to be checked first. - for child_directory_node in current_directory.directories() { - // TODO: propagate error - let child_digest: B3Digest = child_directory_node.digest().clone(); + for (_, child_directory_node) in current_directory.directories() { + let child_digest = child_directory_node.digest(); - if worklist_directory_digests.contains(&child_digest) - || sent_directory_digests.contains(&child_digest) + if worklist_directory_digests.contains(child_digest) + || sent_directory_digests.contains(child_digest) { continue; } - worklist_directory_digests.push_back(child_digest); + worklist_directory_digests.push_back(child_digest.clone()); } yield current_directory; diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs index 083f43036845..e03cfa5a5207 100644 --- a/tvix/castore/src/errors.rs +++ b/tvix/castore/src/errors.rs @@ -13,33 +13,12 @@ pub enum Error { StorageError(String), } -/// Errors that can occur during the validation of [Directory] messages. -#[derive(Debug, thiserror::Error, PartialEq)] -pub enum ValidateDirectoryError { - /// Elements are not in sorted order - #[error("{:?} is not sorted", .0.as_bstr())] - WrongSorting(Vec<u8>), - /// Multiple elements with the same name encountered - #[error("{:?} is a duplicate name", .0.as_bstr())] - DuplicateName(Vec<u8>), - /// Invalid node - #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] - InvalidNode(Vec<u8>, ValidateNodeError), - #[error("Total size exceeds u32::MAX")] - SizeOverflow, -} - -/// Errors that occur during Node validation +/// Errors that occur during construction of [crate::Node] #[derive(Debug, thiserror::Error, PartialEq)] pub enum ValidateNodeError { - #[error("No node set")] - NoNodeSet, /// Invalid digest length encountered #[error("invalid digest length: {0}")] InvalidDigestLen(usize), - /// Invalid name encountered - #[error("Invalid name: {}", .0.as_bstr())] - InvalidName(bytes::Bytes), /// Invalid symlink target #[error("Invalid symlink target: {}", .0.as_bstr())] InvalidSymlinkTarget(bytes::Bytes), @@ -53,6 +32,29 @@ impl From<crate::digests::Error> for ValidateNodeError { } } +/// Errors that can occur when populating [crate::Directory] messages, +/// or parsing [crate::proto::Directory] +#[derive(Debug, thiserror::Error, PartialEq)] +pub enum DirectoryError { + /// Multiple elements with the same name encountered + #[error("{:?} is a duplicate name", .0.as_bstr())] + DuplicateName(Vec<u8>), + /// Node failed validation + #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] + InvalidNode(bytes::Bytes, ValidateNodeError), + #[error("Total size exceeds u32::MAX")] + SizeOverflow, + /// Invalid name encountered + #[error("Invalid name: {}", .0.as_bstr())] + InvalidName(bytes::Bytes), + /// Elements are not in sorted order. Can only happen on protos + #[error("{:?} is not sorted", .0.as_bstr())] + WrongSorting(bytes::Bytes), + /// This can only happen if there's an unknown node type (on protos) + #[error("No node set")] + NoNodeSet, +} + impl From<JoinError> for Error { fn from(value: JoinError) -> Self { Error::StorageError(value.to_string()) diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs index 06366f2057ab..b57540d177e2 100644 --- a/tvix/castore/src/fixtures.rs +++ b/tvix/castore/src/fixtures.rs @@ -35,70 +35,83 @@ lazy_static! { // Directories pub static ref DIRECTORY_WITH_KEEP: Directory = { let mut dir = Directory::new(); - dir.add(Node::File(FileNode::new( - b".keep".to_vec().into(), - EMPTY_BLOB_DIGEST.clone(), - 0, - false - ).unwrap())).unwrap(); + dir.add( + ".keep".into(), + Node::File(FileNode::new( + EMPTY_BLOB_DIGEST.clone(), + 0, + false + ))).unwrap(); + dir }; pub static ref DIRECTORY_COMPLICATED: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"keep".to_vec().into(), - DIRECTORY_WITH_KEEP.digest(), - DIRECTORY_WITH_KEEP.size() - ).unwrap())).unwrap(); - dir.add(Node::File(FileNode::new( - b".keep".to_vec().into(), + dir.add( + "keep".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_WITH_KEEP.digest(), + DIRECTORY_WITH_KEEP.size() + ))).unwrap(); + dir.add( + ".keep".into(), + Node::File(FileNode::new( EMPTY_BLOB_DIGEST.clone(), 0, false - ).unwrap())).unwrap(); - dir.add(Node::Symlink(SymlinkNode::new( - b"aa".to_vec().into(), - b"/nix/store/somewhereelse".to_vec().into() - ).unwrap())).unwrap(); + ))).unwrap(); + dir.add( + "aa".into(), + Node::Symlink(SymlinkNode::new( + b"/nix/store/somewhereelse".to_vec().into() + ).unwrap())).unwrap(); + dir }; pub static ref DIRECTORY_A: Directory = Directory::new(); pub static ref DIRECTORY_B: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"a".to_vec().into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ).unwrap())).unwrap(); + dir.add( + "a".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size(), + ))).unwrap(); + dir }; pub static ref DIRECTORY_C: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"a".to_vec().into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ).unwrap())).unwrap(); - dir.add(Node::Directory(DirectoryNode::new( - b"a'".to_vec().into(), + dir.add( + "a".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size(), + ))).unwrap(); + dir.add( + "a'".into(), + Node::Directory(DirectoryNode::new( DIRECTORY_A.digest(), DIRECTORY_A.size(), - ).unwrap())).unwrap(); + ))).unwrap(); + dir }; pub static ref DIRECTORY_D: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"a".to_vec().into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ).unwrap())).unwrap(); - dir.add(Node::Directory(DirectoryNode::new( - b"b".to_vec().into(), - DIRECTORY_B.digest(), - DIRECTORY_B.size(), - ).unwrap())).unwrap(); - dir + dir.add( + "a".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size(), + ))).unwrap(); + dir.add( + "b".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_B.digest(), + DIRECTORY_B.size(), + ))).unwrap(); + dir }; } diff --git a/tvix/castore/src/fs/fuse/tests.rs b/tvix/castore/src/fs/fuse/tests.rs index 3dad7d83aedd..30bbfc46235d 100644 --- a/tvix/castore/src/fs/fuse/tests.rs +++ b/tvix/castore/src/fs/fuse/tests.rs @@ -68,15 +68,11 @@ async fn populate_blob_a( root_nodes.insert( BLOB_A_NAME.into(), - Node::File( - FileNode::new( - BLOB_A_NAME.into(), - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u64, - false, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::BLOB_A_DIGEST.clone(), + fixtures::BLOB_A.len() as u64, + false, + )), ); } @@ -92,15 +88,11 @@ async fn populate_blob_b( root_nodes.insert( BLOB_B_NAME.into(), - Node::File( - FileNode::new( - BLOB_B_NAME.into(), - fixtures::BLOB_B_DIGEST.clone(), - fixtures::BLOB_B.len() as u64, - false, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::BLOB_B_DIGEST.clone(), + fixtures::BLOB_B.len() as u64, + false, + )), ); } @@ -120,22 +112,18 @@ async fn populate_blob_helloworld( root_nodes.insert( HELLOWORLD_BLOB_NAME.into(), - Node::File( - FileNode::new( - HELLOWORLD_BLOB_NAME.into(), - fixtures::HELLOWORLD_BLOB_DIGEST.clone(), - fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, - true, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::HELLOWORLD_BLOB_DIGEST.clone(), + fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, + true, + )), ); } async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( SYMLINK_NAME.into(), - Node::Symlink(SymlinkNode::new(SYMLINK_NAME.into(), BLOB_A_NAME.into()).unwrap()), + Node::Symlink(SymlinkNode::new(BLOB_A_NAME.into()).unwrap()), ); } @@ -144,9 +132,7 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( SYMLINK_NAME2.into(), - Node::Symlink( - SymlinkNode::new(SYMLINK_NAME2.into(), "/nix/store/somewhereelse".into()).unwrap(), - ), + Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into()).unwrap()), ); } @@ -170,14 +156,10 @@ async fn populate_directory_with_keep( root_nodes.insert( DIRECTORY_WITH_KEEP_NAME.into(), - Node::Directory( - DirectoryNode::new( - DIRECTORY_WITH_KEEP_NAME.into(), - fixtures::DIRECTORY_WITH_KEEP.digest(), - fixtures::DIRECTORY_WITH_KEEP.size(), - ) - .unwrap(), - ), + Node::Directory(DirectoryNode::new( + fixtures::DIRECTORY_WITH_KEEP.digest(), + fixtures::DIRECTORY_WITH_KEEP.size(), + )), ); } @@ -186,14 +168,10 @@ async fn populate_directory_with_keep( async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( DIRECTORY_WITH_KEEP_NAME.into(), - Node::Directory( - DirectoryNode::new( - DIRECTORY_WITH_KEEP_NAME.into(), - fixtures::DIRECTORY_WITH_KEEP.digest(), - fixtures::DIRECTORY_WITH_KEEP.size(), - ) - .unwrap(), - ), + Node::Directory(DirectoryNode::new( + fixtures::DIRECTORY_WITH_KEEP.digest(), + fixtures::DIRECTORY_WITH_KEEP.size(), + )), ); } @@ -201,15 +179,11 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( BLOB_A_NAME.into(), - Node::File( - FileNode::new( - BLOB_A_NAME.into(), - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u64, - false, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::BLOB_A_DIGEST.clone(), + fixtures::BLOB_A.len() as u64, + false, + )), ); } @@ -239,14 +213,10 @@ async fn populate_directory_complicated( root_nodes.insert( DIRECTORY_COMPLICATED_NAME.into(), - Node::Directory( - DirectoryNode::new( - DIRECTORY_COMPLICATED_NAME.into(), - fixtures::DIRECTORY_COMPLICATED.digest(), - fixtures::DIRECTORY_COMPLICATED.size(), - ) - .unwrap(), - ), + Node::Directory(DirectoryNode::new( + fixtures::DIRECTORY_COMPLICATED.digest(), + fixtures::DIRECTORY_COMPLICATED.size(), + )), ); } diff --git a/tvix/castore/src/fs/inodes.rs b/tvix/castore/src/fs/inodes.rs index b04505c9fa98..e49d28f8e52e 100644 --- a/tvix/castore/src/fs/inodes.rs +++ b/tvix/castore/src/fs/inodes.rs @@ -2,9 +2,7 @@ //! about inodes, which present tvix-castore nodes in a filesystem. use std::time::Duration; -use bytes::Bytes; - -use crate::{B3Digest, NamedNode, Node}; +use crate::{B3Digest, Node}; #[derive(Clone, Debug)] pub enum InodeData { @@ -19,24 +17,19 @@ pub enum InodeData { /// lookup and did fetch the data. #[derive(Clone, Debug)] pub enum DirectoryInodeData { - Sparse(B3Digest, u64), // digest, size - Populated(B3Digest, Vec<(u64, Node)>), // [(child_inode, node)] + Sparse(B3Digest, u64), // digest, size + Populated(B3Digest, Vec<(u64, bytes::Bytes, Node)>), // [(child_inode, name, node)] } impl InodeData { /// Constructs a new InodeData by consuming a [Node]. - /// It splits off the orginal name, so it can be used later. - pub fn from_node(node: &Node) -> (Self, Bytes) { + pub fn from_node(node: &Node) -> Self { match node { - Node::Directory(n) => ( - Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size())), - n.get_name().clone(), - ), - Node::File(n) => ( - Self::Regular(n.digest().clone(), n.size(), n.executable()), - n.get_name().clone(), - ), - Node::Symlink(n) => (Self::Symlink(n.target().clone()), n.get_name().clone()), + Node::Directory(n) => { + Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size())) + } + Node::File(n) => Self::Regular(n.digest().clone(), n.size(), n.executable()), + Node::Symlink(n) => Self::Symlink(n.target().clone()), } } diff --git a/tvix/castore/src/fs/mod.rs b/tvix/castore/src/fs/mod.rs index 90c1f371b546..f819adb549f5 100644 --- a/tvix/castore/src/fs/mod.rs +++ b/tvix/castore/src/fs/mod.rs @@ -18,7 +18,7 @@ use self::{ use crate::{ blobservice::{BlobReader, BlobService}, directoryservice::DirectoryService, - {B3Digest, NamedNode, Node}, + {B3Digest, Node}, }; use bstr::ByteVec; use bytes::Bytes; @@ -103,7 +103,7 @@ pub struct TvixStoreFs<BS, DS, RN> { u64, ( Span, - Arc<Mutex<mpsc::Receiver<(usize, Result<Node, crate::Error>)>>>, + Arc<Mutex<mpsc::Receiver<(usize, Result<(Bytes, Node), crate::Error>)>>>, ), >, >, @@ -165,8 +165,12 @@ where /// It is ok if it's a [DirectoryInodeData::Sparse] - in that case, a lookup /// in self.directory_service is performed, and self.inode_tracker is updated with the /// [DirectoryInodeData::Populated]. + #[allow(clippy::type_complexity)] #[instrument(skip(self), err)] - fn get_directory_children(&self, ino: u64) -> io::Result<(B3Digest, Vec<(u64, Node)>)> { + fn get_directory_children( + &self, + ino: u64, + ) -> io::Result<(B3Digest, Vec<(u64, bytes::Bytes, Node)>)> { let data = self.inode_tracker.read().get(ino).unwrap(); match *data { // if it's populated already, return children. @@ -196,13 +200,13 @@ where let children = { let mut inode_tracker = self.inode_tracker.write(); - let children: Vec<(u64, Node)> = directory + let children: Vec<(u64, Bytes, Node)> = directory .nodes() - .map(|child_node| { - let (inode_data, _) = InodeData::from_node(child_node); + .map(|(child_name, child_node)| { + let inode_data = InodeData::from_node(child_node); let child_ino = inode_tracker.put(inode_data); - (child_ino, child_node.clone()) + (child_ino, child_name.to_owned(), child_node.clone()) }) .collect(); @@ -263,12 +267,6 @@ where Ok(None) => Err(io::Error::from_raw_os_error(libc::ENOENT)), // The root node does exist Ok(Some(root_node)) => { - // The name must match what's passed in the lookup, otherwise this is also a ENOENT. - if root_node.get_name() != name.to_bytes() { - debug!(root_node.name=?root_node.get_name(), found_node.name=%name.to_string_lossy(), "node name mismatch"); - return Err(io::Error::from_raw_os_error(libc::ENOENT)); - } - // Let's check if someone else beat us to updating the inode tracker and // root_nodes map. This avoids locking inode_tracker for writing. if let Some(ino) = self.root_nodes.read().get(name.to_bytes()) { @@ -285,9 +283,9 @@ where // insert the (sparse) inode data and register in // self.root_nodes. - let (inode_data, name) = InodeData::from_node(&root_node); + let inode_data = InodeData::from_node(&root_node); let ino = inode_tracker.put(inode_data.clone()); - root_nodes.insert(name, ino); + root_nodes.insert(bytes::Bytes::copy_from_slice(name.to_bytes()), ino); Ok((ino, Arc::new(inode_data))) } @@ -362,7 +360,7 @@ where // Search for that name in the list of children and return the FileAttrs. // in the children, find the one with the desired name. - if let Some((child_ino, _)) = children.iter().find(|e| e.1.get_name() == name.to_bytes()) { + if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == name.to_bytes()) { // lookup the child [InodeData] in [self.inode_tracker]. // We know the inodes for children have already been allocated. let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap(); @@ -398,8 +396,8 @@ where self.tokio_handle.spawn( async move { let mut stream = root_nodes_provider.list().enumerate(); - while let Some(node) = stream.next().await { - if tx.send(node).await.is_err() { + while let Some(e) = stream.next().await { + if tx.send(e).await.is_err() { // If we get a send error, it means the sync code // doesn't want any more entries. break; @@ -461,12 +459,12 @@ where .map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?; while let Some((i, n)) = rx.blocking_recv() { - let root_node = n.map_err(|e| { + let (name, node) = n.map_err(|e| { warn!("failed to retrieve root node: {}", e); io::Error::from_raw_os_error(libc::EIO) })?; - let (inode_data, name) = InodeData::from_node(&root_node); + let inode_data = InodeData::from_node(&node); // obtain the inode, or allocate a new one. let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| { @@ -495,15 +493,17 @@ where let (parent_digest, children) = self.get_directory_children(inode)?; Span::current().record("directory.digest", parent_digest.to_string()); - for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() { - let (inode_data, name) = InodeData::from_node(&child_node); + for (i, (ino, child_name, child_node)) in + children.into_iter().skip(offset as usize).enumerate() + { + let inode_data = InodeData::from_node(&child_node); // the second parameter will become the "offset" parameter on the next call. let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry { ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: &child_name, })?; // If the buffer is full, add_entry will return `Ok(0)`. if written == 0 { @@ -548,12 +548,12 @@ where .map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?; while let Some((i, n)) = rx.blocking_recv() { - let root_node = n.map_err(|e| { + let (name, node) = n.map_err(|e| { warn!("failed to retrieve root node: {}", e); io::Error::from_raw_os_error(libc::EPERM) })?; - let (inode_data, name) = InodeData::from_node(&root_node); + let inode_data = InodeData::from_node(&node); // obtain the inode, or allocate a new one. let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| { @@ -585,8 +585,8 @@ where let (parent_digest, children) = self.get_directory_children(inode)?; Span::current().record("directory.digest", parent_digest.to_string()); - for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() { - let (inode_data, name) = InodeData::from_node(&child_node); + for (i, (ino, name, child_node)) in children.into_iter().skip(offset as usize).enumerate() { + let inode_data = InodeData::from_node(&child_node); // the second parameter will become the "offset" parameter on the next call. let written = add_entry( diff --git a/tvix/castore/src/fs/root_nodes.rs b/tvix/castore/src/fs/root_nodes.rs index c237142c8d91..62f0b62196a6 100644 --- a/tvix/castore/src/fs/root_nodes.rs +++ b/tvix/castore/src/fs/root_nodes.rs @@ -12,9 +12,10 @@ pub trait RootNodes: Send + Sync { /// directory of the filesystem. async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error>; - /// Lists all root CA nodes in the filesystem. An error can be returned - /// in case listing is not allowed - fn list(&self) -> BoxStream<Result<Node, Error>>; + /// Lists all root CA nodes in the filesystem, as a tuple of (base)name + /// and Node. + /// An error can be returned in case listing is not allowed. + fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>>; } #[async_trait] @@ -28,9 +29,11 @@ where Ok(self.as_ref().get(name).cloned()) } - fn list(&self) -> BoxStream<Result<Node, Error>> { + fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> { Box::pin(tokio_stream::iter( - self.as_ref().iter().map(|(_, v)| Ok(v.clone())), + self.as_ref() + .iter() + .map(|(name, node)| Ok((name.to_owned(), node.to_owned()))), )) } } diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs index b40dfd74e9e4..408166beffb4 100644 --- a/tvix/castore/src/import/mod.rs +++ b/tvix/castore/src/import/mod.rs @@ -59,14 +59,6 @@ where // we break the loop manually. .expect("Tvix bug: unexpected end of stream")?; - let name = entry - .path() - .file_name() - // If this is the root node, it will have an empty name. - .unwrap_or_default() - .to_owned() - .into(); - let node = match &mut entry { IngestionEntry::Dir { .. } => { // If the entry is a directory, we traversed all its children (and @@ -92,36 +84,22 @@ where IngestionError::UploadDirectoryError(entry.path().to_owned(), e) })?; - Node::Directory( - DirectoryNode::new(name, directory_digest, directory_size).map_err(|e| { - IngestionError::UploadDirectoryError( - entry.path().to_owned(), - crate::Error::StorageError(e.to_string()), - ) - })?, - ) + Node::Directory(DirectoryNode::new(directory_digest, directory_size)) } - IngestionEntry::Symlink { ref target, .. } => Node::Symlink( - SymlinkNode::new(name, target.to_owned().into()).map_err(|e| { + IngestionEntry::Symlink { ref target, .. } => { + Node::Symlink(SymlinkNode::new(target.to_owned().into()).map_err(|e| { IngestionError::UploadDirectoryError( entry.path().to_owned(), crate::Error::StorageError(e.to_string()), ) - })?, - ), + })?) + } IngestionEntry::Regular { size, executable, digest, .. - } => Node::File( - FileNode::new(name, digest.clone(), *size, *executable).map_err(|e| { - IngestionError::UploadDirectoryError( - entry.path().to_owned(), - crate::Error::StorageError(e.to_string()), - ) - })?, - ), + } => Node::File(FileNode::new(digest.clone(), *size, *executable)), }; let parent = entry @@ -132,11 +110,19 @@ where if parent == crate::Path::ROOT { break node; } else { + let name = entry + .path() + .file_name() + // If this is the root node, it will have an empty name. + .unwrap_or_default() + .to_owned() + .into(); + // record node in parent directory, creating a new [Directory] if not there yet. directories .entry(parent.to_owned()) .or_default() - .add(node) + .add(name, node) .map_err(|e| { IngestionError::UploadDirectoryError( entry.path().to_owned(), @@ -227,18 +213,18 @@ mod test { executable: true, digest: DUMMY_DIGEST.clone(), }], - Node::File(FileNode::new("foo".into(), DUMMY_DIGEST.clone(), 42, true).unwrap()) + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 42, true)) )] #[case::single_symlink(vec![IngestionEntry::Symlink { path: "foo".parse().unwrap(), target: b"blub".into(), }], - Node::Symlink(SymlinkNode::new("foo".into(), "blub".into()).unwrap()) + Node::Symlink(SymlinkNode::new("blub".into()).unwrap()) )] #[case::single_dir(vec![IngestionEntry::Dir { path: "foo".parse().unwrap(), }], - Node::Directory(DirectoryNode::new("foo".into(), Directory::default().digest(), Directory::default().size()).unwrap()) + Node::Directory(DirectoryNode::new(Directory::default().digest(), Directory::default().size())) )] #[case::dir_with_keep(vec![ IngestionEntry::Regular { @@ -251,7 +237,7 @@ mod test { path: "foo".parse().unwrap(), }, ], - Node::Directory(DirectoryNode::new("foo".into(), DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size()).unwrap()) + Node::Directory(DirectoryNode::new(DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size())) )] /// This is intentionally a bit unsorted, though it still satisfies all /// requirements we have on the order of elements in the stream. @@ -279,7 +265,7 @@ mod test { path: "blub".parse().unwrap(), }, ], - Node::Directory(DirectoryNode::new("blub".into(), DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size()).unwrap()) + Node::Directory(DirectoryNode::new(DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size())) )] #[tokio::test] async fn test_ingestion(#[case] entries: Vec<IngestionEntry>, #[case] exp_root_node: Node) { diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs index e09926afe70d..f56ddf827722 100644 --- a/tvix/castore/src/lib.rs +++ b/tvix/castore/src/lib.rs @@ -21,7 +21,7 @@ pub mod proto; pub mod tonic; pub use digests::{B3Digest, B3_LEN}; -pub use errors::{Error, ValidateDirectoryError, ValidateNodeError}; +pub use errors::{DirectoryError, Error, ValidateNodeError}; pub use hashing_reader::{B3HashingReader, HashingReader}; #[cfg(test)] diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs index 1752681c89c3..d58b7822a434 100644 --- a/tvix/castore/src/nodes/directory.rs +++ b/tvix/castore/src/nodes/directory.rs @@ -1,23 +1,25 @@ -use crate::{ - proto, B3Digest, DirectoryNode, FileNode, NamedNode, Node, SymlinkNode, ValidateDirectoryError, - ValidateNodeError, -}; - -/// A Directory can contain Directory, File or Symlink nodes. -/// Each of these nodes have a name attribute, which is the basename in that -/// directory and node type specific attributes. -/// While a Node by itself may have any name, the names of Directory entries: +use std::collections::BTreeMap; + +use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode}; + +/// A Directory contains nodes, which can be Directory, File or Symlink nodes. +/// It attached names to these nodes, which is the basename in that directory. +/// These names: /// - MUST not contain slashes or null bytes /// - MUST not be '.' or '..' /// - MUST be unique across all three lists #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct Directory { - nodes: Vec<Node>, + nodes: BTreeMap<bytes::Bytes, Node>, } impl Directory { + /// Constructs a new, empty Directory. + /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes. pub fn new() -> Self { - Directory { nodes: vec![] } + Directory { + nodes: BTreeMap::new(), + } } /// The size of a directory is the number of all regular and symlink elements, @@ -25,7 +27,7 @@ impl Directory { pub fn size(&self) -> u64 { // It's impossible to create a Directory where the size overflows, because we // check before every add() that the size won't overflow. - (self.nodes.len() as u64) + self.directories().map(|e| e.size()).sum::<u64>() + (self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>() } /// Calculates the digest of a Directory, which is the blake3 hash of a @@ -35,62 +37,66 @@ impl Directory { } /// Allows iterating over all nodes (directories, files and symlinks) - /// ordered by their name. - pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ { + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ { self.nodes.iter() } - /// Allows iterating over the FileNode entries of this directory - /// ordered by their name - pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::File(n) => Some(n), + /// Allows iterating over the FileNode entries of this directory. + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::File(n) => Some((name, n)), _ => None, }) } - /// Allows iterating over the subdirectories of this directory - /// ordered by their name - pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Directory(n) => Some(n), + /// Allows iterating over the DirectoryNode entries (subdirectories) of this directory. + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn directories( + &self, + ) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::Directory(n) => Some((name, n)), _ => None, }) } /// Allows iterating over the SymlinkNode entries of this directory - /// ordered by their name - pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Symlink(n) => Some(n), + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn symlinks( + &self, + ) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::Symlink(n) => Some((name, n)), _ => None, }) } /// Checks a Node name for validity as a directory entry /// We disallow slashes, null bytes, '.', '..' and the empty string. - pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { - if name.is_empty() + pub(crate) fn is_valid_name(name: &[u8]) -> bool { + !(name.is_empty() || name == b".." || name == b"." || name.contains(&0x00) - || name.contains(&b'/') - { - Err(ValidateNodeError::InvalidName(name.to_owned().into())) - } else { - Ok(()) - } + || name.contains(&b'/')) } - /// Adds the specified [Node] to the [Directory], preserving sorted entries. + /// Adds the specified [Node] to the [Directory] with a given name. /// /// Inserting an element that already exists with the same name in the directory will yield an /// error. - /// Inserting an element will validate that its name fulfills the stricter requirements for - /// directory entries and yield an error if it is not. - pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> { - Self::validate_node_name(node.get_name()) - .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?; + /// Inserting an element will validate that its name fulfills the + /// requirements for directory entries and yield an error if it is not. + pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> { + if !Self::is_valid_name(&name) { + return Err(DirectoryError::InvalidName(name)); + } // Check that the even after adding this new directory entry, the size calculation will not // overflow @@ -104,24 +110,17 @@ impl Directory { _ => 0, }, ]) - .ok_or(ValidateDirectoryError::SizeOverflow)?; - - // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list - // directly and all previous inserts should have been in-order - let pos = match self - .nodes - .binary_search_by_key(&node.get_name(), |n| n.get_name()) - { - Err(pos) => pos, // There is no node with this name; good! - Ok(_) => { - return Err(ValidateDirectoryError::DuplicateName( - node.get_name().to_vec(), - )) - } - }; + .ok_or(DirectoryError::SizeOverflow)?; - self.nodes.insert(pos, node); - Ok(()) + match self.nodes.entry(name) { + std::collections::btree_map::Entry::Vacant(e) => { + e.insert(node); + Ok(()) + } + std::collections::btree_map::Entry::Occupied(occupied) => { + Err(DirectoryError::DuplicateName(occupied.key().to_vec())) + } + } } } @@ -133,49 +132,58 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { mod test { use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode}; use crate::fixtures::DUMMY_DIGEST; - use crate::ValidateDirectoryError; + use crate::DirectoryError; #[test] fn add_nodes_to_directory() { let mut d = Directory::new(); - d.add(Node::Directory( - DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "b".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "a".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "z".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "f".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "c".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "g".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("t".into(), "a".into()).unwrap(), - )) + d.add( + "t".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("o".into(), "a".into()).unwrap(), - )) + d.add( + "o".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("e".into(), "a".into()).unwrap(), - )) + d.add( + "e".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); // Convert to proto struct and back to ensure we are not generating any invalid structures @@ -188,10 +196,11 @@ mod test { let mut d = Directory::new(); assert_eq!( - d.add(Node::Directory( - DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(), - )), - Err(ValidateDirectoryError::SizeOverflow) + d.add( + "foo".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX)) + ), + Err(DirectoryError::SizeOverflow) ); } @@ -199,16 +208,18 @@ mod test { fn add_duplicate_node_to_directory() { let mut d = Directory::new(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "a".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); assert_eq!( format!( "{}", - d.add(Node::File( - FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "a".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)) + ) .expect_err("adding duplicate dir entry must fail") ), "\"a\" is a duplicate name" @@ -220,13 +231,10 @@ mod test { async fn directory_reject_invalid_name() { let mut dir = Directory::new(); assert!( - dir.add(Node::Symlink( - SymlinkNode::new( - "".into(), // wrong! can not be added to directory - "doesntmatter".into(), - ) - .unwrap() - )) + dir.add( + "".into(), // wrong! can not be added to directory + Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap()) + ) .is_err(), "invalid symlink entry be rejected" ); diff --git a/tvix/castore/src/nodes/directory_node.rs b/tvix/castore/src/nodes/directory_node.rs index afbae1695c63..e6e6312a7a0c 100644 --- a/tvix/castore/src/nodes/directory_node.rs +++ b/tvix/castore/src/nodes/directory_node.rs @@ -1,13 +1,11 @@ -use crate::{B3Digest, NamedNode, ValidateNodeError}; +use crate::B3Digest; /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. -/// It also gives it a `name` and `size`. +/// It also records a`size`. /// Such a node is either an element in the [Directory] it itself is contained in, /// or a standalone root node./ #[derive(Debug, Clone, PartialEq, Eq)] pub struct DirectoryNode { - /// The (base)name of the directory - name: bytes::Bytes, /// The blake3 hash of a Directory message, serialized in protobuf canonical form. digest: B3Digest, /// Number of child elements in the Directory referred to by `digest`. @@ -23,8 +21,8 @@ pub struct DirectoryNode { } impl DirectoryNode { - pub fn new(name: bytes::Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> { - Ok(Self { name, digest, size }) + pub fn new(digest: B3Digest, size: u64) -> Self { + Self { digest, size } } pub fn digest(&self) -> &B3Digest { @@ -34,31 +32,4 @@ impl DirectoryNode { pub fn size(&self) -> u64 { self.size } - - pub fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for DirectoryNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for DirectoryNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/nodes/file_node.rs b/tvix/castore/src/nodes/file_node.rs index 5dd677c17b49..73abe21e59b4 100644 --- a/tvix/castore/src/nodes/file_node.rs +++ b/tvix/castore/src/nodes/file_node.rs @@ -1,11 +1,8 @@ -use crate::{B3Digest, NamedNode, ValidateNodeError}; +use crate::B3Digest; /// A FileNode represents a regular or executable file in a Directory or at the root. #[derive(Debug, Clone, PartialEq, Eq)] pub struct FileNode { - /// The (base)name of the file - name: bytes::Bytes, - /// The blake3 digest of the file contents digest: B3Digest, @@ -17,18 +14,12 @@ pub struct FileNode { } impl FileNode { - pub fn new( - name: bytes::Bytes, - digest: B3Digest, - size: u64, - executable: bool, - ) -> Result<Self, ValidateNodeError> { - Ok(Self { - name, + pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self { + Self { digest, size, executable, - }) + } } pub fn digest(&self) -> &B3Digest { @@ -42,31 +33,4 @@ impl FileNode { pub fn executable(&self) -> bool { self.executable } - - pub fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for FileNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for FileNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs index bb8e14cf856e..47f39ae8f94a 100644 --- a/tvix/castore/src/nodes/mod.rs +++ b/tvix/castore/src/nodes/mod.rs @@ -4,66 +4,17 @@ mod directory_node; mod file_node; mod symlink_node; -use bytes::Bytes; pub use directory::Directory; pub use directory_node::DirectoryNode; pub use file_node::FileNode; pub use symlink_node::SymlinkNode; /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. -/// While a Node by itself may have any name, only those matching specific requirements -/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details). +/// Nodes themselves don't have names, what gives them names is either them +/// being inside a [Directory], or a root node with its own name attached to it. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Node { Directory(DirectoryNode), File(FileNode), Symlink(SymlinkNode), } - -impl Node { - /// Returns the node with a new name. - pub fn rename(self, name: Bytes) -> Self { - match self { - Node::Directory(n) => Node::Directory(n.rename(name)), - Node::File(n) => Node::File(n.rename(name)), - Node::Symlink(n) => Node::Symlink(n.rename(name)), - } - } -} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for Node { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] -/// and [Node], so we can ask all of them for the name easily. -pub trait NamedNode { - fn get_name(&self) -> &Bytes; -} - -impl NamedNode for &Node { - fn get_name(&self) -> &Bytes { - match self { - Node::File(node_file) => node_file.get_name(), - Node::Directory(node_directory) => node_directory.get_name(), - Node::Symlink(node_symlink) => node_symlink.get_name(), - } - } -} -impl NamedNode for Node { - fn get_name(&self) -> &Bytes { - match self { - Node::File(node_file) => node_file.get_name(), - Node::Directory(node_directory) => node_directory.get_name(), - Node::Symlink(node_symlink) => node_symlink.get_name(), - } - } -} diff --git a/tvix/castore/src/nodes/symlink_node.rs b/tvix/castore/src/nodes/symlink_node.rs index 2fac27231a85..6b9df96a5dd3 100644 --- a/tvix/castore/src/nodes/symlink_node.rs +++ b/tvix/castore/src/nodes/symlink_node.rs @@ -1,50 +1,21 @@ -use crate::{NamedNode, ValidateNodeError}; +use crate::ValidateNodeError; /// A SymlinkNode represents a symbolic link in a Directory or at the root. #[derive(Debug, Clone, PartialEq, Eq)] pub struct SymlinkNode { - /// The (base)name of the symlink - name: bytes::Bytes, /// The target of the symlink. target: bytes::Bytes, } impl SymlinkNode { - pub fn new(name: bytes::Bytes, target: bytes::Bytes) -> Result<Self, ValidateNodeError> { + pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> { if target.is_empty() || target.contains(&b'\0') { return Err(ValidateNodeError::InvalidSymlinkTarget(target)); } - Ok(Self { name, target }) + Ok(Self { target }) } pub fn target(&self) -> &bytes::Bytes { &self.target } - - pub(crate) fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for SymlinkNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for SymlinkNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs index fb340478c08c..8a55e9f5a1d3 100644 --- a/tvix/castore/src/path.rs +++ b/tvix/castore/src/path.rs @@ -38,7 +38,9 @@ impl Path { if !bytes.is_empty() { // Ensure all components are valid castore node names. for component in bytes.split_str(b"/") { - Directory::validate_node_name(component).ok()?; + if !Directory::is_valid_name(component) { + return None; + } } } @@ -211,7 +213,9 @@ impl PathBuf { /// Adjoins `name` to self. pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { - Directory::validate_node_name(name).map_err(|_| std::io::ErrorKind::InvalidData)?; + if !Directory::is_valid_name(name) { + return Err(std::io::ErrorKind::InvalidData.into()); + } if !self.inner.is_empty() { self.inner.push(b'/'); diff --git a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs index e65145509184..62fdb34a25a0 100644 --- a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs +++ b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs @@ -1,5 +1,5 @@ use crate::directoryservice::{DirectoryGraph, DirectoryService, LeavesToRootValidator}; -use crate::{proto, B3Digest, ValidateDirectoryError}; +use crate::{proto, B3Digest, DirectoryError}; use futures::stream::BoxStream; use futures::TryStreamExt; use std::ops::Deref; @@ -84,7 +84,7 @@ where let mut validator = DirectoryGraph::<LeavesToRootValidator>::default(); while let Some(directory) = req_inner.message().await? { validator - .add(directory.try_into().map_err(|e: ValidateDirectoryError| { + .add(directory.try_into().map_err(|e: DirectoryError| { tonic::Status::new(tonic::Code::Internal, e.to_string()) })?) .map_err(|e| tonic::Status::new(tonic::Code::Internal, e.to_string()))?; diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index 8f8f3c08347a..7cb1cecd27fa 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -5,12 +5,10 @@ use prost::Message; mod grpc_blobservice_wrapper; mod grpc_directoryservice_wrapper; +use crate::{B3Digest, DirectoryError}; pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper; pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper; -use crate::NamedNode; -use crate::{B3Digest, ValidateDirectoryError, ValidateNodeError}; - tonic::include_proto!("tvix.castore.v1"); #[cfg(feature = "tonic-reflection")] @@ -71,168 +69,83 @@ impl Directory { /// Accepts a name, and a mutable reference to the previous name. /// If the passed name is larger than the previous one, the reference is updated. /// If it's not, an error is returned. -fn update_if_lt_prev<'n>( - prev_name: &mut &'n [u8], - name: &'n [u8], -) -> Result<(), ValidateDirectoryError> { +fn update_if_lt_prev<'n>(prev_name: &mut &'n [u8], name: &'n [u8]) -> Result<(), DirectoryError> { if *name < **prev_name { - return Err(ValidateDirectoryError::WrongSorting(name.to_vec())); + return Err(DirectoryError::WrongSorting(bytes::Bytes::copy_from_slice( + name, + ))); } *prev_name = name; Ok(()) } -impl TryFrom<&node::Node> for crate::Node { - type Error = ValidateNodeError; - - fn try_from(node: &node::Node) -> Result<crate::Node, ValidateNodeError> { - Ok(match node { - node::Node::Directory(n) => crate::Node::Directory(n.try_into()?), - node::Node::File(n) => crate::Node::File(n.try_into()?), - node::Node::Symlink(n) => crate::Node::Symlink(n.try_into()?), - }) - } -} - -impl TryFrom<&Node> for crate::Node { - type Error = ValidateNodeError; - - fn try_from(node: &Node) -> Result<crate::Node, ValidateNodeError> { - match node { - Node { node: None } => Err(ValidateNodeError::NoNodeSet), - Node { node: Some(node) } => node.try_into(), - } - } -} - -impl TryFrom<&DirectoryNode> for crate::DirectoryNode { - type Error = ValidateNodeError; - - fn try_from(node: &DirectoryNode) -> Result<crate::DirectoryNode, ValidateNodeError> { - crate::DirectoryNode::new( - node.name.clone(), - node.digest.clone().try_into()?, - node.size, - ) - } -} - -impl TryFrom<&SymlinkNode> for crate::SymlinkNode { - type Error = ValidateNodeError; - - fn try_from(node: &SymlinkNode) -> Result<crate::SymlinkNode, ValidateNodeError> { - crate::SymlinkNode::new(node.name.clone(), node.target.clone()) - } -} - -impl TryFrom<&FileNode> for crate::FileNode { - type Error = ValidateNodeError; - - fn try_from(node: &FileNode) -> Result<crate::FileNode, ValidateNodeError> { - crate::FileNode::new( - node.name.clone(), - node.digest.clone().try_into()?, - node.size, - node.executable, - ) - } -} - +// TODO: add a proper owned version here that moves various fields impl TryFrom<Directory> for crate::Directory { - type Error = ValidateDirectoryError; + type Error = DirectoryError; - fn try_from(directory: Directory) -> Result<crate::Directory, ValidateDirectoryError> { - (&directory).try_into() + fn try_from(value: Directory) -> Result<Self, Self::Error> { + (&value).try_into() } } impl TryFrom<&Directory> for crate::Directory { - type Error = ValidateDirectoryError; + type Error = DirectoryError; - fn try_from(directory: &Directory) -> Result<crate::Directory, ValidateDirectoryError> { + fn try_from(directory: &Directory) -> Result<crate::Directory, DirectoryError> { let mut dir = crate::Directory::new(); + let mut last_file_name: &[u8] = b""; + + // TODO: this currently loops over all three types separately, rather + // than peeking and picking from where would be the next. + for file in directory.files.iter().map(move |file| { update_if_lt_prev(&mut last_file_name, &file.name).map(|()| file.clone()) }) { let file = file?; - dir.add(crate::Node::File((&file).try_into().map_err(|e| { - ValidateDirectoryError::InvalidNode(file.name.into(), e) - })?))?; + + let (name, node) = Node { + node: Some(node::Node::File(file)), + } + .into_name_and_node()?; + + dir.add(name, node)?; } let mut last_directory_name: &[u8] = b""; for directory in directory.directories.iter().map(move |directory| { update_if_lt_prev(&mut last_directory_name, &directory.name).map(|()| directory.clone()) }) { let directory = directory?; - dir.add(crate::Node::Directory((&directory).try_into().map_err( - |e| ValidateDirectoryError::InvalidNode(directory.name.into(), e), - )?))?; + + let (name, node) = Node { + node: Some(node::Node::Directory(directory)), + } + .into_name_and_node()?; + + dir.add(name, node)?; } let mut last_symlink_name: &[u8] = b""; for symlink in directory.symlinks.iter().map(move |symlink| { update_if_lt_prev(&mut last_symlink_name, &symlink.name).map(|()| symlink.clone()) }) { let symlink = symlink?; - dir.add(crate::Node::Symlink((&symlink).try_into().map_err( - |e| ValidateDirectoryError::InvalidNode(symlink.name.into(), e), - )?))?; - } - Ok(dir) - } -} -impl From<&crate::Node> for node::Node { - fn from(node: &crate::Node) -> node::Node { - match node { - crate::Node::Directory(n) => node::Node::Directory(n.into()), - crate::Node::File(n) => node::Node::File(n.into()), - crate::Node::Symlink(n) => node::Node::Symlink(n.into()), - } - } -} - -impl From<&crate::Node> for Node { - fn from(node: &crate::Node) -> Node { - Node { - node: Some(node.into()), - } - } -} - -impl From<&crate::DirectoryNode> for DirectoryNode { - fn from(node: &crate::DirectoryNode) -> DirectoryNode { - DirectoryNode { - digest: node.digest().clone().into(), - size: node.size(), - name: node.get_name().clone(), - } - } -} + let (name, node) = Node { + node: Some(node::Node::Symlink(symlink)), + } + .into_name_and_node()?; -impl From<&crate::FileNode> for FileNode { - fn from(node: &crate::FileNode) -> FileNode { - FileNode { - digest: node.digest().clone().into(), - size: node.size(), - name: node.get_name().clone(), - executable: node.executable(), + dir.add(name, node)?; } - } -} -impl From<&crate::SymlinkNode> for SymlinkNode { - fn from(node: &crate::SymlinkNode) -> SymlinkNode { - SymlinkNode { - name: node.get_name().clone(), - target: node.target().clone(), - } + Ok(dir) } } +// TODO: add a proper owned version here that moves various fields impl From<crate::Directory> for Directory { - fn from(directory: crate::Directory) -> Directory { - (&directory).into() + fn from(value: crate::Directory) -> Self { + (&value).into() } } @@ -241,16 +154,25 @@ impl From<&crate::Directory> for Directory { let mut directories = vec![]; let mut files = vec![]; let mut symlinks = vec![]; - for node in directory.nodes() { + + for (name, node) in directory.nodes() { match node { - crate::Node::File(n) => { - files.push(n.into()); - } - crate::Node::Directory(n) => { - directories.push(n.into()); - } + crate::Node::File(n) => files.push(FileNode { + name: name.clone(), + digest: n.digest().to_owned().into(), + size: n.size(), + executable: n.executable(), + }), + crate::Node::Directory(n) => directories.push(DirectoryNode { + name: name.clone(), + digest: n.digest().to_owned().into(), + size: n.size(), + }), crate::Node::Symlink(n) => { - symlinks.push(n.into()); + symlinks.push(SymlinkNode { + name: name.clone(), + target: n.target().to_owned(), + }); } } } @@ -262,6 +184,67 @@ impl From<&crate::Directory> for Directory { } } +impl Node { + /// Converts a proto [Node] to a [crate::Node], and splits off the name. + pub fn into_name_and_node(self) -> Result<(bytes::Bytes, crate::Node), DirectoryError> { + match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? { + node::Node::Directory(n) => { + let digest = B3Digest::try_from(n.digest) + .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + + let node = crate::Node::Directory(crate::DirectoryNode::new(digest, n.size)); + + Ok((n.name, node)) + } + node::Node::File(n) => { + let digest = B3Digest::try_from(n.digest) + .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + + let node = crate::Node::File(crate::FileNode::new(digest, n.size, n.executable)); + + Ok((n.name, node)) + } + + node::Node::Symlink(n) => { + let node = crate::Node::Symlink( + crate::SymlinkNode::new(n.target) + .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?, + ); + + Ok((n.name, node)) + } + } + } + + /// Construsts a [Node] from a name and [crate::Node]. + pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self { + // TODO: make these pub(crate) so we can avoid cloning? + match n { + crate::Node::Directory(directory_node) => Self { + node: Some(node::Node::Directory(DirectoryNode { + name, + digest: directory_node.digest().to_owned().into(), + size: directory_node.size(), + })), + }, + crate::Node::File(file_node) => Self { + node: Some(node::Node::File(FileNode { + name, + digest: file_node.digest().to_owned().into(), + size: file_node.size(), + executable: file_node.executable(), + })), + }, + crate::Node::Symlink(symlink_node) => Self { + node: Some(node::Node::Symlink(SymlinkNode { + name, + target: symlink_node.target().to_owned(), + })), + }, + } + } +} + impl StatBlobResponse { /// Validates a StatBlobResponse. All chunks must have valid blake3 digests. /// It is allowed to send an empty list, if no more granular chunking is diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs index 7847b9c4c9cb..215bce7275dd 100644 --- a/tvix/castore/src/proto/tests/directory.rs +++ b/tvix/castore/src/proto/tests/directory.rs @@ -1,4 +1,4 @@ -use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError}; +use crate::proto::{Directory, DirectoryError, DirectoryNode, FileNode, SymlinkNode}; use crate::ValidateNodeError; use hex_literal::hex; @@ -162,8 +162,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"") } _ => panic!("unexpected error"), }; @@ -179,8 +179,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b".") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b".") } _ => panic!("unexpected error"), }; @@ -197,8 +197,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"..") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"..") } _ => panic!("unexpected error"), }; @@ -213,8 +213,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"\x00") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"\x00") } _ => panic!("unexpected error"), }; @@ -229,8 +229,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"foo/bar") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"foo/bar") } _ => panic!("unexpected error"), }; @@ -248,7 +248,7 @@ fn validate_invalid_digest() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => { + DirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => { assert_eq!(n, 2) } _ => panic!("unexpected error"), @@ -275,8 +275,8 @@ fn validate_sorting() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::WrongSorting(s) => { - assert_eq!(s, b"a"); + DirectoryError::WrongSorting(s) => { + assert_eq!(s.as_ref(), b"a"); } _ => panic!("unexpected error"), } @@ -300,7 +300,7 @@ fn validate_sorting() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::DuplicateName(s) => { + DirectoryError::DuplicateName(s) => { assert_eq!(s, b"a"); } _ => panic!("unexpected error"), diff --git a/tvix/castore/src/tests/import.rs b/tvix/castore/src/tests/import.rs index 5dff61782cf2..bbf28a2981bf 100644 --- a/tvix/castore/src/tests/import.rs +++ b/tvix/castore/src/tests/import.rs @@ -2,15 +2,11 @@ use crate::blobservice::{self, BlobService}; use crate::directoryservice; use crate::fixtures::*; use crate::import::fs::ingest_path; -use crate::proto; use crate::{DirectoryNode, Node, SymlinkNode}; use tempfile::TempDir; #[cfg(target_family = "unix")] -use std::os::unix::ffi::OsStrExt; - -#[cfg(target_family = "unix")] #[tokio::test] async fn symlink() { let blob_service = blobservice::from_addr("memory://").await.unwrap(); @@ -34,9 +30,7 @@ async fn symlink() { .expect("must succeed"); assert_eq!( - Node::Symlink( - SymlinkNode::new("doesntmatter".into(), "/nix/store/somewhereelse".into(),).unwrap() - ), + Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into(),).unwrap()), root_node, ) } @@ -59,13 +53,12 @@ async fn single_file() { .expect("must succeed"); assert_eq!( - proto::node::Node::File(proto::FileNode { - name: "root".into(), - digest: HELLOWORLD_BLOB_DIGEST.clone().into(), - size: HELLOWORLD_BLOB_CONTENTS.len() as u64, - executable: false, - }), - (&root_node).into(), + Node::File(crate::FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + HELLOWORLD_BLOB_CONTENTS.len() as u64, + false, + )), + root_node, ); // ensure the blob has been uploaded @@ -95,20 +88,10 @@ async fn complicated() { // ensure root_node matched expectations assert_eq!( - Node::Directory( - DirectoryNode::new( - tmpdir - .path() - .file_name() - .unwrap() - .as_bytes() - .to_owned() - .into(), - DIRECTORY_COMPLICATED.digest().clone(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap() - ), + Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest().clone(), + DIRECTORY_COMPLICATED.size(), + )), root_node, ); |