diff options
46 files changed, 785 insertions, 1002 deletions
diff --git a/tvix/build/src/proto/mod.rs b/tvix/build/src/proto/mod.rs index 2164803b6193..d3b325bd3e62 100644 --- a/tvix/build/src/proto/mod.rs +++ b/tvix/build/src/proto/mod.rs @@ -1,8 +1,7 @@ use std::path::{Path, PathBuf}; use itertools::Itertools; -use tvix_castore::ValidateNodeError; -use tvix_castore::{NamedNode, Node}; +use tvix_castore::DirectoryError; mod grpc_buildservice_wrapper; @@ -20,7 +19,7 @@ pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix #[derive(Debug, thiserror::Error)] pub enum ValidateBuildRequestError { #[error("invalid input node at position {0}: {1}")] - InvalidInputNode(usize, ValidateNodeError), + InvalidInputNode(usize, DirectoryError), #[error("input nodes are not sorted by name")] InputNodesNotSorted, @@ -124,19 +123,21 @@ impl BuildRequest { /// and all restrictions around paths themselves (relative, clean, …) need // to be fulfilled. pub fn validate(&self) -> Result<(), ValidateBuildRequestError> { - // now we can look at the names, and make sure they're sorted. - if !is_sorted( - self.inputs - .iter() - // TODO(flokli) handle conversion errors and store result somewhere - .map(|e| { - Node::try_from(e.node.as_ref().unwrap()) - .unwrap() - .get_name() - .clone() - }), - ) { - Err(ValidateBuildRequestError::InputNodesNotSorted)? + // validate names. 
Make sure they're sorted + + let mut last_name = bytes::Bytes::new(); + for (i, node) in self.inputs.iter().enumerate() { + // TODO(flokli): store result somewhere + let (name, _node) = node + .clone() + .into_name_and_node() + .map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?; + + if name <= last_name { + return Err(ValidateBuildRequestError::InputNodesNotSorted); + } else { + last_name = name + } } // validate working_dir diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs index 0cf2f71adb06..ffb25de9a854 100644 --- a/tvix/castore/src/directoryservice/directory_graph.rs +++ b/tvix/castore/src/directoryservice/directory_graph.rs @@ -10,7 +10,7 @@ use petgraph::{ use tracing::instrument; use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator}; -use crate::{B3Digest, Directory, DirectoryNode, NamedNode}; +use crate::{B3Digest, Directory, DirectoryNode}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -69,12 +69,12 @@ pub struct ValidatedDirectoryGraph { root: Option<NodeIndex>, } -fn check_edge(dir: &DirectoryNode, child: &Directory) -> Result<(), Error> { +fn check_edge(dir: &DirectoryNode, dir_name: &[u8], child: &Directory) -> Result<(), Error> { // Ensure the size specified in the child node matches our records. 
if dir.size() != child.size() { return Err(Error::ValidationError(format!( "'{}' has wrong size, specified {}, recorded {}", - dir.get_name().as_bstr(), + dir_name.as_bstr(), dir.size(), child.size(), ))); @@ -141,19 +141,19 @@ impl<O: OrderValidator> DirectoryGraph<O> { } // set up edges to all child directories - for subdir in directory.directories() { + for (subdir_name, subdir_node) in directory.directories() { let child_ix = *self .digest_to_node_ix - .entry(subdir.digest().clone()) + .entry(subdir_node.digest().clone()) .or_insert_with(|| self.graph.add_node(None)); let pending_edge_check = match &self.graph[child_ix] { Some(child) => { // child is already available, validate the edge now - check_edge(subdir, child)?; + check_edge(subdir_node, subdir_name, child)?; None } - None => Some(subdir.clone()), // pending validation + None => Some(subdir_node.clone()), // pending validation }; self.graph.add_edge(ix, child_ix, pending_edge_check); } @@ -173,7 +173,9 @@ impl<O: OrderValidator> DirectoryGraph<O> { .expect("edge not found") .take() .expect("edge is already validated"); - check_edge(&edge_weight, &directory)?; + + // TODO: where's the name here? + check_edge(&edge_weight, b"??", &directory)?; } // finally, store the directory information in the node weight @@ -277,11 +279,12 @@ mod tests { lazy_static! { pub static ref BROKEN_PARENT_DIRECTORY: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( + dir.add( "foo".into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size() + 42, // wrong! - ).unwrap())).unwrap(); + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size() + 42, // wrong! 
+ ))).unwrap(); dir }; } diff --git a/tvix/castore/src/directoryservice/grpc.rs b/tvix/castore/src/directoryservice/grpc.rs index 2079514d2b79..9696c5631949 100644 --- a/tvix/castore/src/directoryservice/grpc.rs +++ b/tvix/castore/src/directoryservice/grpc.rs @@ -3,8 +3,7 @@ use std::collections::HashSet; use super::{Directory, DirectoryPutter, DirectoryService}; use crate::composition::{CompositionContext, ServiceBuilder}; use crate::proto::{self, get_directory_request::ByWhat}; -use crate::ValidateDirectoryError; -use crate::{B3Digest, Error}; +use crate::{B3Digest, DirectoryError, Error}; use async_stream::try_stream; use futures::stream::BoxStream; use std::sync::Arc; @@ -154,7 +153,7 @@ where } let directory = directory.try_into() - .map_err(|e: ValidateDirectoryError| Error::StorageError(e.to_string()))?; + .map_err(|e: DirectoryError| Error::StorageError(e.to_string()))?; yield directory; }, diff --git a/tvix/castore/src/directoryservice/order_validator.rs b/tvix/castore/src/directoryservice/order_validator.rs index f03bb7ff9a86..6daf4a624d29 100644 --- a/tvix/castore/src/directoryservice/order_validator.rs +++ b/tvix/castore/src/directoryservice/order_validator.rs @@ -48,9 +48,9 @@ impl RootToLeavesValidator { self.expected_digests.insert(directory.digest()); } - for subdir in directory.directories() { + for (_, subdir_node) in directory.directories() { // Allow the children to appear next - self.expected_digests.insert(subdir.digest().clone()); + self.expected_digests.insert(subdir_node.digest().clone()); } } } @@ -79,11 +79,11 @@ impl OrderValidator for LeavesToRootValidator { fn add_directory(&mut self, directory: &Directory) -> bool { let digest = directory.digest(); - for subdir in directory.directories() { - if !self.allowed_references.contains(subdir.digest()) { + for (_, subdir_node) in directory.directories() { + if !self.allowed_references.contains(subdir_node.digest()) { warn!( directory.digest = %digest, - subdirectory.digest = %subdir.digest(), 
+ subdirectory.digest = %subdir_node.digest(), "unexpected directory reference" ); return false; diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs index 7a15f44a686e..c0a0f22c989f 100644 --- a/tvix/castore/src/directoryservice/tests/mod.rs +++ b/tvix/castore/src/directoryservice/tests/mod.rs @@ -218,14 +218,13 @@ async fn upload_reject_dangling_pointer(directory_service: impl DirectoryService async fn upload_reject_wrong_size(directory_service: impl DirectoryService) { let wrong_parent_directory = { let mut dir = Directory::new(); - dir.add(Node::Directory( - DirectoryNode::new( - "foo".into(), + dir.add( + "foo".into(), + Node::Directory(DirectoryNode::new( DIRECTORY_A.digest(), DIRECTORY_A.size() + 42, // wrong! - ) - .unwrap(), - )) + )), + ) .unwrap(); dir }; diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs index ff667e0f65c0..de2ea33c7f9d 100644 --- a/tvix/castore/src/directoryservice/traverse.rs +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -1,4 +1,4 @@ -use crate::{directoryservice::DirectoryService, Error, NamedNode, Node, Path}; +use crate::{directoryservice::DirectoryService, Error, Node, Path}; use tracing::{instrument, warn}; /// This descends from a (root) node to the given (sub)path, returning the Node @@ -37,9 +37,10 @@ where })?; // look for the component in the [Directory]. - // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we - // could stop as soon as e.name is larger than the search string. - if let Some(child_node) = directory.nodes().find(|n| n.get_name() == component) { + if let Some((_child_name, child_node)) = directory + .nodes() + .find(|(name, _node)| name.as_ref() == component) + { // child node found, update prev_node to that and continue. 
parent_node = child_node.clone(); } else { @@ -81,21 +82,23 @@ mod tests { handle.close().await.expect("must upload"); // construct the node for DIRECTORY_COMPLICATED - let node_directory_complicated = Node::Directory( - DirectoryNode::new( - "doesntmatter".into(), - DIRECTORY_COMPLICATED.digest(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap(), - ); + let node_directory_complicated = Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest(), + DIRECTORY_COMPLICATED.size(), + )); // construct the node for DIRECTORY_COMPLICATED - let node_directory_with_keep = - Node::Directory(DIRECTORY_COMPLICATED.directories().next().unwrap().clone()); + let node_directory_with_keep = Node::Directory( + DIRECTORY_COMPLICATED + .directories() + .next() + .unwrap() + .1 + .clone(), + ); // construct the node for the .keep file - let node_file_keep = Node::File(DIRECTORY_WITH_KEEP.files().next().unwrap().clone()); + let node_file_keep = Node::File(DIRECTORY_WITH_KEEP.files().next().unwrap().1.clone()); // traversal to an empty subpath should return the root node. { diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs index f1133663ede7..4b060707d4cc 100644 --- a/tvix/castore/src/directoryservice/utils.rs +++ b/tvix/castore/src/directoryservice/utils.rs @@ -57,16 +57,15 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>( // enqueue all child directory digests to the work queue, as // long as they're not part of the worklist or already sent. // This panics if the digest looks invalid, it's supposed to be checked first. 
- for child_directory_node in current_directory.directories() { - // TODO: propagate error - let child_digest: B3Digest = child_directory_node.digest().clone(); + for (_, child_directory_node) in current_directory.directories() { + let child_digest = child_directory_node.digest(); - if worklist_directory_digests.contains(&child_digest) - || sent_directory_digests.contains(&child_digest) + if worklist_directory_digests.contains(child_digest) + || sent_directory_digests.contains(child_digest) { continue; } - worklist_directory_digests.push_back(child_digest); + worklist_directory_digests.push_back(child_digest.clone()); } yield current_directory; diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs index 083f43036845..e03cfa5a5207 100644 --- a/tvix/castore/src/errors.rs +++ b/tvix/castore/src/errors.rs @@ -13,33 +13,12 @@ pub enum Error { StorageError(String), } -/// Errors that can occur during the validation of [Directory] messages. -#[derive(Debug, thiserror::Error, PartialEq)] -pub enum ValidateDirectoryError { - /// Elements are not in sorted order - #[error("{:?} is not sorted", .0.as_bstr())] - WrongSorting(Vec<u8>), - /// Multiple elements with the same name encountered - #[error("{:?} is a duplicate name", .0.as_bstr())] - DuplicateName(Vec<u8>), - /// Invalid node - #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] - InvalidNode(Vec<u8>, ValidateNodeError), - #[error("Total size exceeds u32::MAX")] - SizeOverflow, -} - -/// Errors that occur during Node validation +/// Errors that occur during construction of [crate::Node] #[derive(Debug, thiserror::Error, PartialEq)] pub enum ValidateNodeError { - #[error("No node set")] - NoNodeSet, /// Invalid digest length encountered #[error("invalid digest length: {0}")] InvalidDigestLen(usize), - /// Invalid name encountered - #[error("Invalid name: {}", .0.as_bstr())] - InvalidName(bytes::Bytes), /// Invalid symlink target #[error("Invalid symlink target: {}", .0.as_bstr())] 
InvalidSymlinkTarget(bytes::Bytes), @@ -53,6 +32,29 @@ impl From<crate::digests::Error> for ValidateNodeError { } } +/// Errors that can occur when populating [crate::Directory] messages, +/// or parsing [crate::proto::Directory] +#[derive(Debug, thiserror::Error, PartialEq)] +pub enum DirectoryError { + /// Multiple elements with the same name encountered + #[error("{:?} is a duplicate name", .0.as_bstr())] + DuplicateName(Vec<u8>), + /// Node failed validation + #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] + InvalidNode(bytes::Bytes, ValidateNodeError), + #[error("Total size exceeds u32::MAX")] + SizeOverflow, + /// Invalid name encountered + #[error("Invalid name: {}", .0.as_bstr())] + InvalidName(bytes::Bytes), + /// Elements are not in sorted order. Can only happen on protos + #[error("{:?} is not sorted", .0.as_bstr())] + WrongSorting(bytes::Bytes), + /// This can only happen if there's an unknown node type (on protos) + #[error("No node set")] + NoNodeSet, +} + impl From<JoinError> for Error { fn from(value: JoinError) -> Self { Error::StorageError(value.to_string()) diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs index 06366f2057ab..b57540d177e2 100644 --- a/tvix/castore/src/fixtures.rs +++ b/tvix/castore/src/fixtures.rs @@ -35,70 +35,83 @@ lazy_static! 
{ // Directories pub static ref DIRECTORY_WITH_KEEP: Directory = { let mut dir = Directory::new(); - dir.add(Node::File(FileNode::new( - b".keep".to_vec().into(), - EMPTY_BLOB_DIGEST.clone(), - 0, - false - ).unwrap())).unwrap(); + dir.add( + ".keep".into(), + Node::File(FileNode::new( + EMPTY_BLOB_DIGEST.clone(), + 0, + false + ))).unwrap(); + dir }; pub static ref DIRECTORY_COMPLICATED: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"keep".to_vec().into(), - DIRECTORY_WITH_KEEP.digest(), - DIRECTORY_WITH_KEEP.size() - ).unwrap())).unwrap(); - dir.add(Node::File(FileNode::new( - b".keep".to_vec().into(), + dir.add( + "keep".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_WITH_KEEP.digest(), + DIRECTORY_WITH_KEEP.size() + ))).unwrap(); + dir.add( + ".keep".into(), + Node::File(FileNode::new( EMPTY_BLOB_DIGEST.clone(), 0, false - ).unwrap())).unwrap(); - dir.add(Node::Symlink(SymlinkNode::new( - b"aa".to_vec().into(), - b"/nix/store/somewhereelse".to_vec().into() - ).unwrap())).unwrap(); + ))).unwrap(); + dir.add( + "aa".into(), + Node::Symlink(SymlinkNode::new( + b"/nix/store/somewhereelse".to_vec().into() + ).unwrap())).unwrap(); + dir }; pub static ref DIRECTORY_A: Directory = Directory::new(); pub static ref DIRECTORY_B: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"a".to_vec().into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ).unwrap())).unwrap(); + dir.add( + "a".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size(), + ))).unwrap(); + dir }; pub static ref DIRECTORY_C: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"a".to_vec().into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ).unwrap())).unwrap(); - dir.add(Node::Directory(DirectoryNode::new( - b"a'".to_vec().into(), + dir.add( + "a".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + 
DIRECTORY_A.size(), + ))).unwrap(); + dir.add( + "a'".into(), + Node::Directory(DirectoryNode::new( DIRECTORY_A.digest(), DIRECTORY_A.size(), - ).unwrap())).unwrap(); + ))).unwrap(); + dir }; pub static ref DIRECTORY_D: Directory = { let mut dir = Directory::new(); - dir.add(Node::Directory(DirectoryNode::new( - b"a".to_vec().into(), - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ).unwrap())).unwrap(); - dir.add(Node::Directory(DirectoryNode::new( - b"b".to_vec().into(), - DIRECTORY_B.digest(), - DIRECTORY_B.size(), - ).unwrap())).unwrap(); - dir + dir.add( + "a".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_A.digest(), + DIRECTORY_A.size(), + ))).unwrap(); + dir.add( + "b".into(), + Node::Directory(DirectoryNode::new( + DIRECTORY_B.digest(), + DIRECTORY_B.size(), + ))).unwrap(); + dir }; } diff --git a/tvix/castore/src/fs/fuse/tests.rs b/tvix/castore/src/fs/fuse/tests.rs index 3dad7d83aedd..30bbfc46235d 100644 --- a/tvix/castore/src/fs/fuse/tests.rs +++ b/tvix/castore/src/fs/fuse/tests.rs @@ -68,15 +68,11 @@ async fn populate_blob_a( root_nodes.insert( BLOB_A_NAME.into(), - Node::File( - FileNode::new( - BLOB_A_NAME.into(), - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u64, - false, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::BLOB_A_DIGEST.clone(), + fixtures::BLOB_A.len() as u64, + false, + )), ); } @@ -92,15 +88,11 @@ async fn populate_blob_b( root_nodes.insert( BLOB_B_NAME.into(), - Node::File( - FileNode::new( - BLOB_B_NAME.into(), - fixtures::BLOB_B_DIGEST.clone(), - fixtures::BLOB_B.len() as u64, - false, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::BLOB_B_DIGEST.clone(), + fixtures::BLOB_B.len() as u64, + false, + )), ); } @@ -120,22 +112,18 @@ async fn populate_blob_helloworld( root_nodes.insert( HELLOWORLD_BLOB_NAME.into(), - Node::File( - FileNode::new( - HELLOWORLD_BLOB_NAME.into(), - fixtures::HELLOWORLD_BLOB_DIGEST.clone(), - fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, - true, - ) - 
.unwrap(), - ), + Node::File(FileNode::new( + fixtures::HELLOWORLD_BLOB_DIGEST.clone(), + fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, + true, + )), ); } async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( SYMLINK_NAME.into(), - Node::Symlink(SymlinkNode::new(SYMLINK_NAME.into(), BLOB_A_NAME.into()).unwrap()), + Node::Symlink(SymlinkNode::new(BLOB_A_NAME.into()).unwrap()), ); } @@ -144,9 +132,7 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( SYMLINK_NAME2.into(), - Node::Symlink( - SymlinkNode::new(SYMLINK_NAME2.into(), "/nix/store/somewhereelse".into()).unwrap(), - ), + Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into()).unwrap()), ); } @@ -170,14 +156,10 @@ async fn populate_directory_with_keep( root_nodes.insert( DIRECTORY_WITH_KEEP_NAME.into(), - Node::Directory( - DirectoryNode::new( - DIRECTORY_WITH_KEEP_NAME.into(), - fixtures::DIRECTORY_WITH_KEEP.digest(), - fixtures::DIRECTORY_WITH_KEEP.size(), - ) - .unwrap(), - ), + Node::Directory(DirectoryNode::new( + fixtures::DIRECTORY_WITH_KEEP.digest(), + fixtures::DIRECTORY_WITH_KEEP.size(), + )), ); } @@ -186,14 +168,10 @@ async fn populate_directory_with_keep( async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( DIRECTORY_WITH_KEEP_NAME.into(), - Node::Directory( - DirectoryNode::new( - DIRECTORY_WITH_KEEP_NAME.into(), - fixtures::DIRECTORY_WITH_KEEP.digest(), - fixtures::DIRECTORY_WITH_KEEP.size(), - ) - .unwrap(), - ), + Node::Directory(DirectoryNode::new( + fixtures::DIRECTORY_WITH_KEEP.digest(), + fixtures::DIRECTORY_WITH_KEEP.size(), + )), ); } @@ -201,15 +179,11 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( BLOB_A_NAME.into(), - Node::File( - 
FileNode::new( - BLOB_A_NAME.into(), - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u64, - false, - ) - .unwrap(), - ), + Node::File(FileNode::new( + fixtures::BLOB_A_DIGEST.clone(), + fixtures::BLOB_A.len() as u64, + false, + )), ); } @@ -239,14 +213,10 @@ async fn populate_directory_complicated( root_nodes.insert( DIRECTORY_COMPLICATED_NAME.into(), - Node::Directory( - DirectoryNode::new( - DIRECTORY_COMPLICATED_NAME.into(), - fixtures::DIRECTORY_COMPLICATED.digest(), - fixtures::DIRECTORY_COMPLICATED.size(), - ) - .unwrap(), - ), + Node::Directory(DirectoryNode::new( + fixtures::DIRECTORY_COMPLICATED.digest(), + fixtures::DIRECTORY_COMPLICATED.size(), + )), ); } diff --git a/tvix/castore/src/fs/inodes.rs b/tvix/castore/src/fs/inodes.rs index b04505c9fa98..e49d28f8e52e 100644 --- a/tvix/castore/src/fs/inodes.rs +++ b/tvix/castore/src/fs/inodes.rs @@ -2,9 +2,7 @@ //! about inodes, which present tvix-castore nodes in a filesystem. use std::time::Duration; -use bytes::Bytes; - -use crate::{B3Digest, NamedNode, Node}; +use crate::{B3Digest, Node}; #[derive(Clone, Debug)] pub enum InodeData { @@ -19,24 +17,19 @@ pub enum InodeData { /// lookup and did fetch the data. #[derive(Clone, Debug)] pub enum DirectoryInodeData { - Sparse(B3Digest, u64), // digest, size - Populated(B3Digest, Vec<(u64, Node)>), // [(child_inode, node)] + Sparse(B3Digest, u64), // digest, size + Populated(B3Digest, Vec<(u64, bytes::Bytes, Node)>), // [(child_inode, name, node)] } impl InodeData { /// Constructs a new InodeData by consuming a [Node]. - /// It splits off the orginal name, so it can be used later. 
- pub fn from_node(node: &Node) -> (Self, Bytes) { + pub fn from_node(node: &Node) -> Self { match node { - Node::Directory(n) => ( - Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size())), - n.get_name().clone(), - ), - Node::File(n) => ( - Self::Regular(n.digest().clone(), n.size(), n.executable()), - n.get_name().clone(), - ), - Node::Symlink(n) => (Self::Symlink(n.target().clone()), n.get_name().clone()), + Node::Directory(n) => { + Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size())) + } + Node::File(n) => Self::Regular(n.digest().clone(), n.size(), n.executable()), + Node::Symlink(n) => Self::Symlink(n.target().clone()), } } diff --git a/tvix/castore/src/fs/mod.rs b/tvix/castore/src/fs/mod.rs index 90c1f371b546..f819adb549f5 100644 --- a/tvix/castore/src/fs/mod.rs +++ b/tvix/castore/src/fs/mod.rs @@ -18,7 +18,7 @@ use self::{ use crate::{ blobservice::{BlobReader, BlobService}, directoryservice::DirectoryService, - {B3Digest, NamedNode, Node}, + {B3Digest, Node}, }; use bstr::ByteVec; use bytes::Bytes; @@ -103,7 +103,7 @@ pub struct TvixStoreFs<BS, DS, RN> { u64, ( Span, - Arc<Mutex<mpsc::Receiver<(usize, Result<Node, crate::Error>)>>>, + Arc<Mutex<mpsc::Receiver<(usize, Result<(Bytes, Node), crate::Error>)>>>, ), >, >, @@ -165,8 +165,12 @@ where /// It is ok if it's a [DirectoryInodeData::Sparse] - in that case, a lookup /// in self.directory_service is performed, and self.inode_tracker is updated with the /// [DirectoryInodeData::Populated]. + #[allow(clippy::type_complexity)] #[instrument(skip(self), err)] - fn get_directory_children(&self, ino: u64) -> io::Result<(B3Digest, Vec<(u64, Node)>)> { + fn get_directory_children( + &self, + ino: u64, + ) -> io::Result<(B3Digest, Vec<(u64, bytes::Bytes, Node)>)> { let data = self.inode_tracker.read().get(ino).unwrap(); match *data { // if it's populated already, return children. 
@@ -196,13 +200,13 @@ where let children = { let mut inode_tracker = self.inode_tracker.write(); - let children: Vec<(u64, Node)> = directory + let children: Vec<(u64, Bytes, Node)> = directory .nodes() - .map(|child_node| { - let (inode_data, _) = InodeData::from_node(child_node); + .map(|(child_name, child_node)| { + let inode_data = InodeData::from_node(child_node); let child_ino = inode_tracker.put(inode_data); - (child_ino, child_node.clone()) + (child_ino, child_name.to_owned(), child_node.clone()) }) .collect(); @@ -263,12 +267,6 @@ where Ok(None) => Err(io::Error::from_raw_os_error(libc::ENOENT)), // The root node does exist Ok(Some(root_node)) => { - // The name must match what's passed in the lookup, otherwise this is also a ENOENT. - if root_node.get_name() != name.to_bytes() { - debug!(root_node.name=?root_node.get_name(), found_node.name=%name.to_string_lossy(), "node name mismatch"); - return Err(io::Error::from_raw_os_error(libc::ENOENT)); - } - // Let's check if someone else beat us to updating the inode tracker and // root_nodes map. This avoids locking inode_tracker for writing. if let Some(ino) = self.root_nodes.read().get(name.to_bytes()) { @@ -285,9 +283,9 @@ where // insert the (sparse) inode data and register in // self.root_nodes. - let (inode_data, name) = InodeData::from_node(&root_node); + let inode_data = InodeData::from_node(&root_node); let ino = inode_tracker.put(inode_data.clone()); - root_nodes.insert(name, ino); + root_nodes.insert(bytes::Bytes::copy_from_slice(name.to_bytes()), ino); Ok((ino, Arc::new(inode_data))) } @@ -362,7 +360,7 @@ where // Search for that name in the list of children and return the FileAttrs. // in the children, find the one with the desired name. - if let Some((child_ino, _)) = children.iter().find(|e| e.1.get_name() == name.to_bytes()) { + if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == name.to_bytes()) { // lookup the child [InodeData] in [self.inode_tracker]. 
// We know the inodes for children have already been allocated. let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap(); @@ -398,8 +396,8 @@ where self.tokio_handle.spawn( async move { let mut stream = root_nodes_provider.list().enumerate(); - while let Some(node) = stream.next().await { - if tx.send(node).await.is_err() { + while let Some(e) = stream.next().await { + if tx.send(e).await.is_err() { // If we get a send error, it means the sync code // doesn't want any more entries. break; @@ -461,12 +459,12 @@ where .map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?; while let Some((i, n)) = rx.blocking_recv() { - let root_node = n.map_err(|e| { + let (name, node) = n.map_err(|e| { warn!("failed to retrieve root node: {}", e); io::Error::from_raw_os_error(libc::EIO) })?; - let (inode_data, name) = InodeData::from_node(&root_node); + let inode_data = InodeData::from_node(&node); // obtain the inode, or allocate a new one. let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| { @@ -495,15 +493,17 @@ where let (parent_digest, children) = self.get_directory_children(inode)?; Span::current().record("directory.digest", parent_digest.to_string()); - for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() { - let (inode_data, name) = InodeData::from_node(&child_node); + for (i, (ino, child_name, child_node)) in + children.into_iter().skip(offset as usize).enumerate() + { + let inode_data = InodeData::from_node(&child_node); // the second parameter will become the "offset" parameter on the next call. let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry { ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: &child_name, })?; // If the buffer is full, add_entry will return `Ok(0)`. 
if written == 0 { @@ -548,12 +548,12 @@ where .map_err(|_| crate::Error::StorageError("mutex poisoned".into()))?; while let Some((i, n)) = rx.blocking_recv() { - let root_node = n.map_err(|e| { + let (name, node) = n.map_err(|e| { warn!("failed to retrieve root node: {}", e); io::Error::from_raw_os_error(libc::EPERM) })?; - let (inode_data, name) = InodeData::from_node(&root_node); + let inode_data = InodeData::from_node(&node); // obtain the inode, or allocate a new one. let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| { @@ -585,8 +585,8 @@ where let (parent_digest, children) = self.get_directory_children(inode)?; Span::current().record("directory.digest", parent_digest.to_string()); - for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() { - let (inode_data, name) = InodeData::from_node(&child_node); + for (i, (ino, name, child_node)) in children.into_iter().skip(offset as usize).enumerate() { + let inode_data = InodeData::from_node(&child_node); // the second parameter will become the "offset" parameter on the next call. let written = add_entry( diff --git a/tvix/castore/src/fs/root_nodes.rs b/tvix/castore/src/fs/root_nodes.rs index c237142c8d91..62f0b62196a6 100644 --- a/tvix/castore/src/fs/root_nodes.rs +++ b/tvix/castore/src/fs/root_nodes.rs @@ -12,9 +12,10 @@ pub trait RootNodes: Send + Sync { /// directory of the filesystem. async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error>; - /// Lists all root CA nodes in the filesystem. An error can be returned - /// in case listing is not allowed - fn list(&self) -> BoxStream<Result<Node, Error>>; + /// Lists all root CA nodes in the filesystem, as a tuple of (base)name + /// and Node. + /// An error can be returned in case listing is not allowed. 
+ fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>>; } #[async_trait] @@ -28,9 +29,11 @@ where Ok(self.as_ref().get(name).cloned()) } - fn list(&self) -> BoxStream<Result<Node, Error>> { + fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> { Box::pin(tokio_stream::iter( - self.as_ref().iter().map(|(_, v)| Ok(v.clone())), + self.as_ref() + .iter() + .map(|(name, node)| Ok((name.to_owned(), node.to_owned()))), )) } } diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs index b40dfd74e9e4..408166beffb4 100644 --- a/tvix/castore/src/import/mod.rs +++ b/tvix/castore/src/import/mod.rs @@ -59,14 +59,6 @@ where // we break the loop manually. .expect("Tvix bug: unexpected end of stream")?; - let name = entry - .path() - .file_name() - // If this is the root node, it will have an empty name. - .unwrap_or_default() - .to_owned() - .into(); - let node = match &mut entry { IngestionEntry::Dir { .. } => { // If the entry is a directory, we traversed all its children (and @@ -92,36 +84,22 @@ where IngestionError::UploadDirectoryError(entry.path().to_owned(), e) })?; - Node::Directory( - DirectoryNode::new(name, directory_digest, directory_size).map_err(|e| { - IngestionError::UploadDirectoryError( - entry.path().to_owned(), - crate::Error::StorageError(e.to_string()), - ) - })?, - ) + Node::Directory(DirectoryNode::new(directory_digest, directory_size)) } - IngestionEntry::Symlink { ref target, .. } => Node::Symlink( - SymlinkNode::new(name, target.to_owned().into()).map_err(|e| { + IngestionEntry::Symlink { ref target, .. } => { + Node::Symlink(SymlinkNode::new(target.to_owned().into()).map_err(|e| { IngestionError::UploadDirectoryError( entry.path().to_owned(), crate::Error::StorageError(e.to_string()), ) - })?, - ), + })?) + } IngestionEntry::Regular { size, executable, digest, .. 
- } => Node::File( - FileNode::new(name, digest.clone(), *size, *executable).map_err(|e| { - IngestionError::UploadDirectoryError( - entry.path().to_owned(), - crate::Error::StorageError(e.to_string()), - ) - })?, - ), + } => Node::File(FileNode::new(digest.clone(), *size, *executable)), }; let parent = entry @@ -132,11 +110,19 @@ where if parent == crate::Path::ROOT { break node; } else { + let name = entry + .path() + .file_name() + // If this is the root node, it will have an empty name. + .unwrap_or_default() + .to_owned() + .into(); + // record node in parent directory, creating a new [Directory] if not there yet. directories .entry(parent.to_owned()) .or_default() - .add(node) + .add(name, node) .map_err(|e| { IngestionError::UploadDirectoryError( entry.path().to_owned(), @@ -227,18 +213,18 @@ mod test { executable: true, digest: DUMMY_DIGEST.clone(), }], - Node::File(FileNode::new("foo".into(), DUMMY_DIGEST.clone(), 42, true).unwrap()) + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 42, true)) )] #[case::single_symlink(vec![IngestionEntry::Symlink { path: "foo".parse().unwrap(), target: b"blub".into(), }], - Node::Symlink(SymlinkNode::new("foo".into(), "blub".into()).unwrap()) + Node::Symlink(SymlinkNode::new("blub".into()).unwrap()) )] #[case::single_dir(vec![IngestionEntry::Dir { path: "foo".parse().unwrap(), }], - Node::Directory(DirectoryNode::new("foo".into(), Directory::default().digest(), Directory::default().size()).unwrap()) + Node::Directory(DirectoryNode::new(Directory::default().digest(), Directory::default().size())) )] #[case::dir_with_keep(vec![ IngestionEntry::Regular { @@ -251,7 +237,7 @@ mod test { path: "foo".parse().unwrap(), }, ], - Node::Directory(DirectoryNode::new("foo".into(), DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size()).unwrap()) + Node::Directory(DirectoryNode::new(DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size())) )] /// This is intentionally a bit unsorted, though it still satisfies all /// requirements we 
have on the order of elements in the stream. @@ -279,7 +265,7 @@ mod test { path: "blub".parse().unwrap(), }, ], - Node::Directory(DirectoryNode::new("blub".into(), DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size()).unwrap()) + Node::Directory(DirectoryNode::new(DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size())) )] #[tokio::test] async fn test_ingestion(#[case] entries: Vec<IngestionEntry>, #[case] exp_root_node: Node) { diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs index e09926afe70d..f56ddf827722 100644 --- a/tvix/castore/src/lib.rs +++ b/tvix/castore/src/lib.rs @@ -21,7 +21,7 @@ pub mod proto; pub mod tonic; pub use digests::{B3Digest, B3_LEN}; -pub use errors::{Error, ValidateDirectoryError, ValidateNodeError}; +pub use errors::{DirectoryError, Error, ValidateNodeError}; pub use hashing_reader::{B3HashingReader, HashingReader}; #[cfg(test)] diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs index 1752681c89c3..d58b7822a434 100644 --- a/tvix/castore/src/nodes/directory.rs +++ b/tvix/castore/src/nodes/directory.rs @@ -1,23 +1,25 @@ -use crate::{ - proto, B3Digest, DirectoryNode, FileNode, NamedNode, Node, SymlinkNode, ValidateDirectoryError, - ValidateNodeError, -}; - -/// A Directory can contain Directory, File or Symlink nodes. -/// Each of these nodes have a name attribute, which is the basename in that -/// directory and node type specific attributes. -/// While a Node by itself may have any name, the names of Directory entries: +use std::collections::BTreeMap; + +use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode}; + +/// A Directory contains nodes, which can be Directory, File or Symlink nodes. +/// It attaches names to these nodes, which is the basename in that directory. +/// These names: /// - MUST not contain slashes or null bytes /// - MUST not be '.' or '..' 
/// - MUST be unique across all three lists #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct Directory { - nodes: Vec<Node>, + nodes: BTreeMap<bytes::Bytes, Node>, } impl Directory { + /// Constructs a new, empty Directory. + /// FUTUREWORK: provide a constructor from an interator of (sorted) names and nodes. pub fn new() -> Self { - Directory { nodes: vec![] } + Directory { + nodes: BTreeMap::new(), + } } /// The size of a directory is the number of all regular and symlink elements, @@ -25,7 +27,7 @@ impl Directory { pub fn size(&self) -> u64 { // It's impossible to create a Directory where the size overflows, because we // check before every add() that the size won't overflow. - (self.nodes.len() as u64) + self.directories().map(|e| e.size()).sum::<u64>() + (self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>() } /// Calculates the digest of a Directory, which is the blake3 hash of a @@ -35,62 +37,66 @@ impl Directory { } /// Allows iterating over all nodes (directories, files and symlinks) - /// ordered by their name. - pub fn nodes(&self) -> impl Iterator<Item = &Node> + Send + Sync + '_ { + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ { self.nodes.iter() } - /// Allows iterating over the FileNode entries of this directory - /// ordered by their name - pub fn files(&self) -> impl Iterator<Item = &FileNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::File(n) => Some(n), + /// Allows iterating over the FileNode entries of this directory. + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. 
+ pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::File(n) => Some((name, n)), _ => None, }) } - /// Allows iterating over the subdirectories of this directory - /// ordered by their name - pub fn directories(&self) -> impl Iterator<Item = &DirectoryNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Directory(n) => Some(n), + /// Allows iterating over the DirectoryNode entries (subdirectories) of this directory. + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn directories( + &self, + ) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::Directory(n) => Some((name, n)), _ => None, }) } /// Allows iterating over the SymlinkNode entries of this directory - /// ordered by their name - pub fn symlinks(&self) -> impl Iterator<Item = &SymlinkNode> + Send + Sync + '_ { - self.nodes.iter().filter_map(|node| match node { - Node::Symlink(n) => Some(n), + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn symlinks( + &self, + ) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ { + self.nodes.iter().filter_map(|(name, node)| match node { + Node::Symlink(n) => Some((name, n)), _ => None, }) } /// Checks a Node name for validity as a directory entry /// We disallow slashes, null bytes, '.', '..' and the empty string. - pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { - if name.is_empty() + pub(crate) fn is_valid_name(name: &[u8]) -> bool { + !(name.is_empty() || name == b".." || name == b"." 
|| name.contains(&0x00) - || name.contains(&b'/') - { - Err(ValidateNodeError::InvalidName(name.to_owned().into())) - } else { - Ok(()) - } + || name.contains(&b'/')) } - /// Adds the specified [Node] to the [Directory], preserving sorted entries. + /// Adds the specified [Node] to the [Directory] with a given name. /// /// Inserting an element that already exists with the same name in the directory will yield an /// error. - /// Inserting an element will validate that its name fulfills the stricter requirements for - /// directory entries and yield an error if it is not. - pub fn add(&mut self, node: Node) -> Result<(), ValidateDirectoryError> { - Self::validate_node_name(node.get_name()) - .map_err(|e| ValidateDirectoryError::InvalidNode(node.get_name().clone().into(), e))?; + /// Inserting an element will validate that its name fulfills the + /// requirements for directory entries and yield an error if it is not. + pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> { + if !Self::is_valid_name(&name) { + return Err(DirectoryError::InvalidName(name)); + } // Check that the even after adding this new directory entry, the size calculation will not // overflow @@ -104,24 +110,17 @@ impl Directory { _ => 0, }, ]) - .ok_or(ValidateDirectoryError::SizeOverflow)?; - - // This assumes the [Directory] is sorted, since we don't allow accessing the nodes list - // directly and all previous inserts should have been in-order - let pos = match self - .nodes - .binary_search_by_key(&node.get_name(), |n| n.get_name()) - { - Err(pos) => pos, // There is no node with this name; good! 
- Ok(_) => { - return Err(ValidateDirectoryError::DuplicateName( - node.get_name().to_vec(), - )) - } - }; + .ok_or(DirectoryError::SizeOverflow)?; - self.nodes.insert(pos, node); - Ok(()) + match self.nodes.entry(name) { + std::collections::btree_map::Entry::Vacant(e) => { + e.insert(node); + Ok(()) + } + std::collections::btree_map::Entry::Occupied(occupied) => { + Err(DirectoryError::DuplicateName(occupied.key().to_vec())) + } + } } } @@ -133,49 +132,58 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { mod test { use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode}; use crate::fixtures::DUMMY_DIGEST; - use crate::ValidateDirectoryError; + use crate::DirectoryError; #[test] fn add_nodes_to_directory() { let mut d = Directory::new(); - d.add(Node::Directory( - DirectoryNode::new("b".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "b".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "a".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::Directory( - DirectoryNode::new("z".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "z".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("f".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "f".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("c".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "c".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::File( - FileNode::new("g".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "g".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + ) .unwrap(); - d.add(Node::Symlink( - 
SymlinkNode::new("t".into(), "a".into()).unwrap(), - )) + d.add( + "t".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("o".into(), "a".into()).unwrap(), - )) + d.add( + "o".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); - d.add(Node::Symlink( - SymlinkNode::new("e".into(), "a".into()).unwrap(), - )) + d.add( + "e".into(), + Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + ) .unwrap(); // Convert to proto struct and back to ensure we are not generating any invalid structures @@ -188,10 +196,11 @@ mod test { let mut d = Directory::new(); assert_eq!( - d.add(Node::Directory( - DirectoryNode::new("foo".into(), DUMMY_DIGEST.clone(), u64::MAX).unwrap(), - )), - Err(ValidateDirectoryError::SizeOverflow) + d.add( + "foo".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX)) + ), + Err(DirectoryError::SizeOverflow) ); } @@ -199,16 +208,18 @@ mod test { fn add_duplicate_node_to_directory() { let mut d = Directory::new(); - d.add(Node::Directory( - DirectoryNode::new("a".into(), DUMMY_DIGEST.clone(), 1).unwrap(), - )) + d.add( + "a".into(), + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + ) .unwrap(); assert_eq!( format!( "{}", - d.add(Node::File( - FileNode::new("a".into(), DUMMY_DIGEST.clone(), 1, true).unwrap(), - )) + d.add( + "a".into(), + Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)) + ) .expect_err("adding duplicate dir entry must fail") ), "\"a\" is a duplicate name" @@ -220,13 +231,10 @@ mod test { async fn directory_reject_invalid_name() { let mut dir = Directory::new(); assert!( - dir.add(Node::Symlink( - SymlinkNode::new( - "".into(), // wrong! can not be added to directory - "doesntmatter".into(), - ) - .unwrap() - )) + dir.add( + "".into(), // wrong! 
can not be added to directory + Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap()) + ) .is_err(), "invalid symlink entry be rejected" ); diff --git a/tvix/castore/src/nodes/directory_node.rs b/tvix/castore/src/nodes/directory_node.rs index afbae1695c63..e6e6312a7a0c 100644 --- a/tvix/castore/src/nodes/directory_node.rs +++ b/tvix/castore/src/nodes/directory_node.rs @@ -1,13 +1,11 @@ -use crate::{B3Digest, NamedNode, ValidateNodeError}; +use crate::B3Digest; /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. -/// It also gives it a `name` and `size`. +/// It also records a`size`. /// Such a node is either an element in the [Directory] it itself is contained in, /// or a standalone root node./ #[derive(Debug, Clone, PartialEq, Eq)] pub struct DirectoryNode { - /// The (base)name of the directory - name: bytes::Bytes, /// The blake3 hash of a Directory message, serialized in protobuf canonical form. digest: B3Digest, /// Number of child elements in the Directory referred to by `digest`. 
@@ -23,8 +21,8 @@ pub struct DirectoryNode { } impl DirectoryNode { - pub fn new(name: bytes::Bytes, digest: B3Digest, size: u64) -> Result<Self, ValidateNodeError> { - Ok(Self { name, digest, size }) + pub fn new(digest: B3Digest, size: u64) -> Self { + Self { digest, size } } pub fn digest(&self) -> &B3Digest { @@ -34,31 +32,4 @@ impl DirectoryNode { pub fn size(&self) -> u64 { self.size } - - pub fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for DirectoryNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for DirectoryNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for DirectoryNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/nodes/file_node.rs b/tvix/castore/src/nodes/file_node.rs index 5dd677c17b49..73abe21e59b4 100644 --- a/tvix/castore/src/nodes/file_node.rs +++ b/tvix/castore/src/nodes/file_node.rs @@ -1,11 +1,8 @@ -use crate::{B3Digest, NamedNode, ValidateNodeError}; +use crate::B3Digest; /// A FileNode represents a regular or executable file in a Directory or at the root. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct FileNode { - /// The (base)name of the file - name: bytes::Bytes, - /// The blake3 digest of the file contents digest: B3Digest, @@ -17,18 +14,12 @@ pub struct FileNode { } impl FileNode { - pub fn new( - name: bytes::Bytes, - digest: B3Digest, - size: u64, - executable: bool, - ) -> Result<Self, ValidateNodeError> { - Ok(Self { - name, + pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self { + Self { digest, size, executable, - }) + } } pub fn digest(&self) -> &B3Digest { @@ -42,31 +33,4 @@ impl FileNode { pub fn executable(&self) -> bool { self.executable } - - pub fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for FileNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for FileNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for FileNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs index bb8e14cf856e..47f39ae8f94a 100644 --- a/tvix/castore/src/nodes/mod.rs +++ b/tvix/castore/src/nodes/mod.rs @@ -4,66 +4,17 @@ mod directory_node; mod file_node; mod symlink_node; -use bytes::Bytes; pub use directory::Directory; pub use directory_node::DirectoryNode; pub use file_node::FileNode; pub use symlink_node::SymlinkNode; /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. -/// While a Node by itself may have any name, only those matching specific requirements -/// can can be added as entries to a [Directory] (see the documentation on [Directory] for details). +/// Nodes themselves don't have names, what gives them names is either them +/// being inside a [Directory], or a root node with its own name attached to it. 
#[derive(Debug, Clone, PartialEq, Eq)] pub enum Node { Directory(DirectoryNode), File(FileNode), Symlink(SymlinkNode), } - -impl Node { - /// Returns the node with a new name. - pub fn rename(self, name: Bytes) -> Self { - match self { - Node::Directory(n) => Node::Directory(n.rename(name)), - Node::File(n) => Node::File(n.rename(name)), - Node::Symlink(n) => Node::Symlink(n.rename(name)), - } - } -} - -impl PartialOrd for Node { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for Node { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] -/// and [Node], so we can ask all of them for the name easily. -pub trait NamedNode { - fn get_name(&self) -> &Bytes; -} - -impl NamedNode for &Node { - fn get_name(&self) -> &Bytes { - match self { - Node::File(node_file) => node_file.get_name(), - Node::Directory(node_directory) => node_directory.get_name(), - Node::Symlink(node_symlink) => node_symlink.get_name(), - } - } -} -impl NamedNode for Node { - fn get_name(&self) -> &Bytes { - match self { - Node::File(node_file) => node_file.get_name(), - Node::Directory(node_directory) => node_directory.get_name(), - Node::Symlink(node_symlink) => node_symlink.get_name(), - } - } -} diff --git a/tvix/castore/src/nodes/symlink_node.rs b/tvix/castore/src/nodes/symlink_node.rs index 2fac27231a85..6b9df96a5dd3 100644 --- a/tvix/castore/src/nodes/symlink_node.rs +++ b/tvix/castore/src/nodes/symlink_node.rs @@ -1,50 +1,21 @@ -use crate::{NamedNode, ValidateNodeError}; +use crate::ValidateNodeError; /// A SymlinkNode represents a symbolic link in a Directory or at the root. #[derive(Debug, Clone, PartialEq, Eq)] pub struct SymlinkNode { - /// The (base)name of the symlink - name: bytes::Bytes, /// The target of the symlink. 
target: bytes::Bytes, } impl SymlinkNode { - pub fn new(name: bytes::Bytes, target: bytes::Bytes) -> Result<Self, ValidateNodeError> { + pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> { if target.is_empty() || target.contains(&b'\0') { return Err(ValidateNodeError::InvalidSymlinkTarget(target)); } - Ok(Self { name, target }) + Ok(Self { target }) } pub fn target(&self) -> &bytes::Bytes { &self.target } - - pub(crate) fn rename(self, name: bytes::Bytes) -> Self { - Self { name, ..self } - } -} - -impl PartialOrd for SymlinkNode { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for SymlinkNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.get_name().cmp(other.get_name()) - } -} - -impl NamedNode for &SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } -} -impl NamedNode for SymlinkNode { - fn get_name(&self) -> &bytes::Bytes { - &self.name - } } diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs index fb340478c08c..8a55e9f5a1d3 100644 --- a/tvix/castore/src/path.rs +++ b/tvix/castore/src/path.rs @@ -38,7 +38,9 @@ impl Path { if !bytes.is_empty() { // Ensure all components are valid castore node names. for component in bytes.split_str(b"/") { - Directory::validate_node_name(component).ok()?; + if !Directory::is_valid_name(component) { + return None; + } } } @@ -211,7 +213,9 @@ impl PathBuf { /// Adjoins `name` to self. 
pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { - Directory::validate_node_name(name).map_err(|_| std::io::ErrorKind::InvalidData)?; + if !Directory::is_valid_name(name) { + return Err(std::io::ErrorKind::InvalidData.into()); + } if !self.inner.is_empty() { self.inner.push(b'/'); diff --git a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs index e65145509184..62fdb34a25a0 100644 --- a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs +++ b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs @@ -1,5 +1,5 @@ use crate::directoryservice::{DirectoryGraph, DirectoryService, LeavesToRootValidator}; -use crate::{proto, B3Digest, ValidateDirectoryError}; +use crate::{proto, B3Digest, DirectoryError}; use futures::stream::BoxStream; use futures::TryStreamExt; use std::ops::Deref; @@ -84,7 +84,7 @@ where let mut validator = DirectoryGraph::<LeavesToRootValidator>::default(); while let Some(directory) = req_inner.message().await? { validator - .add(directory.try_into().map_err(|e: ValidateDirectoryError| { + .add(directory.try_into().map_err(|e: DirectoryError| { tonic::Status::new(tonic::Code::Internal, e.to_string()) })?) 
.map_err(|e| tonic::Status::new(tonic::Code::Internal, e.to_string()))?; diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index 8f8f3c08347a..7cb1cecd27fa 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -5,12 +5,10 @@ use prost::Message; mod grpc_blobservice_wrapper; mod grpc_directoryservice_wrapper; +use crate::{B3Digest, DirectoryError}; pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper; pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper; -use crate::NamedNode; -use crate::{B3Digest, ValidateDirectoryError, ValidateNodeError}; - tonic::include_proto!("tvix.castore.v1"); #[cfg(feature = "tonic-reflection")] @@ -71,168 +69,83 @@ impl Directory { /// Accepts a name, and a mutable reference to the previous name. /// If the passed name is larger than the previous one, the reference is updated. /// If it's not, an error is returned. -fn update_if_lt_prev<'n>( - prev_name: &mut &'n [u8], - name: &'n [u8], -) -> Result<(), ValidateDirectoryError> { +fn update_if_lt_prev<'n>(prev_name: &mut &'n [u8], name: &'n [u8]) -> Result<(), DirectoryError> { if *name < **prev_name { - return Err(ValidateDirectoryError::WrongSorting(name.to_vec())); + return Err(DirectoryError::WrongSorting(bytes::Bytes::copy_from_slice( + name, + ))); } *prev_name = name; Ok(()) } -impl TryFrom<&node::Node> for crate::Node { - type Error = ValidateNodeError; - - fn try_from(node: &node::Node) -> Result<crate::Node, ValidateNodeError> { - Ok(match node { - node::Node::Directory(n) => crate::Node::Directory(n.try_into()?), - node::Node::File(n) => crate::Node::File(n.try_into()?), - node::Node::Symlink(n) => crate::Node::Symlink(n.try_into()?), - }) - } -} - -impl TryFrom<&Node> for crate::Node { - type Error = ValidateNodeError; - - fn try_from(node: &Node) -> Result<crate::Node, ValidateNodeError> { - match node { - Node { node: None } => Err(ValidateNodeError::NoNodeSet), - Node { node: Some(node) } => 
node.try_into(), - } - } -} - -impl TryFrom<&DirectoryNode> for crate::DirectoryNode { - type Error = ValidateNodeError; - - fn try_from(node: &DirectoryNode) -> Result<crate::DirectoryNode, ValidateNodeError> { - crate::DirectoryNode::new( - node.name.clone(), - node.digest.clone().try_into()?, - node.size, - ) - } -} - -impl TryFrom<&SymlinkNode> for crate::SymlinkNode { - type Error = ValidateNodeError; - - fn try_from(node: &SymlinkNode) -> Result<crate::SymlinkNode, ValidateNodeError> { - crate::SymlinkNode::new(node.name.clone(), node.target.clone()) - } -} - -impl TryFrom<&FileNode> for crate::FileNode { - type Error = ValidateNodeError; - - fn try_from(node: &FileNode) -> Result<crate::FileNode, ValidateNodeError> { - crate::FileNode::new( - node.name.clone(), - node.digest.clone().try_into()?, - node.size, - node.executable, - ) - } -} - +// TODO: add a proper owned version here that moves various fields impl TryFrom<Directory> for crate::Directory { - type Error = ValidateDirectoryError; + type Error = DirectoryError; - fn try_from(directory: Directory) -> Result<crate::Directory, ValidateDirectoryError> { - (&directory).try_into() + fn try_from(value: Directory) -> Result<Self, Self::Error> { + (&value).try_into() } } impl TryFrom<&Directory> for crate::Directory { - type Error = ValidateDirectoryError; + type Error = DirectoryError; - fn try_from(directory: &Directory) -> Result<crate::Directory, ValidateDirectoryError> { + fn try_from(directory: &Directory) -> Result<crate::Directory, DirectoryError> { let mut dir = crate::Directory::new(); + let mut last_file_name: &[u8] = b""; + + // TODO: this currently loops over all three types separately, rather + // than peeking and picking from where would be the next. 
+ for file in directory.files.iter().map(move |file| { update_if_lt_prev(&mut last_file_name, &file.name).map(|()| file.clone()) }) { let file = file?; - dir.add(crate::Node::File((&file).try_into().map_err(|e| { - ValidateDirectoryError::InvalidNode(file.name.into(), e) - })?))?; + + let (name, node) = Node { + node: Some(node::Node::File(file)), + } + .into_name_and_node()?; + + dir.add(name, node)?; } let mut last_directory_name: &[u8] = b""; for directory in directory.directories.iter().map(move |directory| { update_if_lt_prev(&mut last_directory_name, &directory.name).map(|()| directory.clone()) }) { let directory = directory?; - dir.add(crate::Node::Directory((&directory).try_into().map_err( - |e| ValidateDirectoryError::InvalidNode(directory.name.into(), e), - )?))?; + + let (name, node) = Node { + node: Some(node::Node::Directory(directory)), + } + .into_name_and_node()?; + + dir.add(name, node)?; } let mut last_symlink_name: &[u8] = b""; for symlink in directory.symlinks.iter().map(move |symlink| { update_if_lt_prev(&mut last_symlink_name, &symlink.name).map(|()| symlink.clone()) }) { let symlink = symlink?; - dir.add(crate::Node::Symlink((&symlink).try_into().map_err( - |e| ValidateDirectoryError::InvalidNode(symlink.name.into(), e), - )?))?; - } - Ok(dir) - } -} -impl From<&crate::Node> for node::Node { - fn from(node: &crate::Node) -> node::Node { - match node { - crate::Node::Directory(n) => node::Node::Directory(n.into()), - crate::Node::File(n) => node::Node::File(n.into()), - crate::Node::Symlink(n) => node::Node::Symlink(n.into()), - } - } -} - -impl From<&crate::Node> for Node { - fn from(node: &crate::Node) -> Node { - Node { - node: Some(node.into()), - } - } -} - -impl From<&crate::DirectoryNode> for DirectoryNode { - fn from(node: &crate::DirectoryNode) -> DirectoryNode { - DirectoryNode { - digest: node.digest().clone().into(), - size: node.size(), - name: node.get_name().clone(), - } - } -} + let (name, node) = Node { + node: 
Some(node::Node::Symlink(symlink)), + } + .into_name_and_node()?; -impl From<&crate::FileNode> for FileNode { - fn from(node: &crate::FileNode) -> FileNode { - FileNode { - digest: node.digest().clone().into(), - size: node.size(), - name: node.get_name().clone(), - executable: node.executable(), + dir.add(name, node)?; } - } -} -impl From<&crate::SymlinkNode> for SymlinkNode { - fn from(node: &crate::SymlinkNode) -> SymlinkNode { - SymlinkNode { - name: node.get_name().clone(), - target: node.target().clone(), - } + Ok(dir) } } +// TODO: add a proper owned version here that moves various fields impl From<crate::Directory> for Directory { - fn from(directory: crate::Directory) -> Directory { - (&directory).into() + fn from(value: crate::Directory) -> Self { + (&value).into() } } @@ -241,16 +154,25 @@ impl From<&crate::Directory> for Directory { let mut directories = vec![]; let mut files = vec![]; let mut symlinks = vec![]; - for node in directory.nodes() { + + for (name, node) in directory.nodes() { match node { - crate::Node::File(n) => { - files.push(n.into()); - } - crate::Node::Directory(n) => { - directories.push(n.into()); - } + crate::Node::File(n) => files.push(FileNode { + name: name.clone(), + digest: n.digest().to_owned().into(), + size: n.size(), + executable: n.executable(), + }), + crate::Node::Directory(n) => directories.push(DirectoryNode { + name: name.clone(), + digest: n.digest().to_owned().into(), + size: n.size(), + }), crate::Node::Symlink(n) => { - symlinks.push(n.into()); + symlinks.push(SymlinkNode { + name: name.clone(), + target: n.target().to_owned(), + }); } } } @@ -262,6 +184,67 @@ impl From<&crate::Directory> for Directory { } } +impl Node { + /// Converts a proto [Node] to a [crate::Node], and splits off the name. + pub fn into_name_and_node(self) -> Result<(bytes::Bytes, crate::Node), DirectoryError> { + match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? 
{ + node::Node::Directory(n) => { + let digest = B3Digest::try_from(n.digest) + .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + + let node = crate::Node::Directory(crate::DirectoryNode::new(digest, n.size)); + + Ok((n.name, node)) + } + node::Node::File(n) => { + let digest = B3Digest::try_from(n.digest) + .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + + let node = crate::Node::File(crate::FileNode::new(digest, n.size, n.executable)); + + Ok((n.name, node)) + } + + node::Node::Symlink(n) => { + let node = crate::Node::Symlink( + crate::SymlinkNode::new(n.target) + .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?, + ); + + Ok((n.name, node)) + } + } + } + + /// Construsts a [Node] from a name and [crate::Node]. + pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self { + // TODO: make these pub(crate) so we can avoid cloning? + match n { + crate::Node::Directory(directory_node) => Self { + node: Some(node::Node::Directory(DirectoryNode { + name, + digest: directory_node.digest().to_owned().into(), + size: directory_node.size(), + })), + }, + crate::Node::File(file_node) => Self { + node: Some(node::Node::File(FileNode { + name, + digest: file_node.digest().to_owned().into(), + size: file_node.size(), + executable: file_node.executable(), + })), + }, + crate::Node::Symlink(symlink_node) => Self { + node: Some(node::Node::Symlink(SymlinkNode { + name, + target: symlink_node.target().to_owned(), + })), + }, + } + } +} + impl StatBlobResponse { /// Validates a StatBlobResponse. All chunks must have valid blake3 digests. 
/// It is allowed to send an empty list, if no more granular chunking is diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs index 7847b9c4c9cb..215bce7275dd 100644 --- a/tvix/castore/src/proto/tests/directory.rs +++ b/tvix/castore/src/proto/tests/directory.rs @@ -1,4 +1,4 @@ -use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError}; +use crate::proto::{Directory, DirectoryError, DirectoryNode, FileNode, SymlinkNode}; use crate::ValidateNodeError; use hex_literal::hex; @@ -162,8 +162,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"") } _ => panic!("unexpected error"), }; @@ -179,8 +179,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b".") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b".") } _ => panic!("unexpected error"), }; @@ -197,8 +197,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"..") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"..") } _ => panic!("unexpected error"), }; @@ -213,8 +213,8 @@ fn validate_invalid_names() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"\x00") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"\x00") } _ => panic!("unexpected error"), }; @@ -229,8 +229,8 @@ fn validate_invalid_names() { 
..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName(_)) => { - assert_eq!(n, b"foo/bar") + DirectoryError::InvalidName(n) => { + assert_eq!(n.as_ref(), b"foo/bar") } _ => panic!("unexpected error"), }; @@ -248,7 +248,7 @@ fn validate_invalid_digest() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => { + DirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => { assert_eq!(n, 2) } _ => panic!("unexpected error"), @@ -275,8 +275,8 @@ fn validate_sorting() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::WrongSorting(s) => { - assert_eq!(s, b"a"); + DirectoryError::WrongSorting(s) => { + assert_eq!(s.as_ref(), b"a"); } _ => panic!("unexpected error"), } @@ -300,7 +300,7 @@ fn validate_sorting() { ..Default::default() }; match crate::Directory::try_from(d).expect_err("must fail") { - ValidateDirectoryError::DuplicateName(s) => { + DirectoryError::DuplicateName(s) => { assert_eq!(s, b"a"); } _ => panic!("unexpected error"), diff --git a/tvix/castore/src/tests/import.rs b/tvix/castore/src/tests/import.rs index 5dff61782cf2..bbf28a2981bf 100644 --- a/tvix/castore/src/tests/import.rs +++ b/tvix/castore/src/tests/import.rs @@ -2,15 +2,11 @@ use crate::blobservice::{self, BlobService}; use crate::directoryservice; use crate::fixtures::*; use crate::import::fs::ingest_path; -use crate::proto; use crate::{DirectoryNode, Node, SymlinkNode}; use tempfile::TempDir; #[cfg(target_family = "unix")] -use std::os::unix::ffi::OsStrExt; - -#[cfg(target_family = "unix")] #[tokio::test] async fn symlink() { let blob_service = blobservice::from_addr("memory://").await.unwrap(); @@ -34,9 +30,7 @@ async fn symlink() { .expect("must succeed"); assert_eq!( - Node::Symlink( - 
SymlinkNode::new("doesntmatter".into(), "/nix/store/somewhereelse".into(),).unwrap() - ), + Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into(),).unwrap()), root_node, ) } @@ -59,13 +53,12 @@ async fn single_file() { .expect("must succeed"); assert_eq!( - proto::node::Node::File(proto::FileNode { - name: "root".into(), - digest: HELLOWORLD_BLOB_DIGEST.clone().into(), - size: HELLOWORLD_BLOB_CONTENTS.len() as u64, - executable: false, - }), - (&root_node).into(), + Node::File(crate::FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + HELLOWORLD_BLOB_CONTENTS.len() as u64, + false, + )), + root_node, ); // ensure the blob has been uploaded @@ -95,20 +88,10 @@ async fn complicated() { // ensure root_node matched expectations assert_eq!( - Node::Directory( - DirectoryNode::new( - tmpdir - .path() - .file_name() - .unwrap() - .as_bytes() - .to_owned() - .into(), - DIRECTORY_COMPLICATED.digest().clone(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap() - ), + Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest().clone(), + DIRECTORY_COMPLICATED.size(), + )), root_node, ); diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 27ae2e1d6925..a79545437a67 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -577,10 +577,7 @@ pub(crate) mod derivation_builtins { }) .map_err(DerivationError::InvalidDerivation)?; - let root_node = Node::File( - FileNode::new(store_path.to_string().into(), blob_digest, blob_size, false) - .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?, - ); + let root_node = Node::File(FileNode::new(blob_digest, blob_size, false)); // calculate the nar hash let (nar_size, nar_sha256) = state @@ -600,7 +597,10 @@ pub(crate) mod derivation_builtins { state .path_info_service .put(PathInfo { - node: Some((&root_node).into()), + node: Some(tvix_castore::proto::Node::from_name_and_node( + store_path.to_string().into(), + root_node, + )), references: 
reference_paths .iter() .map(|x| bytes::Bytes::copy_from_slice(x.digest())) diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs index 1cf4145427b8..a09dc5a06b35 100644 --- a/tvix/glue/src/builtins/import.rs +++ b/tvix/glue/src/builtins/import.rs @@ -213,16 +213,7 @@ mod import_builtins { .tokio_handle .block_on(async { blob_writer.close().await })?; - let root_node = Node::File( - FileNode::new( - // The name gets set further down, while constructing the PathInfo. - "".into(), - blob_digest, - blob_size, - false, - ) - .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?, - ); + let root_node = Node::File(FileNode::new(blob_digest, blob_size, false)); let ca_hash = if recursive_ingestion { let (_nar_size, nar_sha256) = state diff --git a/tvix/glue/src/fetchers/mod.rs b/tvix/glue/src/fetchers/mod.rs index 88d6da9f1e3c..46db5d093009 100644 --- a/tvix/glue/src/fetchers/mod.rs +++ b/tvix/glue/src/fetchers/mod.rs @@ -10,9 +10,7 @@ use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader}; use tokio_util::io::{InspectReader, InspectWriter}; use tracing::{instrument, warn, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; -use tvix_castore::{ - blobservice::BlobService, directoryservice::DirectoryService, FileNode, Node, ValidateNodeError, -}; +use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService, FileNode, Node}; use tvix_store::{nar::NarCalculationService, pathinfoservice::PathInfoService, proto::PathInfo}; use url::Url; @@ -329,10 +327,7 @@ where // Construct and return the FileNode describing the downloaded contents. 
Ok(( - Node::File( - FileNode::new(vec![].into(), blob_writer.close().await?, blob_size, false) - .map_err(|e| FetcherError::Io(std::io::Error::other(e.to_string())))?, - ), + Node::File(FileNode::new(blob_writer.close().await?, blob_size, false)), CAHash::Flat(actual_hash), blob_size, )) @@ -527,13 +522,7 @@ where // Construct and return the FileNode describing the downloaded contents, // make it executable. - let root_node = Node::File( - FileNode::new(vec![].into(), blob_digest, file_size, true).map_err( - |e: ValidateNodeError| { - FetcherError::Io(std::io::Error::other(e.to_string())) - }, - )?, - ); + let root_node = Node::File(FileNode::new(blob_digest, file_size, true)); Ok((root_node, CAHash::Nar(actual_hash), file_size)) } @@ -557,9 +546,6 @@ where // Calculate the store path to return, by calculating from ca_hash. let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?; - // Rename the node name to match the Store Path. - let node = node.rename(store_path.to_string().into()); - // If the resulting hash is not a CAHash::Nar, we also need to invoke // `calculate_nar` to calculate this representation, as it's required in // the [PathInfo]. @@ -577,7 +563,10 @@ where // Construct the PathInfo and persist it. 
let path_info = PathInfo { - node: Some((&node).into()), + node: Some(tvix_castore::proto::Node::from_name_and_node( + store_path.to_string().into(), + node.clone(), + )), references: vec![], narinfo: Some(tvix_store::proto::NarInfo { nar_size, @@ -589,20 +578,12 @@ where }), }; - let path_info = self - .path_info_service + self.path_info_service .put(path_info) .await .map_err(|e| FetcherError::Io(e.into()))?; - Ok(( - store_path, - (&path_info.node.unwrap().node.unwrap()) - .try_into() - .map_err(|e: ValidateNodeError| { - FetcherError::Io(std::io::Error::other(e.to_string())) - })?, - )) + Ok((store_path, node)) } } diff --git a/tvix/glue/src/tvix_build.rs b/tvix/glue/src/tvix_build.rs index 7fff0b9cf337..77fa5d92a197 100644 --- a/tvix/glue/src/tvix_build.rs +++ b/tvix/glue/src/tvix_build.rs @@ -1,7 +1,7 @@ //! This module contains glue code translating from //! [nix_compat::derivation::Derivation] to [tvix_build::proto::BuildRequest]. -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use bytes::Bytes; use nix_compat::{derivation::Derivation, nixbase32}; @@ -39,7 +39,7 @@ const NIX_ENVIRONMENT_VARS: [(&str, &str); 12] = [ #[allow(clippy::mutable_key_type)] pub(crate) fn derivation_to_build_request( derivation: &Derivation, - inputs: BTreeSet<Node>, + inputs: BTreeMap<bytes::Bytes, Node>, ) -> std::io::Result<BuildRequest> { debug_assert!(derivation.validate(true).is_ok(), "drv must validate"); @@ -109,7 +109,10 @@ pub(crate) fn derivation_to_build_request( .into_iter() .map(|(key, value)| EnvVar { key, value }) .collect(), - inputs: inputs.iter().map(Into::into).collect(), + inputs: inputs + .into_iter() + .map(|(name, node)| tvix_castore::proto::Node::from_name_and_node(name, node)) + .collect(), inputs_dir: nix_compat::store_path::STORE_DIR[1..].into(), constraints, working_dir: "build".into(), @@ -189,10 +192,9 @@ fn calculate_pass_as_file_env(k: &str) -> (String, String) { #[cfg(test)] mod test { - use std::collections::BTreeSet; 
- use bytes::Bytes; use nix_compat::derivation::Derivation; + use std::collections::BTreeMap; use tvix_build::proto::{ build_request::{AdditionalFile, BuildConstraints, EnvVar}, BuildRequest, @@ -206,14 +208,9 @@ mod test { use lazy_static::lazy_static; lazy_static! { - static ref INPUT_NODE_FOO: Node = Node::Directory( - DirectoryNode::new( - Bytes::from("mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"), - DUMMY_DIGEST.clone(), - 42, - ) - .unwrap() - ); + static ref INPUT_NODE_FOO_NAME: Bytes = "mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar".into(); + static ref INPUT_NODE_FOO: Node = + Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 42,)); } #[test] @@ -222,9 +219,11 @@ mod test { let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse"); - let build_request = - derivation_to_build_request(&derivation, BTreeSet::from([INPUT_NODE_FOO.clone()])) - .expect("must succeed"); + let build_request = derivation_to_build_request( + &derivation, + BTreeMap::from([(INPUT_NODE_FOO_NAME.clone(), INPUT_NODE_FOO.clone())]), + ) + .expect("must succeed"); let mut expected_environment_vars = vec![ EnvVar { @@ -261,7 +260,10 @@ mod test { command_args: vec![":".into()], outputs: vec!["nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo".into()], environment_vars: expected_environment_vars, - inputs: vec![(&*INPUT_NODE_FOO).into()], + inputs: vec![tvix_castore::proto::Node::from_name_and_node( + INPUT_NODE_FOO_NAME.clone(), + INPUT_NODE_FOO.clone() + )], inputs_dir: "nix/store".into(), constraints: Some(BuildConstraints { system: derivation.system.clone(), @@ -285,7 +287,7 @@ mod test { let derivation = Derivation::from_aterm_bytes(aterm_bytes).expect("must parse"); let build_request = - derivation_to_build_request(&derivation, BTreeSet::from([])).expect("must succeed"); + derivation_to_build_request(&derivation, BTreeMap::from([])).expect("must succeed"); let mut expected_environment_vars = vec![ EnvVar { @@ -355,7 +357,7 @@ mod test { let derivation = 
Derivation::from_aterm_bytes(aterm_bytes).expect("must parse"); let build_request = - derivation_to_build_request(&derivation, BTreeSet::from([])).expect("must succeed"); + derivation_to_build_request(&derivation, BTreeMap::from([])).expect("must succeed"); let mut expected_environment_vars = vec![ // Note how bar and baz are not present in the env anymore, diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 8c5631382844..4185c9554810 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -4,9 +4,9 @@ use futures::{StreamExt, TryStreamExt}; use nix_compat::nixhash::NixHash; use nix_compat::store_path::StorePathRef; use nix_compat::{nixhash::CAHash, store_path::StorePath}; +use std::collections::BTreeMap; use std::{ cell::RefCell, - collections::BTreeSet, io, path::{Path, PathBuf}, sync::Arc, @@ -21,7 +21,7 @@ use tvix_store::nar::NarCalculationService; use tvix_castore::{ blobservice::BlobService, directoryservice::{self, DirectoryService}, - {NamedNode, Node}, + Node, }; use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo}; @@ -120,12 +120,22 @@ impl TvixStoreIO { .await? { // if we have a PathInfo, we know there will be a root_node (due to validation) - Some(path_info) => path_info - .node - .as_ref() - .expect("no node") - .try_into() - .expect("invalid node"), + // TODO: use stricter typed BuildRequest here. + Some(path_info) => { + let (name, node) = path_info + .node + .expect("no node") + .into_name_and_node() + .expect("invalid node"); + + assert_eq!( + store_path.to_string().as_bytes(), + name.as_ref(), + "returned node basename must match requested store path" + ); + + node + } // If there's no PathInfo found, this normally means we have to // trigger the build (and insert into PathInfoService, after // reference scanning). @@ -189,7 +199,7 @@ impl TvixStoreIO { // Provide them, which means, here is where we recursively build // all dependencies. 
#[allow(clippy::mutable_key_type)] - let mut input_nodes: BTreeSet<Node> = + let mut inputs: BTreeMap<Bytes, Node> = futures::stream::iter(drv.input_derivations.iter()) .map(|(input_drv_path, output_names)| { // look up the derivation object @@ -217,6 +227,7 @@ impl TvixStoreIO { .clone() }) .collect(); + // For each output, ask for the castore node. // We're in a per-derivation context, so if they're // not built yet they'll all get built together. @@ -231,7 +242,7 @@ impl TvixStoreIO { .await?; if let Some(node) = node { - Ok(node) + Ok((output_path.to_string().into(), node)) } else { Err(io::Error::other("no node produced")) } @@ -245,26 +256,30 @@ impl TvixStoreIO { .try_collect() .await?; - // add input sources // FUTUREWORK: merge these who things together #[allow(clippy::mutable_key_type)] - let input_nodes_input_sources: BTreeSet<Node> = + // add input sources + let input_sources: BTreeMap<_, _> = futures::stream::iter(drv.input_sources.iter()) .then(|input_source| { - Box::pin(async { - let node = self - .store_path_to_node(input_source, Path::new("")) - .await?; - if let Some(node) = node { - Ok(node) - } else { - Err(io::Error::other("no node produced")) + Box::pin({ + let input_source = input_source.clone(); + async move { + let node = self + .store_path_to_node(&input_source, Path::new("")) + .await?; + if let Some(node) = node { + Ok((input_source.to_string().into(), node)) + } else { + Err(io::Error::other("no node produced")) + } } }) }) .try_collect() .await?; - input_nodes.extend(input_nodes_input_sources); + + inputs.extend(input_sources); span.pb_set_message(&format!("🔨Building {}", &store_path)); @@ -273,7 +288,7 @@ impl TvixStoreIO { // operations, so dealt with in the Some(…) match arm // synthesize the build request. 
- let build_request = derivation_to_build_request(&drv, input_nodes)?; + let build_request = derivation_to_build_request(&drv, inputs)?; // create a build let build_result = self @@ -287,17 +302,21 @@ impl TvixStoreIO { // For each output, insert a PathInfo. for output in &build_result.outputs { - let root_node = output.try_into().expect("invalid root node"); + let (output_name, output_node) = + output.clone().into_name_and_node().expect("invalid node"); // calculate the nar representation let (nar_size, nar_sha256) = self .nar_calculation_service - .calculate_nar(&root_node) + .calculate_nar(&output_node) .await?; // assemble the PathInfo to persist let path_info = PathInfo { - node: Some((&root_node).into()), + node: Some(tvix_castore::proto::Node::from_name_and_node( + output_name, + output_node, + )), references: vec![], // TODO: refscan narinfo: Some(tvix_store::proto::NarInfo { nar_size, @@ -330,14 +349,15 @@ impl TvixStoreIO { } // find the output for the store path requested + let s = store_path.to_string(); + build_result .outputs .into_iter() - .map(|output_node| Node::try_from(&output_node).expect("invalid node")) - .find(|output_node| { - output_node.get_name() == store_path.to_string().as_bytes() - }) + .map(|e| e.into_name_and_node().expect("invalid node")) + .find(|(output_name, _output_node)| output_name == s.as_bytes()) .expect("build didn't produce the store path") + .1 } } } @@ -380,12 +400,16 @@ impl TvixStoreIO { }, )?; - // assemble a new root_node with a name that is derived from the nar hash. 
- let root_node = root_node.rename(output_path.to_string().into_bytes().into()); - tvix_store::import::log_node(&root_node, path); + tvix_store::import::log_node(name.as_bytes(), &root_node, path); - let path_info = - tvix_store::import::derive_nar_ca_path_info(nar_size, nar_sha256, Some(ca), root_node); + // construct a PathInfo + let path_info = tvix_store::import::derive_nar_ca_path_info( + nar_size, + nar_sha256, + Some(ca), + output_path.to_string().into(), + root_node, + ); Ok(( path_info, @@ -540,13 +564,12 @@ impl EvalIO for TvixStoreIO { self.directory_service.as_ref().get(&digest).await })? { let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new(); - for node in directory.nodes() { + // TODO: into_nodes() to avoid cloning + for (name, node) in directory.nodes() { children.push(match node { - Node::Directory(e) => { - (e.get_name().clone(), FileType::Directory) - } - Node::File(e) => (e.get_name().clone(), FileType::Regular), - Node::Symlink(e) => (e.get_name().clone(), FileType::Symlink), + Node::Directory(_) => (name.clone(), FileType::Directory), + Node::File(_) => (name.clone(), FileType::Regular), + Node::Symlink(_) => (name.clone(), FileType::Symlink), }) } Ok(children) diff --git a/tvix/nar-bridge/src/lib.rs b/tvix/nar-bridge/src/lib.rs index 2f3dd82439b1..56b7e19a3cb0 100644 --- a/tvix/nar-bridge/src/lib.rs +++ b/tvix/nar-bridge/src/lib.rs @@ -7,7 +7,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use tvix_castore::blobservice::BlobService; use tvix_castore::directoryservice::DirectoryService; -use tvix_castore::proto::node::Node; +use tvix_castore::Node; use tvix_store::pathinfoservice::PathInfoService; mod nar; diff --git a/tvix/nar-bridge/src/nar.rs b/tvix/nar-bridge/src/nar.rs index acbcfa2817bc..f65c8f0be2ab 100644 --- a/tvix/nar-bridge/src/nar.rs +++ b/tvix/nar-bridge/src/nar.rs @@ -52,14 +52,15 @@ pub async fn get( StatusCode::NOT_FOUND })?; - let root_node: tvix_castore::Node = (&root_node).try_into().map_err(|e| { + let 
(root_name, root_node) = root_node.into_name_and_node().map_err(|e| { warn!(err=%e, "root node validation failed"); StatusCode::BAD_REQUEST })?; - // validate the node, but add a dummy node name, as we only send unnamed - // nodes - let root_node = root_node.rename("00000000000000000000000000000000-dummy".into()); + if !root_name.is_empty() { + warn!("root node has name, which it shouldn't"); + return Err(StatusCode::BAD_REQUEST); + } let (w, r) = tokio::io::duplex(1024 * 8); @@ -125,7 +126,7 @@ pub async fn put( // store mapping of narhash to root node into root_nodes. // we need it later to populate the root node when accepting the PathInfo. - root_nodes.write().put(nar_hash_actual, (&root_node).into()); + root_nodes.write().put(nar_hash_actual, root_node); Ok("") } diff --git a/tvix/nar-bridge/src/narinfo.rs b/tvix/nar-bridge/src/narinfo.rs index fc19bdc871cd..f97ee970819d 100644 --- a/tvix/nar-bridge/src/narinfo.rs +++ b/tvix/nar-bridge/src/narinfo.rs @@ -1,8 +1,9 @@ use axum::http::StatusCode; use bytes::Bytes; use nix_compat::{narinfo::NarInfo, nixbase32}; +use prost::Message; use tracing::{instrument, warn, Span}; -use tvix_castore::proto::{self as castorepb, node::Node}; +use tvix_castore::proto::{self as castorepb}; use tvix_store::proto::PathInfo; use crate::AppState; @@ -67,17 +68,21 @@ pub async fn get( })?; // encode the (unnamed) root node in the NAR url itself. - let root_node = - tvix_castore::Node::try_from(path_info.node.as_ref().expect("root node must not be none")) - .unwrap() // PathInfo is validated - .rename("".into()); - - let mut buf = Vec::new(); - Node::encode(&(&root_node).into(), &mut buf); + // We strip the name from the proto node before sending it out. + // It's not needed to render the NAR, it'll make the URL shorter, and it + // will make caching these requests easier. 
+ let (_, root_node) = path_info + .node + .as_ref() + .expect("invalid pathinfo") + .to_owned() + .into_name_and_node() + .expect("invalid pathinfo"); let url = format!( "nar/tvix-castore/{}?narsize={}", - data_encoding::BASE64URL_NOPAD.encode(&buf), + data_encoding::BASE64URL_NOPAD + .encode(&castorepb::Node::from_name_and_node("".into(), root_node).encode_to_vec()), narinfo.nar_size, ); @@ -125,19 +130,18 @@ pub async fn put( // Lookup root node with peek, as we don't want to update the LRU list. // We need to be careful to not hold the RwLock across the await point. - let maybe_root_node: Option<tvix_castore::Node> = root_nodes - .read() - .peek(&narinfo.nar_hash) - .and_then(|v| v.try_into().ok()); + let maybe_root_node: Option<tvix_castore::Node> = + root_nodes.read().peek(&narinfo.nar_hash).cloned(); match maybe_root_node { Some(root_node) => { // Set the root node from the lookup. // We need to rename the node to the narinfo storepath basename, as // that's where it's stored in PathInfo. - pathinfo.node = Some(castorepb::Node { - node: Some((&root_node.rename(narinfo.store_path.to_string().into())).into()), - }); + pathinfo.node = Some(castorepb::Node::from_name_and_node( + narinfo.store_path.to_string().into(), + root_node, + )); // Persist the PathInfo. path_info_service.put(pathinfo).await.map_err(|e| { diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs index 2af8f0ee436b..d9d5b1597bb7 100644 --- a/tvix/store/src/bin/tvix-store.rs +++ b/tvix/store/src/bin/tvix-store.rs @@ -351,9 +351,10 @@ async fn run_cli(cli: Cli) -> Result<(), Box<dyn std::error::Error + Send + Sync // Create and upload a PathInfo pointing to the root_node, // annotated with information we have from the reference graph. 
let path_info = PathInfo { - node: Some(tvix_castore::proto::Node { - node: Some((&root_node).into()), - }), + node: Some(tvix_castore::proto::Node::from_name_and_node( + elem.path.to_string().into(), + root_node, + )), references: Vec::from_iter( elem.references.iter().map(|e| e.digest().to_vec().into()), ), diff --git a/tvix/store/src/import.rs b/tvix/store/src/import.rs index a94ff9f2cdfc..e279bb4c04c2 100644 --- a/tvix/store/src/import.rs +++ b/tvix/store/src/import.rs @@ -1,10 +1,8 @@ +use bstr::ByteSlice; use std::path::Path; use tracing::{debug, instrument}; use tvix_castore::{ - blobservice::BlobService, - directoryservice::DirectoryService, - import::fs::ingest_path, - {NamedNode, Node}, + blobservice::BlobService, directoryservice::DirectoryService, import::fs::ingest_path, Node, }; use nix_compat::{ @@ -29,12 +27,12 @@ impl From<CAHash> for nar_info::Ca { } } -pub fn log_node(node: &Node, path: &Path) { +pub fn log_node(name: &[u8], node: &Node, path: &Path) { match node { Node::Directory(directory_node) => { debug!( path = ?path, - name = ?directory_node.get_name(), + name = %name.as_bstr(), digest = %directory_node.digest(), "import successful", ) @@ -42,7 +40,7 @@ pub fn log_node(node: &Node, path: &Path) { Node::File(file_node) => { debug!( path = ?path, - name = ?file_node.get_name(), + name = %name.as_bstr(), digest = %file_node.digest(), "import successful" ) @@ -50,7 +48,7 @@ pub fn log_node(node: &Node, path: &Path) { Node::Symlink(symlink_node) => { debug!( path = ?path, - name = ?symlink_node.get_name(), + name = %name.as_bstr(), target = ?symlink_node.target(), "import successful" ) @@ -84,13 +82,14 @@ pub fn derive_nar_ca_path_info( nar_size: u64, nar_sha256: [u8; 32], ca: Option<&CAHash>, + name: bytes::Bytes, root_node: Node, ) -> PathInfo { // assemble the [crate::proto::PathInfo] object. 
PathInfo { - node: Some(tvix_castore::proto::Node { - node: Some((&root_node).into()), - }), + node: Some(tvix_castore::proto::Node::from_name_and_node( + name, root_node, + )), // There's no reference scanning on path contents ingested like this. references: vec![], narinfo: Some(NarInfo { @@ -140,14 +139,14 @@ where ) })?; - // rename the root node to match the calculated output path. - let root_node = root_node.rename(output_path.to_string().into_bytes().into()); - log_node(&root_node, path.as_ref()); + let name = bytes::Bytes::from(output_path.to_string()); + log_node(name.as_ref(), &root_node, path.as_ref()); let path_info = derive_nar_ca_path_info( nar_size, nar_sha256, Some(&CAHash::Nar(NixHash::Sha256(nar_sha256))), + output_path.to_string().into_bytes().into(), root_node, ); diff --git a/tvix/store/src/nar/import.rs b/tvix/store/src/nar/import.rs index ac50e7e4301f..71ab9bd588cd 100644 --- a/tvix/store/src/nar/import.rs +++ b/tvix/store/src/nar/import.rs @@ -12,7 +12,7 @@ use tvix_castore::{ blobs::{self, ConcurrentBlobUploader}, ingest_entries, IngestionEntry, IngestionError, }, - PathBuf, {NamedNode, Node}, + Node, PathBuf, }; /// Ingests the contents from a [AsyncRead] providing NAR into the tvix store, @@ -97,9 +97,7 @@ where let (_, node) = try_join!(produce, consume)?; - // remove the fake "root" name again - debug_assert_eq!(&node.get_name()[..], b"root"); - Ok(node.rename("".into())) + Ok(node) } async fn produce_nar_inner<BS>( @@ -198,13 +196,7 @@ mod test { .expect("must parse"); assert_eq!( - Node::Symlink( - SymlinkNode::new( - "".into(), // name must be empty - "/nix/store/somewhereelse".into(), - ) - .unwrap() - ), + Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into(),).unwrap()), root_node ); } @@ -224,15 +216,11 @@ mod test { .expect("must parse"); assert_eq!( - Node::File( - FileNode::new( - "".into(), // name must be empty - HELLOWORLD_BLOB_DIGEST.clone(), - HELLOWORLD_BLOB_CONTENTS.len() as u64, - false, - ) - .unwrap() - ), 
+ Node::File(FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + HELLOWORLD_BLOB_CONTENTS.len() as u64, + false, + )), root_node ); @@ -255,14 +243,10 @@ mod test { .expect("must parse"); assert_eq!( - Node::Directory( - DirectoryNode::new( - "".into(), // name must be empty - DIRECTORY_COMPLICATED.digest(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap() - ), + Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest(), + DIRECTORY_COMPLICATED.size(), + )), root_node, ); diff --git a/tvix/store/src/nar/mod.rs b/tvix/store/src/nar/mod.rs index ca2423b5ce59..da798bbf3a3c 100644 --- a/tvix/store/src/nar/mod.rs +++ b/tvix/store/src/nar/mod.rs @@ -37,13 +37,13 @@ pub enum RenderError { #[error("failure talking to a backing store client: {0}")] StoreError(#[source] std::io::Error), - #[error("unable to find directory {}, referred from {:?}", .0, .1)] + #[error("unable to find directory {0}, referred from {1:?}")] DirectoryNotFound(B3Digest, bytes::Bytes), - #[error("unable to find blob {}, referred from {:?}", .0, .1)] + #[error("unable to find blob {0}, referred from {1:?}")] BlobNotFound(B3Digest, bytes::Bytes), - #[error("unexpected size in metadata for blob {}, referred from {:?} returned, expected {}, got {}", .0, .1, .2, .3)] + #[error("unexpected size in metadata for blob {0}, referred from {1:?} returned, expected {2}, got {3}")] UnexpectedBlobMeta(B3Digest, bytes::Bytes, u32, u32), #[error("failure using the NAR writer: {0}")] diff --git a/tvix/store/src/nar/renderer.rs b/tvix/store/src/nar/renderer.rs index fefc76956e7e..d83ef9c354f6 100644 --- a/tvix/store/src/nar/renderer.rs +++ b/tvix/store/src/nar/renderer.rs @@ -8,11 +8,7 @@ use tokio::io::{self, AsyncWrite, BufReader}; use tonic::async_trait; use tracing::{instrument, Span}; use tracing_indicatif::span_ext::IndicatifSpanExt; -use tvix_castore::{ - blobservice::BlobService, - directoryservice::DirectoryService, - {NamedNode, Node}, -}; +use tvix_castore::{blobservice::BlobService, 
directoryservice::DirectoryService, Node}; pub struct SimpleRenderer<BS, DS> { blob_service: BS, @@ -103,6 +99,7 @@ where walk_node( nar_root_node, proto_root_node, + b"", blob_service, directory_service, ) @@ -115,7 +112,8 @@ where /// This consumes the node. async fn walk_node<BS, DS>( nar_node: nar_writer::Node<'_, '_>, - proto_node: &Node, + castore_node: &Node, + name: &[u8], blob_service: BS, directory_service: DS, ) -> Result<(BS, DS), RenderError> @@ -123,10 +121,10 @@ where BS: BlobService + Send, DS: DirectoryService + Send, { - match proto_node { - Node::Symlink(proto_symlink_node) => { + match castore_node { + Node::Symlink(symlink_node) => { nar_node - .symlink(proto_symlink_node.target()) + .symlink(symlink_node.target()) .await .map_err(RenderError::NARWriterError)?; } @@ -154,19 +152,19 @@ where .await .map_err(RenderError::NARWriterError)?; } - Node::Directory(proto_directory_node) => { + Node::Directory(directory_node) => { // look it up with the directory service match directory_service - .get(proto_directory_node.digest()) + .get(directory_node.digest()) .await .map_err(|e| RenderError::StoreError(e.into()))? { // if it's None, that's an error! None => Err(RenderError::DirectoryNotFound( - proto_directory_node.digest().clone(), - proto_directory_node.get_name().clone(), + directory_node.digest().clone(), + bytes::Bytes::copy_from_slice(name), ))?, - Some(proto_directory) => { + Some(directory) => { // start a directory node let mut nar_node_directory = nar_node .directory() @@ -180,15 +178,16 @@ where // for each node in the directory, create a new entry with its name, // and then recurse on that entry. 
- for proto_node in proto_directory.nodes() { + for (name, node) in directory.nodes() { let child_node = nar_node_directory - .entry(proto_node.get_name()) + .entry(name) .await .map_err(RenderError::NARWriterError)?; (blob_service, directory_service) = Box::pin(walk_node( child_node, - proto_node, + node, + name.as_ref(), blob_service, directory_service, )) diff --git a/tvix/store/src/pathinfoservice/fs/mod.rs b/tvix/store/src/pathinfoservice/fs/mod.rs index 9a991a41d28d..7a46e1fc8fa5 100644 --- a/tvix/store/src/pathinfoservice/fs/mod.rs +++ b/tvix/store/src/pathinfoservice/fs/mod.rs @@ -3,7 +3,7 @@ use futures::StreamExt; use tonic::async_trait; use tvix_castore::fs::{RootNodes, TvixStoreFs}; use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService}; -use tvix_castore::{Error, Node, ValidateNodeError}; +use tvix_castore::{Error, Node}; use super::PathInfoService; @@ -58,25 +58,31 @@ where .get(*store_path.digest()) .await? .map(|path_info| { - path_info + let node = path_info .node .as_ref() .expect("missing root node") - .try_into() - .map_err(|e: ValidateNodeError| Error::StorageError(e.to_string())) + .to_owned(); + + match node.into_name_and_node() { + Ok((_name, node)) => Ok(node), + Err(e) => Err(Error::StorageError(e.to_string())), + } }) .transpose()?) 
} - fn list(&self) -> BoxStream<Result<Node, Error>> { + fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> { Box::pin(self.0.as_ref().list().map(|result| { result.and_then(|path_info| { - path_info + let node = path_info .node .as_ref() .expect("missing root node") - .try_into() - .map_err(|e: ValidateNodeError| Error::StorageError(e.to_string())) + .to_owned(); + + node.into_name_and_node() + .map_err(|e| Error::StorageError(e.to_string())) }) })) } diff --git a/tvix/store/src/pathinfoservice/grpc.rs b/tvix/store/src/pathinfoservice/grpc.rs index 187d9a148472..7510ccd911f0 100644 --- a/tvix/store/src/pathinfoservice/grpc.rs +++ b/tvix/store/src/pathinfoservice/grpc.rs @@ -132,9 +132,10 @@ where let path_info = self .grpc_client .clone() - .calculate_nar(tvix_castore::proto::Node { - node: Some(root_node.into()), - }) + .calculate_nar(tvix_castore::proto::Node::from_name_and_node( + "".into(), + root_node.to_owned(), + )) .await .map_err(|e| Error::StorageError(e.to_string()))? 
.into_inner(); diff --git a/tvix/store/src/pathinfoservice/lru.rs b/tvix/store/src/pathinfoservice/lru.rs index 3a7f01eb70ac..695c04636089 100644 --- a/tvix/store/src/pathinfoservice/lru.rs +++ b/tvix/store/src/pathinfoservice/lru.rs @@ -108,11 +108,17 @@ mod test { let mut p = PATHINFO_1.clone(); let root_node = p.node.as_mut().unwrap(); if let castorepb::Node { node: Some(node) } = root_node { - let n = node.to_owned(); - *node = (&tvix_castore::Node::try_from(&n) - .unwrap() - .rename("11111111111111111111111111111111-dummy2".into())) - .into(); + match node { + castorepb::node::Node::Directory(n) => { + n.name = "11111111111111111111111111111111-dummy2".into() + } + castorepb::node::Node::File(n) => { + n.name = "11111111111111111111111111111111-dummy2".into() + } + castorepb::node::Node::Symlink(n) => { + n.name = "11111111111111111111111111111111-dummy2".into() + } + } } else { unreachable!() } diff --git a/tvix/store/src/pathinfoservice/nix_http.rs b/tvix/store/src/pathinfoservice/nix_http.rs index c10b97857ea1..5f1eed1a0a9f 100644 --- a/tvix/store/src/pathinfoservice/nix_http.rs +++ b/tvix/store/src/pathinfoservice/nix_http.rs @@ -228,10 +228,10 @@ where } Ok(Some(PathInfo { - node: Some(castorepb::Node { - // set the name of the root node to the digest-name of the store path. 
- node: Some((&root_node.rename(narinfo.store_path.to_string().into())).into()), - }), + node: Some(castorepb::Node::from_name_and_node( + narinfo.store_path.to_string().into(), + root_node, + )), references: pathinfo.references, narinfo: pathinfo.narinfo, })) diff --git a/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs b/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs index e420801ce528..60da73012df7 100644 --- a/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs +++ b/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs @@ -74,7 +74,7 @@ where &self, request: Request<castorepb::Node>, ) -> Result<Response<proto::CalculateNarResponse>> { - let root_node = (&request.into_inner()).try_into().map_err(|e| { + let (_, root_node) = request.into_inner().into_name_and_node().map_err(|e| { warn!(err = %e, "invalid root node"); Status::invalid_argument("invalid root node") })?; diff --git a/tvix/store/src/proto/mod.rs b/tvix/store/src/proto/mod.rs index 375fb0dcadb8..13b24f3aaeb4 100644 --- a/tvix/store/src/proto/mod.rs +++ b/tvix/store/src/proto/mod.rs @@ -9,7 +9,7 @@ use nix_compat::{ store_path::{self, StorePathRef}, }; use thiserror::Error; -use tvix_castore::{NamedNode, ValidateNodeError}; +use tvix_castore::DirectoryError; mod grpc_pathinfoservice_wrapper; @@ -39,7 +39,7 @@ pub enum ValidatePathInfoError { /// Node fails validation #[error("Invalid root node: {:?}", .0.to_string())] - InvalidRootNode(ValidateNodeError), + InvalidRootNode(DirectoryError), /// Invalid node name encountered. 
Root nodes in PathInfos have more strict name requirements #[error("Failed to parse {} as StorePath: {1}", .0.to_str_lossy())] @@ -160,12 +160,13 @@ impl PathInfo { let root_nix_path = match &self.node { None => Err(ValidatePathInfoError::NoNodePresent)?, Some(node) => { - // TODO save result somewhere - let node: tvix_castore::Node = node - .try_into() + let (name, _node) = node + .clone() + .into_name_and_node() .map_err(ValidatePathInfoError::InvalidRootNode)?; + // parse the name of the node itself and return - parse_node_name_root(node.get_name(), ValidatePathInfoError::InvalidNodeName)? + parse_node_name_root(name.as_ref(), ValidatePathInfoError::InvalidNodeName)? .to_owned() } }; diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs index 928bb8c8b185..ed8c64f6ccf8 100644 --- a/tvix/store/src/proto/tests/pathinfo.rs +++ b/tvix/store/src/proto/tests/pathinfo.rs @@ -6,11 +6,11 @@ use nix_compat::nixbase32; use nix_compat::store_path::{self, StorePath, StorePathRef}; use rstest::rstest; use tvix_castore::proto as castorepb; -use tvix_castore::ValidateNodeError; +use tvix_castore::{DirectoryError, ValidateNodeError}; #[rstest] #[case::no_node(None, Err(ValidatePathInfoError::NoNodePresent))] -#[case::no_node_2(Some(castorepb::Node { node: None}), Err(ValidatePathInfoError::InvalidRootNode(ValidateNodeError::NoNodeSet)))] +#[case::no_node_2(Some(castorepb::Node { node: None}), Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::NoNodeSet)))] fn validate_pathinfo( #[case] node: Option<castorepb::Node>, @@ -35,7 +35,7 @@ fn validate_pathinfo( name: DUMMY_PATH.into(), digest: Bytes::new(), size: 0, -}, Err(ValidatePathInfoError::InvalidRootNode(tvix_castore::ValidateNodeError::InvalidDigestLen(0))))] +}, Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.into(), ValidateNodeError::InvalidDigestLen(0)))))] #[case::invalid_node_name_no_storepath(castorepb::DirectoryNode { name: 
"invalid".into(), digest: DUMMY_DIGEST.clone().into(), @@ -74,7 +74,7 @@ fn validate_directory( digest: Bytes::new(), ..Default::default() }, - Err(ValidatePathInfoError::InvalidRootNode(tvix_castore::ValidateNodeError::InvalidDigestLen(0))) + Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.into(), ValidateNodeError::InvalidDigestLen(0)))) )] #[case::invalid_node_name( castorepb::FileNode { @@ -226,24 +226,28 @@ fn validate_inconsistent_narinfo_reference_name_digest() { /// Create a node with an empty symlink target, and ensure it fails validation. #[test] fn validate_symlink_empty_target_invalid() { - let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode { - name: "foo".into(), - target: "".into(), - }); - - tvix_castore::Node::try_from(&node).expect_err("must fail validation"); + castorepb::Node { + node: Some(castorepb::node::Node::Symlink(castorepb::SymlinkNode { + name: "foo".into(), + target: "".into(), + })), + } + .into_name_and_node() + .expect_err("must fail validation"); } /// Create a node with a symlink target including null bytes, and ensure it /// fails validation. #[test] fn validate_symlink_target_null_byte_invalid() { - let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode { - name: "foo".into(), - target: "foo\0".into(), - }); - - tvix_castore::Node::try_from(&node).expect_err("must fail validation"); + castorepb::Node { + node: Some(castorepb::node::Node::Symlink(castorepb::SymlinkNode { + name: "foo".into(), + target: "foo\0".into(), + })), + } + .into_name_and_node() + .expect_err("must fail validation"); } /// Create a PathInfo with a correct deriver field and ensure it succeeds. 
diff --git a/tvix/store/src/tests/nar_renderer.rs b/tvix/store/src/tests/nar_renderer.rs index 5c2c60ade410..fafcaf39e1e7 100644 --- a/tvix/store/src/tests/nar_renderer.rs +++ b/tvix/store/src/tests/nar_renderer.rs @@ -22,9 +22,7 @@ async fn single_symlink( write_nar( &mut buf, - &Node::Symlink( - SymlinkNode::new("doesntmatter".into(), "/nix/store/somewhereelse".into()).unwrap(), - ), + &Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into()).unwrap()), // don't put anything in the stores, as we don't actually do any requests. blob_service, directory_service, @@ -44,15 +42,11 @@ async fn single_file_missing_blob( ) { let e = write_nar( sink(), - &Node::File( - FileNode::new( - "doesntmatter".into(), - HELLOWORLD_BLOB_DIGEST.clone(), - HELLOWORLD_BLOB_CONTENTS.len() as u64, - false, - ) - .unwrap(), - ), + &Node::File(FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + HELLOWORLD_BLOB_CONTENTS.len() as u64, + false, + )), // the blobservice is empty intentionally, to provoke the error. blob_service, directory_service, @@ -92,15 +86,11 @@ async fn single_file_wrong_blob_size( // Test with a root FileNode of a too big size let e = write_nar( sink(), - &Node::File( - FileNode::new( - "doesntmatter".into(), - HELLOWORLD_BLOB_DIGEST.clone(), - 42, // <- note the wrong size here! - false, - ) - .unwrap(), - ), + &Node::File(FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + 42, // <- note the wrong size here! + false, + )), blob_service.clone(), directory_service.clone(), ) @@ -117,15 +107,11 @@ async fn single_file_wrong_blob_size( // Test with a root FileNode of a too small size let e = write_nar( sink(), - &Node::File( - FileNode::new( - "doesntmatter".into(), - HELLOWORLD_BLOB_DIGEST.clone(), - 2, // <- note the wrong size here! - false, - ) - .unwrap(), - ), + &Node::File(FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + 2, // <- note the wrong size here! 
+ false, + )), blob_service, directory_service, ) @@ -161,15 +147,11 @@ async fn single_file( write_nar( &mut buf, - &Node::File( - FileNode::new( - "doesntmatter".into(), - HELLOWORLD_BLOB_DIGEST.clone(), - HELLOWORLD_BLOB_CONTENTS.len() as u64, - false, - ) - .unwrap(), - ), + &Node::File(FileNode::new( + HELLOWORLD_BLOB_DIGEST.clone(), + HELLOWORLD_BLOB_CONTENTS.len() as u64, + false, + )), blob_service, directory_service, ) @@ -207,14 +189,10 @@ async fn test_complicated( write_nar( &mut buf, - &Node::Directory( - DirectoryNode::new( - "doesntmatter".into(), - DIRECTORY_COMPLICATED.digest(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap(), - ), + &Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest(), + DIRECTORY_COMPLICATED.size(), + )), blob_service.clone(), directory_service.clone(), ) @@ -225,14 +203,10 @@ async fn test_complicated( // ensure calculate_nar does return the correct sha256 digest and sum. let (nar_size, nar_digest) = calculate_size_and_sha256( - &Node::Directory( - DirectoryNode::new( - "doesntmatter".into(), - DIRECTORY_COMPLICATED.digest(), - DIRECTORY_COMPLICATED.size(), - ) - .unwrap(), - ), + &Node::Directory(DirectoryNode::new( + DIRECTORY_COMPLICATED.digest(), + DIRECTORY_COMPLICATED.size(), + )), blob_service, directory_service, ) |